]>
Commit | Line | Data |
---|---|---|
3d3e04ac | 1 | /* GIMPLE store merging pass. |
2 | Copyright (C) 2016 Free Software Foundation, Inc. | |
3 | Contributed by ARM Ltd. | |
4 | ||
5 | This file is part of GCC. | |
6 | ||
7 | GCC is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU General Public License as published by | |
9 | the Free Software Foundation; either version 3, or (at your option) | |
10 | any later version. | |
11 | ||
12 | GCC is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU General Public License | |
18 | along with GCC; see the file COPYING3. If not see | |
19 | <http://www.gnu.org/licenses/>. */ | |
20 | ||
21 | /* The purpose of this pass is to combine multiple memory stores of | |
22 | constant values to consecutive memory locations into fewer wider stores. | |
23 | For example, if we have a sequence performing four byte stores to | |
24 | consecutive memory locations: | |
25 | [p ] := imm1; | |
26 | [p + 1B] := imm2; | |
27 | [p + 2B] := imm3; | |
28 | [p + 3B] := imm4; | |
29 | we can transform this into a single 4-byte store if the target supports it: | |
30 | [p] := imm1:imm2:imm3:imm4 //concatenated immediates according to endianness. | |
31 | ||
32 | The algorithm is applied to each basic block in three phases: | |
33 | ||
34 | 1) Scan through the basic block recording constant assignments to | |
35 | destinations that can be expressed as a store to memory of a certain size | |
36 | at a certain bit offset. Record store chains to different bases in a | |
37 | hash_map (m_stores) and make sure to terminate such chains when appropriate | |
38 | (for example when the stored values get used subsequently). | |
39 | These stores can be a result of structure element initializers, array stores | |
40 | etc. A store_immediate_info object is recorded for every such store. | |
41 | Record as many such assignments to a single base as possible until a | |
42 | statement that interferes with the store sequence is encountered. | |
43 | ||
44 | 2) Analyze the chain of stores recorded in phase 1) (i.e. the vector of | |
45 | store_immediate_info objects) and coalesce contiguous stores into | |
46 | merged_store_group objects. | |
47 | ||
48 | For example, given the stores: | |
49 | [p ] := 0; | |
50 | [p + 1B] := 1; | |
51 | [p + 3B] := 0; | |
52 | [p + 4B] := 1; | |
53 | [p + 5B] := 0; | |
54 | [p + 6B] := 0; | |
55 | This phase would produce two merged_store_group objects, one recording the | |
56 | two bytes stored in the memory region [p : p + 1] and another | |
57 | recording the four bytes stored in the memory region [p + 3 : p + 6]. | |
58 | ||
59 | 3) The merged_store_group objects produced in phase 2) are processed | |
60 | to generate the sequence of wider stores that set the contiguous memory | |
61 | regions to the sequence of bytes that correspond to it. This may emit | |
62 | multiple stores per store group to handle contiguous stores that are not | |
63 | of a size that is a power of 2. For example it can try to emit a 40-bit | |
64 | store as a 32-bit store followed by an 8-bit store. | |
65 | We try to emit as wide stores as we can while respecting STRICT_ALIGNMENT or | |
66 | SLOW_UNALIGNED_ACCESS rules. | |
67 | ||
68 | Note on endianness and example: | |
69 | Consider 2 contiguous 16-bit stores followed by 2 contiguous 8-bit stores: | |
70 | [p ] := 0x1234; | |
71 | [p + 2B] := 0x5678; | |
72 | [p + 4B] := 0xab; | |
73 | [p + 5B] := 0xcd; | |
74 | ||
75 | The memory layout for little-endian (LE) and big-endian (BE) must be: | |
76 | p |LE|BE| | |
77 | --------- | |
78 | 0 |34|12| | |
79 | 1 |12|34| | |
80 | 2 |78|56| | |
81 | 3 |56|78| | |
82 | 4 |ab|ab| | |
83 | 5 |cd|cd| | |
84 | ||
85 | To merge these into a single 48-bit merged value 'val' in phase 2) | |
86 | on little-endian we insert stores to higher (consecutive) bitpositions | |
87 | into the most significant bits of the merged value. | |
88 | The final merged value would be: 0xcdab56781234 | |
89 | ||
90 | For big-endian we insert stores to higher bitpositions into the least | |
91 | significant bits of the merged value. | |
92 | The final merged value would be: 0x12345678abcd | |
93 | ||
94 | Then, in phase 3), we want to emit this 48-bit value as a 32-bit store | |
95 | followed by a 16-bit store. Again, we must consider endianness when | |
96 | breaking down the 48-bit value 'val' computed above. | |
97 | For little endian we emit: | |
98 | [p] (32-bit) := 0x56781234; // val & 0x0000ffffffff; | |
99 | [p + 4B] (16-bit) := 0xcdab; // (val & 0xffff00000000) >> 32; | |
100 | ||
101 | Whereas for big-endian we emit: | |
102 | [p] (32-bit) := 0x12345678; // (val & 0xffffffff0000) >> 16; | |
103 | [p + 4B] (16-bit) := 0xabcd; // val & 0x00000000ffff; */ | |
104 | ||
105 | #include "config.h" | |
106 | #include "system.h" | |
107 | #include "coretypes.h" | |
108 | #include "backend.h" | |
109 | #include "tree.h" | |
110 | #include "gimple.h" | |
111 | #include "builtins.h" | |
112 | #include "fold-const.h" | |
113 | #include "tree-pass.h" | |
114 | #include "ssa.h" | |
115 | #include "gimple-pretty-print.h" | |
116 | #include "alias.h" | |
117 | #include "fold-const.h" | |
118 | #include "params.h" | |
119 | #include "print-tree.h" | |
120 | #include "tree-hash-traits.h" | |
121 | #include "gimple-iterator.h" | |
122 | #include "gimplify.h" | |
123 | #include "stor-layout.h" | |
124 | #include "timevar.h" | |
125 | #include "tree-cfg.h" | |
126 | #include "tree-eh.h" | |
127 | #include "target.h" | |
128 | ||
129 | /* The maximum size (in bits) of the stores this pass should generate. */ | |
130 | #define MAX_STORE_BITSIZE (BITS_PER_WORD) | |
131 | #define MAX_STORE_BYTES (MAX_STORE_BITSIZE / BITS_PER_UNIT) | |
132 | ||
133 | namespace { | |
134 | ||
/* Struct recording the information about a single store of an immediate
   to memory.  These are created in the first phase and coalesced into
   merged_store_group objects in the second phase.  */

struct store_immediate_info
{
  /* Width of the stored value, in bits.  */
  unsigned HOST_WIDE_INT bitsize;
  /* Bit offset of the store from the start of the base object.  */
  unsigned HOST_WIDE_INT bitpos;
  /* The constant value being stored.  */
  tree val;
  /* The destination memory reference of the store.  */
  tree dest;
  /* The original store statement.  */
  gimple *stmt;
  /* Relative position of STMT among the recorded stores; used by
     sort_by_order to recover program order after sorting by bitpos.  */
  unsigned int order;
  store_immediate_info (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT, tree,
			tree, gimple *, unsigned int);
};
150 | ||
/* Construct a store_immediate_info describing a store of value V of BS
   bits to destination D at bit offset BP.  ST is the original statement
   and ORD its relative position in the basic block.  */

store_immediate_info::store_immediate_info (unsigned HOST_WIDE_INT bs,
					    unsigned HOST_WIDE_INT bp, tree v,
					    tree d, gimple *st,
					    unsigned int ord)
  : bitsize (bs), bitpos (bp), val (v), dest (d), stmt (st), order (ord)
{
}
158 | ||
/* Struct representing a group of stores to contiguous memory locations.
   These are produced by the second phase (coalescing) and consumed in the
   third phase that outputs the widened stores.  */

struct merged_store_group
{
  /* Bit offset of the start of the group.  */
  unsigned HOST_WIDE_INT start;
  /* Total width of the group, in bits.  */
  unsigned HOST_WIDE_INT width;
  /* The size of the allocated memory for val.  */
  unsigned HOST_WIDE_INT buf_size;

  /* Alignment of the first store's destination (get_object_alignment).  */
  unsigned int align;
  /* Smallest and largest 'order' among the recorded stores.  */
  unsigned int first_order;
  unsigned int last_order;

  /* The individual stores making up this group.  */
  auto_vec<struct store_immediate_info *> stores;
  /* We record the first and last original statements in the sequence because
     we'll need their vuse/vdef and replacement position.  It's easier to keep
     track of them separately as 'stores' is reordered by apply_stores.  */
  gimple *last_stmt;
  gimple *first_stmt;
  /* Byte buffer holding the merged value; allocated and filled in by
     apply_stores, released by the destructor.  */
  unsigned char *val;

  merged_store_group (store_immediate_info *);
  ~merged_store_group ();
  void merge_into (store_immediate_info *);
  void merge_overlapping (store_immediate_info *);
  bool apply_stores ();
};
188 | ||
/* Debug helper.  Print the LEN bytes of the array PTR to stream FD as
   space-separated hex values followed by a newline.  A NULL FD is
   tolerated and produces no output.  */

static void
dump_char_array (FILE *fd, unsigned char *ptr, unsigned int len)
{
  if (fd == NULL)
    return;

  unsigned int i = 0;
  while (i < len)
    {
      fprintf (fd, "%x ", ptr[i]);
      i++;
    }
  fprintf (fd, "\n");
}
201 | ||
/* Fill the SZ-element byte array PTR with zeroes.  Used by
   encode_tree_to_bitpos to zero-initialize typically small buffers
   whose size is not a compile-time constant.  */

static inline void
zero_char_buf (unsigned char *ptr, unsigned int sz)
{
  unsigned char *end = ptr + sz;
  while (ptr != end)
    *ptr++ = 0;
}
212 | ||
213 | /* Shift left the bytes in PTR of SZ elements by AMNT bits, carrying over the | |
214 | bits between adjacent elements. AMNT should be within | |
215 | [0, BITS_PER_UNIT). | |
216 | Example, AMNT = 2: | |
217 | 00011111|11100000 << 2 = 01111111|10000000 | |
218 | PTR[1] | PTR[0] PTR[1] | PTR[0]. */ | |
219 | ||
220 | static void | |
221 | shift_bytes_in_array (unsigned char *ptr, unsigned int sz, unsigned int amnt) | |
222 | { | |
223 | if (amnt == 0) | |
224 | return; | |
225 | ||
226 | unsigned char carry_over = 0U; | |
227 | unsigned char carry_mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - amnt)); | |
228 | unsigned char clear_mask = (~0U) << amnt; | |
229 | ||
230 | for (unsigned int i = 0; i < sz; i++) | |
231 | { | |
232 | unsigned prev_carry_over = carry_over; | |
233 | carry_over | |
234 | = (ptr[i] & carry_mask) >> (BITS_PER_UNIT - amnt); | |
235 | ||
236 | ptr[i] <<= amnt; | |
237 | if (i != 0) | |
238 | { | |
239 | ptr[i] &= clear_mask; | |
240 | ptr[i] |= prev_carry_over; | |
241 | } | |
242 | } | |
243 | } | |
244 | ||
245 | /* Like shift_bytes_in_array but for big-endian. | |
246 | Shift right the bytes in PTR of SZ elements by AMNT bits, carrying over the | |
247 | bits between adjacent elements. AMNT should be within | |
248 | [0, BITS_PER_UNIT). | |
249 | Example, AMNT = 2: | |
250 | 00011111|11100000 >> 2 = 00000111|11111000 | |
251 | PTR[0] | PTR[1] PTR[0] | PTR[1]. */ | |
252 | ||
253 | static void | |
254 | shift_bytes_in_array_right (unsigned char *ptr, unsigned int sz, | |
255 | unsigned int amnt) | |
256 | { | |
257 | if (amnt == 0) | |
258 | return; | |
259 | ||
260 | unsigned char carry_over = 0U; | |
261 | unsigned char carry_mask = ~(~0U << amnt); | |
262 | ||
263 | for (unsigned int i = 0; i < sz; i++) | |
264 | { | |
265 | unsigned prev_carry_over = carry_over; | |
266 | carry_over | |
267 | = (ptr[i] & carry_mask); | |
268 | ||
269 | carry_over <<= ((unsigned char)BITS_PER_UNIT - amnt); | |
270 | ptr[i] >>= amnt; | |
271 | ptr[i] |= prev_carry_over; | |
272 | } | |
273 | } | |
274 | ||
/* Clear out LEN bits starting from bit START in the byte array
   PTR.  This clears the bits to the *right* from START.
   START must be within [0, BITS_PER_UNIT) and counts starting from
   the least significant bit.  For regions spanning multiple bytes the
   function recurses, byte by byte, until LEN is exhausted.  */

static void
clear_bit_region_be (unsigned char *ptr, unsigned int start,
		     unsigned int len)
{
  if (len == 0)
    return;
  /* Clear len bits to the right of start.  */
  else if (len <= start + 1)
    {
      /* The whole region fits in the first byte: build a mask of LEN
	 ones whose topmost bit sits at position START, then clear.  */
      unsigned char mask = (~(~0U << len));
      mask = mask << (start + 1U - len);
      ptr[0] &= ~mask;
    }
  else if (start != BITS_PER_UNIT - 1)
    {
      /* Clear from START down to bit 0 of this byte (START + 1 bits),
	 then continue with the remaining bits from the most significant
	 bit of the next byte.  */
      clear_bit_region_be (ptr, start, (start % BITS_PER_UNIT) + 1);
      clear_bit_region_be (ptr + 1, BITS_PER_UNIT - 1,
			   len - (start % BITS_PER_UNIT) - 1);
    }
  else if (start == BITS_PER_UNIT - 1
	   && len > BITS_PER_UNIT)
    {
      /* Region starts at a byte boundary: clear whole bytes with a loop
	 (avoids deep recursion) and recurse for any sub-byte tail.  */
      unsigned int nbytes = len / BITS_PER_UNIT;
      for (unsigned int i = 0; i < nbytes; i++)
	ptr[i] = 0U;
      if (len % BITS_PER_UNIT != 0)
	clear_bit_region_be (ptr + nbytes, BITS_PER_UNIT - 1,
			     len % BITS_PER_UNIT);
    }
  else
    gcc_unreachable ();
}
312 | ||
313 | /* In the byte array PTR clear the bit region starting at bit | |
314 | START and is LEN bits wide. | |
315 | For regions spanning multiple bytes do this recursively until we reach | |
316 | zero LEN or a region contained within a single byte. */ | |
317 | ||
318 | static void | |
319 | clear_bit_region (unsigned char *ptr, unsigned int start, | |
320 | unsigned int len) | |
321 | { | |
322 | /* Degenerate base case. */ | |
323 | if (len == 0) | |
324 | return; | |
325 | else if (start >= BITS_PER_UNIT) | |
326 | clear_bit_region (ptr + 1, start - BITS_PER_UNIT, len); | |
327 | /* Second base case. */ | |
328 | else if ((start + len) <= BITS_PER_UNIT) | |
329 | { | |
330 | unsigned char mask = (~0U) << ((unsigned char)(BITS_PER_UNIT - len)); | |
331 | mask >>= BITS_PER_UNIT - (start + len); | |
332 | ||
333 | ptr[0] &= ~mask; | |
334 | ||
335 | return; | |
336 | } | |
337 | /* Clear most significant bits in a byte and proceed with the next byte. */ | |
338 | else if (start != 0) | |
339 | { | |
340 | clear_bit_region (ptr, start, BITS_PER_UNIT - start); | |
341 | clear_bit_region (ptr + 1, 0, len - (BITS_PER_UNIT - start) + 1); | |
342 | } | |
343 | /* Whole bytes need to be cleared. */ | |
344 | else if (start == 0 && len > BITS_PER_UNIT) | |
345 | { | |
346 | unsigned int nbytes = len / BITS_PER_UNIT; | |
347 | /* We could recurse on each byte but do the loop here to avoid | |
348 | recursing too deep. */ | |
349 | for (unsigned int i = 0; i < nbytes; i++) | |
350 | ptr[i] = 0U; | |
351 | /* Clear the remaining sub-byte region if there is one. */ | |
352 | if (len % BITS_PER_UNIT != 0) | |
353 | clear_bit_region (ptr + nbytes, 0, len % BITS_PER_UNIT); | |
354 | } | |
355 | else | |
356 | gcc_unreachable (); | |
357 | } | |
358 | ||
/* Write BITLEN bits of EXPR to the byte array PTR at
   bit position BITPOS.  PTR should contain TOTAL_BYTES elements.
   Return true if the operation succeeded.  */

static bool
encode_tree_to_bitpos (tree expr, unsigned char *ptr, int bitlen, int bitpos,
		       unsigned int total_bytes)
{
  unsigned int first_byte = bitpos / BITS_PER_UNIT;
  tree tmp_int = expr;
  bool sub_byte_op_p = (bitlen % BITS_PER_UNIT) || (bitpos % BITS_PER_UNIT)
		       || mode_for_size (bitlen, MODE_INT, 0) == BLKmode;

  /* Byte-sized quantity at a byte boundary: let native_encode_expr write
     it directly into place.  */
  if (!sub_byte_op_p)
    return native_encode_expr (tmp_int, ptr + first_byte, total_bytes, 0)
	   != 0;

  /* LITTLE-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
     |--------|--------|--------| ptr + first_byte
			 ^	 ^
			 xxx xxxxxxxx xxx< bp>
			 |______EXPR____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer by 'bp' (carrying the bits over as necessary).
     |00000000|00xxxxxx|xxxxxxxx| << bp = |000xxxxx|xxxxxxxx|xxx00000|
					  <------bitlen---->< bp>
     Then we clear the destination bits:
     |---00000|00000000|000-----| ptr + first_byte
     <-------bitlen--->< bp>

     Finally we OR the bytes of the shifted EXPR into the cleared region:
     |---xxxxx||xxxxxxxx||xxx-----| ptr + first_byte.

     BIG-ENDIAN
     We are writing a non byte-sized quantity or at a position that is not
     at a byte boundary.
     ptr + first_byte |--------|--------|--------|
		      ^	       ^
		      <bp >xxx xxxxxxxx xxx
			   |_____EXPR_____|

     First native_encode_expr EXPR into a temporary buffer and shift each
     byte in the buffer to the right (carrying the bits over as necessary).
     We shift by as much as needed to align the most significant bit of EXPR
     with bitpos:
     |00xxxxxx|xxxxxxxx| >> 3 = |00000xxx|xxxxxxxx|xxxxx000|
	<---bitlen---->		<bp ><-----bitlen----->
     Then we clear the destination bits:
     ptr + first_byte |-----000||00000000||00000---|
		      <bp ><-------bitlen----->
     Finally we OR the bytes of the shifted EXPR into the cleared region:
     ptr + first_byte |---xxxxx||xxxxxxxx||xxx-----|.
     The awkwardness comes from the fact that bitpos is counted from the
     most significant bit of a byte.  */

  /* Allocate an extra byte so that we have space to shift into.  */
  unsigned int byte_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (expr))) + 1;
  unsigned char *tmpbuf = XALLOCAVEC (unsigned char, byte_size);
  zero_char_buf (tmpbuf, byte_size);
  /* The store detection code should only have allowed constants that are
     accepted by native_encode_expr.  */
  if (native_encode_expr (expr, tmpbuf, byte_size, 0) == 0)
    gcc_unreachable ();

  /* The native_encode_expr machinery uses TYPE_MODE to determine how many
     bytes to write.  This means it can write more than
     ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT bytes (for example
     write 8 bytes for a bitlen of 40).  Skip the bytes that are not within
     bitlen and zero out the bits that are not relevant as well (that may
     contain a sign bit due to sign-extension).  */
  unsigned int padding
    = byte_size - ROUND_UP (bitlen, BITS_PER_UNIT) / BITS_PER_UNIT - 1;
  if (BYTES_BIG_ENDIAN)
    {
      /* On big-endian the significant bytes sit at the end of the buffer;
	 skip the leading padding and trim any excess leading bits of the
	 first significant byte.  */
      tmpbuf += padding;
      byte_size -= padding;
      if (bitlen % BITS_PER_UNIT != 0)
	clear_bit_region_be (tmpbuf, BITS_PER_UNIT - 1,
			     BITS_PER_UNIT - (bitlen % BITS_PER_UNIT));
    }

  /* Clear the bit region in PTR where the bits from TMPBUF will be
     inserted into.  */
  if (BYTES_BIG_ENDIAN)
    clear_bit_region_be (ptr + first_byte,
			 BITS_PER_UNIT - 1 - (bitpos % BITS_PER_UNIT), bitlen);
  else
    clear_bit_region (ptr + first_byte, bitpos % BITS_PER_UNIT, bitlen);

  int shift_amnt;
  int bitlen_mod = bitlen % BITS_PER_UNIT;
  int bitpos_mod = bitpos % BITS_PER_UNIT;

  bool skip_byte = false;
  if (BYTES_BIG_ENDIAN)
    {
      /* BITPOS and BITLEN are exactly aligned and no shifting
	 is necessary.  */
      if (bitpos_mod + bitlen_mod == BITS_PER_UNIT
	  || (bitpos_mod == 0 && bitlen_mod == 0))
	shift_amnt = 0;
      /* |. . . . . . . .|
	  <bp >   <blen >.
	 We always shift right for BYTES_BIG_ENDIAN so shift the beginning
	 of the value until it aligns with 'bp' in the next byte over.  */
      else if (bitpos_mod + bitlen_mod < BITS_PER_UNIT)
	{
	  shift_amnt = bitlen_mod + bitpos_mod;
	  skip_byte = bitlen_mod != 0;
	}
      /* |. . . . . . . .|
	  <----bp--->
	    <---blen---->.
	 Shift the value right within the same byte so it aligns with 'bp'.  */
      else
	shift_amnt = bitlen_mod + bitpos_mod - BITS_PER_UNIT;
    }
  else
    shift_amnt = bitpos % BITS_PER_UNIT;

  /* Create the shifted version of EXPR.  */
  if (!BYTES_BIG_ENDIAN)
    shift_bytes_in_array (tmpbuf, byte_size, shift_amnt);
  else
    {
      gcc_assert (BYTES_BIG_ENDIAN);
      shift_bytes_in_array_right (tmpbuf, byte_size, shift_amnt);
      /* If shifting right forced us to move into the next byte skip the now
	 empty byte.  */
      if (skip_byte)
	{
	  tmpbuf++;
	  byte_size--;
	}
    }

  /* Insert the bits from TMPBUF.  */
  for (unsigned int i = 0; i < byte_size; i++)
    ptr[first_byte + i] |= tmpbuf[i];

  return true;
}
505 | ||
506 | /* Sorting function for store_immediate_info objects. | |
507 | Sorts them by bitposition. */ | |
508 | ||
509 | static int | |
510 | sort_by_bitpos (const void *x, const void *y) | |
511 | { | |
512 | store_immediate_info *const *tmp = (store_immediate_info * const *) x; | |
513 | store_immediate_info *const *tmp2 = (store_immediate_info * const *) y; | |
514 | ||
515 | if ((*tmp)->bitpos <= (*tmp2)->bitpos) | |
516 | return -1; | |
517 | else if ((*tmp)->bitpos > (*tmp2)->bitpos) | |
518 | return 1; | |
519 | ||
520 | gcc_unreachable (); | |
521 | } | |
522 | ||
523 | /* Sorting function for store_immediate_info objects. | |
524 | Sorts them by the order field. */ | |
525 | ||
526 | static int | |
527 | sort_by_order (const void *x, const void *y) | |
528 | { | |
529 | store_immediate_info *const *tmp = (store_immediate_info * const *) x; | |
530 | store_immediate_info *const *tmp2 = (store_immediate_info * const *) y; | |
531 | ||
532 | if ((*tmp)->order < (*tmp2)->order) | |
533 | return -1; | |
534 | else if ((*tmp)->order > (*tmp2)->order) | |
535 | return 1; | |
536 | ||
537 | gcc_unreachable (); | |
538 | } | |
539 | ||
540 | /* Initialize a merged_store_group object from a store_immediate_info | |
541 | object. */ | |
542 | ||
543 | merged_store_group::merged_store_group (store_immediate_info *info) | |
544 | { | |
545 | start = info->bitpos; | |
546 | width = info->bitsize; | |
547 | /* VAL has memory allocated for it in apply_stores once the group | |
548 | width has been finalized. */ | |
549 | val = NULL; | |
550 | align = get_object_alignment (info->dest); | |
551 | stores.create (1); | |
552 | stores.safe_push (info); | |
553 | last_stmt = info->stmt; | |
554 | last_order = info->order; | |
555 | first_stmt = last_stmt; | |
556 | first_order = last_order; | |
557 | buf_size = 0; | |
558 | } | |
559 | ||
/* Destructor.  Release the merged-value buffer if apply_stores
   allocated one.  */

merged_store_group::~merged_store_group ()
{
  if (val)
    XDELETEVEC (val);
}
565 | ||
566 | /* Merge a store recorded by INFO into this merged store. | |
567 | The store is not overlapping with the existing recorded | |
568 | stores. */ | |
569 | ||
570 | void | |
571 | merged_store_group::merge_into (store_immediate_info *info) | |
572 | { | |
573 | unsigned HOST_WIDE_INT wid = info->bitsize; | |
574 | /* Make sure we're inserting in the position we think we're inserting. */ | |
575 | gcc_assert (info->bitpos == start + width); | |
576 | ||
577 | width += wid; | |
578 | gimple *stmt = info->stmt; | |
579 | stores.safe_push (info); | |
580 | if (info->order > last_order) | |
581 | { | |
582 | last_order = info->order; | |
583 | last_stmt = stmt; | |
584 | } | |
585 | else if (info->order < first_order) | |
586 | { | |
587 | first_order = info->order; | |
588 | first_stmt = stmt; | |
589 | } | |
590 | } | |
591 | ||
592 | /* Merge a store described by INFO into this merged store. | |
593 | INFO overlaps in some way with the current store (i.e. it's not contiguous | |
594 | which is handled by merged_store_group::merge_into). */ | |
595 | ||
596 | void | |
597 | merged_store_group::merge_overlapping (store_immediate_info *info) | |
598 | { | |
599 | gimple *stmt = info->stmt; | |
600 | stores.safe_push (info); | |
601 | ||
602 | /* If the store extends the size of the group, extend the width. */ | |
603 | if ((info->bitpos + info->bitsize) > (start + width)) | |
604 | width += info->bitpos + info->bitsize - (start + width); | |
605 | ||
606 | if (info->order > last_order) | |
607 | { | |
608 | last_order = info->order; | |
609 | last_stmt = stmt; | |
610 | } | |
611 | else if (info->order < first_order) | |
612 | { | |
613 | first_order = info->order; | |
614 | first_stmt = stmt; | |
615 | } | |
616 | } | |
617 | ||
618 | /* Go through all the recorded stores in this group in program order and | |
619 | apply their values to the VAL byte array to create the final merged | |
620 | value. Return true if the operation succeeded. */ | |
621 | ||
622 | bool | |
623 | merged_store_group::apply_stores () | |
624 | { | |
625 | /* The total width of the stores must add up to a whole number of bytes | |
626 | and start at a byte boundary. We don't support emitting bitfield | |
627 | references for now. Also, make sure we have more than one store | |
628 | in the group, otherwise we cannot merge anything. */ | |
629 | if (width % BITS_PER_UNIT != 0 | |
630 | || start % BITS_PER_UNIT != 0 | |
631 | || stores.length () == 1) | |
632 | return false; | |
633 | ||
634 | stores.qsort (sort_by_order); | |
635 | struct store_immediate_info *info; | |
636 | unsigned int i; | |
637 | /* Create a buffer of a size that is 2 times the number of bytes we're | |
638 | storing. That way native_encode_expr can write power-of-2-sized | |
639 | chunks without overrunning. */ | |
640 | buf_size | |
641 | = 2 * (ROUND_UP (width, BITS_PER_UNIT) / BITS_PER_UNIT); | |
642 | val = XCNEWVEC (unsigned char, buf_size); | |
643 | ||
644 | FOR_EACH_VEC_ELT (stores, i, info) | |
645 | { | |
646 | unsigned int pos_in_buffer = info->bitpos - start; | |
647 | bool ret = encode_tree_to_bitpos (info->val, val, info->bitsize, | |
648 | pos_in_buffer, buf_size); | |
649 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
650 | { | |
651 | if (ret) | |
652 | { | |
653 | fprintf (dump_file, "After writing "); | |
654 | print_generic_expr (dump_file, info->val, 0); | |
655 | fprintf (dump_file, " of size " HOST_WIDE_INT_PRINT_DEC | |
656 | " at position %d the merged region contains:\n", | |
657 | info->bitsize, pos_in_buffer); | |
658 | dump_char_array (dump_file, val, buf_size); | |
659 | } | |
660 | else | |
661 | fprintf (dump_file, "Failed to merge stores\n"); | |
662 | } | |
663 | if (!ret) | |
664 | return false; | |
665 | } | |
666 | return true; | |
667 | } | |
668 | ||
/* Structure describing the store chain anchored at a single base object.  */

struct imm_store_chain_info
{
  /* The candidate immediate stores recorded for this chain in phase 1.  */
  auto_vec<struct store_immediate_info *> m_store_info;
  /* The coalesced groups produced from m_store_info in phase 2.  */
  auto_vec<merged_store_group *> m_merged_store_groups;

  bool terminate_and_process_chain (tree);
  bool coalesce_immediate_stores ();
  bool output_merged_store (tree, merged_store_group *);
  bool output_merged_stores (tree);
};
681 | ||
/* Pass descriptor for the GIMPLE store merging pass.  */

const pass_data pass_data_tree_store_merging = {
  GIMPLE_PASS,     /* type */
  "store-merging", /* name */
  OPTGROUP_NONE,   /* optinfo_flags */
  TV_GIMPLE_STORE_MERGING, /* tv_id */
  PROP_ssa,	/* properties_required */
  0,		/* properties_provided */
  0,		/* properties_destroyed */
  0,		/* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
693 | ||
/* The store merging pass proper.  Tracks chains of candidate immediate
   stores per base object in m_stores and terminates/processes them as
   the basic block is scanned.  */

class pass_store_merging : public gimple_opt_pass
{
public:
  pass_store_merging (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_tree_store_merging, ctxt)
  {
  }

  /* Pass not supported for PDP-endianness: the merging logic requires
     byte and word endianness to agree.  Also gated on -fstore-merging.  */
  virtual bool
  gate (function *)
  {
    return flag_store_merging && (WORDS_BIG_ENDIAN == BYTES_BIG_ENDIAN);
  }

  virtual unsigned int execute (function *);

private:
  /* Active store chains, keyed by the base object of the stores'
     destinations.  */
  hash_map<tree_operand_hash, struct imm_store_chain_info *> m_stores;

  bool terminate_and_process_all_chains ();
  bool terminate_all_aliasing_chains (tree, tree, bool, gimple *);
  bool terminate_and_release_chain (tree);
}; // class pass_store_merging
718 | ||
719 | /* Terminate and process all recorded chains. Return true if any changes | |
720 | were made. */ | |
721 | ||
722 | bool | |
723 | pass_store_merging::terminate_and_process_all_chains () | |
724 | { | |
725 | hash_map<tree_operand_hash, struct imm_store_chain_info *>::iterator iter | |
726 | = m_stores.begin (); | |
727 | bool ret = false; | |
728 | for (; iter != m_stores.end (); ++iter) | |
729 | ret |= terminate_and_release_chain ((*iter).first); | |
730 | ||
731 | return ret; | |
732 | } | |
733 | ||
734 | /* Terminate all chains that are affected by the assignment to DEST, appearing | |
735 | in statement STMT and ultimately points to the object BASE. Return true if | |
736 | at least one aliasing chain was terminated. BASE and DEST are allowed to | |
737 | be NULL_TREE. In that case the aliasing checks are performed on the whole | |
738 | statement rather than a particular operand in it. VAR_OFFSET_P signifies | |
739 | whether STMT represents a store to BASE offset by a variable amount. | |
740 | If that is the case we have to terminate any chain anchored at BASE. */ | |
741 | ||
742 | bool | |
743 | pass_store_merging::terminate_all_aliasing_chains (tree dest, tree base, | |
744 | bool var_offset_p, | |
745 | gimple *stmt) | |
746 | { | |
747 | bool ret = false; | |
748 | ||
749 | /* If the statement doesn't touch memory it can't alias. */ | |
750 | if (!gimple_vuse (stmt)) | |
751 | return false; | |
752 | ||
753 | struct imm_store_chain_info **chain_info = NULL; | |
754 | ||
755 | /* Check if the assignment destination (BASE) is part of a store chain. | |
756 | This is to catch non-constant stores to destinations that may be part | |
757 | of a chain. */ | |
758 | if (base) | |
759 | { | |
760 | chain_info = m_stores.get (base); | |
761 | if (chain_info) | |
762 | { | |
763 | /* We have a chain at BASE and we're writing to [BASE + <variable>]. | |
764 | This can interfere with any of the stores so terminate | |
765 | the chain. */ | |
766 | if (var_offset_p) | |
767 | { | |
768 | terminate_and_release_chain (base); | |
769 | ret = true; | |
770 | } | |
771 | /* Otherwise go through every store in the chain to see if it | |
772 | aliases with any of them. */ | |
773 | else | |
774 | { | |
775 | struct store_immediate_info *info; | |
776 | unsigned int i; | |
777 | FOR_EACH_VEC_ELT ((*chain_info)->m_store_info, i, info) | |
778 | { | |
779 | if (refs_may_alias_p (info->dest, dest)) | |
780 | { | |
781 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
782 | { | |
783 | fprintf (dump_file, | |
784 | "stmt causes chain termination:\n"); | |
785 | print_gimple_stmt (dump_file, stmt, 0, 0); | |
786 | } | |
787 | terminate_and_release_chain (base); | |
788 | ret = true; | |
789 | break; | |
790 | } | |
791 | } | |
792 | } | |
793 | } | |
794 | } | |
795 | ||
796 | hash_map<tree_operand_hash, struct imm_store_chain_info *>::iterator iter | |
797 | = m_stores.begin (); | |
798 | ||
799 | /* Check for aliasing with all other store chains. */ | |
800 | for (; iter != m_stores.end (); ++iter) | |
801 | { | |
802 | /* We already checked all the stores in chain_info and terminated the | |
803 | chain if necessary. Skip it here. */ | |
804 | if (chain_info && (*chain_info) == (*iter).second) | |
805 | continue; | |
806 | ||
807 | tree key = (*iter).first; | |
808 | if (ref_maybe_used_by_stmt_p (stmt, key) | |
809 | || stmt_may_clobber_ref_p (stmt, key)) | |
810 | { | |
811 | terminate_and_release_chain (key); | |
812 | ret = true; | |
813 | } | |
814 | } | |
815 | ||
816 | return ret; | |
817 | } | |
818 | ||
819 | /* Helper function. Terminate the recorded chain storing to base object | |
820 | BASE. Return true if the merging and output was successful. The m_stores | |
821 | entry is removed after the processing in any case. */ | |
822 | ||
823 | bool | |
824 | pass_store_merging::terminate_and_release_chain (tree base) | |
825 | { | |
826 | struct imm_store_chain_info **chain_info = m_stores.get (base); | |
827 | ||
828 | if (!chain_info) | |
829 | return false; | |
830 | ||
831 | gcc_assert (*chain_info); | |
832 | ||
833 | bool ret = (*chain_info)->terminate_and_process_chain (base); | |
834 | delete *chain_info; | |
835 | m_stores.remove (base); | |
836 | ||
837 | return ret; | |
838 | } | |
839 | ||
/* Go through the candidate stores recorded in m_store_info and merge them
   into merged_store_group objects recorded into m_merged_store_groups
   representing the widened stores.  Return true if coalescing was successful
   and the number of widened stores is fewer than the original number
   of stores.  */

bool
imm_store_chain_info::coalesce_immediate_stores ()
{
  /* Anything less can't be processed.  */
  if (m_store_info.length () < 2)
    return false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Attempting to coalesce %u stores in chain.\n",
	     m_store_info.length ());

  store_immediate_info *info;
  unsigned int i;

  /* Order the stores by the bitposition they write to.  */
  m_store_info.qsort (sort_by_bitpos);

  /* Seed the first group with the lowest-positioned store.  */
  info = m_store_info[0];
  merged_store_group *merged_store = new merged_store_group (info);

  FOR_EACH_VEC_ELT (m_store_info, i, info)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Store %u:\nbitsize:" HOST_WIDE_INT_PRINT_DEC
			      " bitpos:" HOST_WIDE_INT_PRINT_DEC " val:\n",
		   i, info->bitsize, info->bitpos);
	  print_generic_expr (dump_file, info->val, 0);
	  fprintf (dump_file, "\n------------\n");
	}

      /* The first store is already part of merged_store; only dump it.  */
      if (i == 0)
	continue;

      /* |---store 1---|
	       |---store 2---|
	 Overlapping stores.  */
      unsigned HOST_WIDE_INT start = info->bitpos;
      if (IN_RANGE (start, merged_store->start,
		    merged_store->start + merged_store->width - 1))
	{
	  merged_store->merge_overlapping (info);
	  continue;
	}

      /* |---store 1---| <gap> |---store 2---|.
	 Gap between stores.  Start a new group.  */
      if (start != merged_store->start + merged_store->width)
	{
	  /* Try to apply all the stores recorded for the group to determine
	     the bitpattern they write and discard it if that fails.
	     This will also reject single-store groups.  */
	  if (!merged_store->apply_stores ())
	    delete merged_store;
	  else
	    m_merged_store_groups.safe_push (merged_store);

	  merged_store = new merged_store_group (info);

	  continue;
	}

      /* |---store 1---||---store 2---|
	 This store is consecutive to the previous one.
	 Merge it into the current store group.  */
      merged_store->merge_into (info);
    }

  /* Record or discard the last store group.  */
  if (!merged_store->apply_stores ())
    delete merged_store;
  else
    m_merged_store_groups.safe_push (merged_store);

  gcc_assert (m_merged_store_groups.length () <= m_store_info.length ());
  /* Coalescing only pays off if it produced at least one group and
     reduced the overall store count.  */
  bool success
    = !m_merged_store_groups.is_empty ()
      && m_merged_store_groups.length () < m_store_info.length ();

  if (success && dump_file)
    fprintf (dump_file, "Coalescing successful!\n"
			"Merged into %u stores\n",
	     m_merged_store_groups.length ());

  return success;
}
932 | ||
933 | /* Return the type to use for the merged stores described by STMTS. | |
934 | This is needed to get the alias sets right. */ | |
935 | ||
936 | static tree | |
937 | get_alias_type_for_stmts (auto_vec<gimple *> &stmts) | |
938 | { | |
939 | gimple *stmt; | |
940 | unsigned int i; | |
941 | tree lhs = gimple_assign_lhs (stmts[0]); | |
942 | tree type = reference_alias_ptr_type (lhs); | |
943 | ||
944 | FOR_EACH_VEC_ELT (stmts, i, stmt) | |
945 | { | |
946 | if (i == 0) | |
947 | continue; | |
948 | ||
949 | lhs = gimple_assign_lhs (stmt); | |
950 | tree type1 = reference_alias_ptr_type (lhs); | |
951 | if (!alias_ptr_types_compatible_p (type, type1)) | |
952 | return ptr_type_node; | |
953 | } | |
954 | return type; | |
955 | } | |
956 | ||
957 | /* Return the location_t information we can find among the statements | |
958 | in STMTS. */ | |
959 | ||
960 | static location_t | |
961 | get_location_for_stmts (auto_vec<gimple *> &stmts) | |
962 | { | |
963 | gimple *stmt; | |
964 | unsigned int i; | |
965 | ||
966 | FOR_EACH_VEC_ELT (stmts, i, stmt) | |
967 | if (gimple_has_location (stmt)) | |
968 | return gimple_location (stmt); | |
969 | ||
970 | return UNKNOWN_LOCATION; | |
971 | } | |
972 | ||
/* Used to describe a store resulting from splitting a wide store in smaller
   regularly-sized stores in split_group.  */

struct split_store
{
  /* Offset of this store from the base of the chain, in bytes
     (split_group advances it by try_size / BITS_PER_UNIT).  */
  unsigned HOST_WIDE_INT bytepos;
  /* Width of this store, in bits.  */
  unsigned HOST_WIDE_INT size;
  /* Alignment to use for the store, in bits.  */
  unsigned HOST_WIDE_INT align;
  /* The original store statements whose written region overlaps this
     split store; filled in by find_constituent_stmts.  */
  auto_vec<gimple *> orig_stmts;
  split_store (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT,
	       unsigned HOST_WIDE_INT);
};

/* Simple constructor.  Records position, size and alignment;
   orig_stmts starts out empty.  */

split_store::split_store (unsigned HOST_WIDE_INT bp,
			  unsigned HOST_WIDE_INT sz,
			  unsigned HOST_WIDE_INT al)
  : bytepos (bp), size (sz), align (al)
{
  orig_stmts.create (0);
}
995 | ||
996 | /* Record all statements corresponding to stores in GROUP that write to | |
997 | the region starting at BITPOS and is of size BITSIZE. Record such | |
998 | statements in STMTS. The stores in GROUP must be sorted by | |
999 | bitposition. */ | |
1000 | ||
1001 | static void | |
1002 | find_constituent_stmts (struct merged_store_group *group, | |
1003 | auto_vec<gimple *> &stmts, | |
1004 | unsigned HOST_WIDE_INT bitpos, | |
1005 | unsigned HOST_WIDE_INT bitsize) | |
1006 | { | |
1007 | struct store_immediate_info *info; | |
1008 | unsigned int i; | |
1009 | unsigned HOST_WIDE_INT end = bitpos + bitsize; | |
1010 | FOR_EACH_VEC_ELT (group->stores, i, info) | |
1011 | { | |
1012 | unsigned HOST_WIDE_INT stmt_start = info->bitpos; | |
1013 | unsigned HOST_WIDE_INT stmt_end = stmt_start + info->bitsize; | |
1014 | if (stmt_end < bitpos) | |
1015 | continue; | |
1016 | /* The stores in GROUP are ordered by bitposition so if we're past | |
1017 | the region for this group return early. */ | |
1018 | if (stmt_start > end) | |
1019 | return; | |
1020 | ||
1021 | if (IN_RANGE (stmt_start, bitpos, bitpos + bitsize) | |
1022 | || IN_RANGE (stmt_end, bitpos, end) | |
1023 | /* The statement writes a region that completely encloses the region | |
1024 | that this group writes. Unlikely to occur but let's | |
1025 | handle it. */ | |
1026 | || IN_RANGE (bitpos, stmt_start, stmt_end)) | |
1027 | stmts.safe_push (info->stmt); | |
1028 | } | |
1029 | } | |
1030 | ||
/* Split a merged store described by GROUP by populating the SPLIT_STORES
   vector with split_store structs describing the byte offset (from the base),
   the bit size and alignment of each store as well as the original statements
   involved in each such split group.
   This is to separate the splitting strategy from the statement
   building/emission/linking done in output_merged_store.
   At the moment just start with the widest possible size and keep emitting
   the widest we can until we have emitted all the bytes, halving the size
   when appropriate.  Return false if no suitable store size exists.  */

static bool
split_group (merged_store_group *group,
	     auto_vec<struct split_store *> &split_stores)
{
  unsigned HOST_WIDE_INT pos = group->start;
  unsigned HOST_WIDE_INT size = group->width;
  unsigned HOST_WIDE_INT bytepos = pos / BITS_PER_UNIT;
  unsigned HOST_WIDE_INT align = group->align;

  /* We don't handle partial bitfields for now.  We shouldn't have
     reached this far.  */
  gcc_assert ((size % BITS_PER_UNIT == 0) && (pos % BITS_PER_UNIT == 0));

  /* Unaligned stores are considered only when the target tolerates them
     and the user hasn't disabled them via the param.  */
  bool allow_unaligned
    = !STRICT_ALIGNMENT && PARAM_VALUE (PARAM_STORE_MERGING_ALLOW_UNALIGNED);

  /* Pick the widest starting store size that fits in the region and,
     if unaligned stores are not allowed, does not exceed the known
     alignment.  Fail if not even a single byte store qualifies.  */
  unsigned int try_size = MAX_STORE_BITSIZE;
  while (try_size > size
	 || (!allow_unaligned
	     && try_size > align))
    {
      try_size /= 2;
      if (try_size < BITS_PER_UNIT)
	return false;
    }

  unsigned HOST_WIDE_INT try_pos = bytepos;
  /* find_constituent_stmts requires the stores sorted by bitpos.  */
  group->stores.qsort (sort_by_bitpos);

  /* Walk the region low to high, emitting the widest store that still
     fits in the remaining bits.  */
  while (size > 0)
    {
      struct split_store *store = new split_store (try_pos, try_size, align);
      unsigned HOST_WIDE_INT try_bitpos = try_pos * BITS_PER_UNIT;
      /* Remember which original statements this piece covers (used later
	 for alias-type and location selection).  */
      find_constituent_stmts (group, store->orig_stmts, try_bitpos, try_size);
      split_stores.safe_push (store);

      try_pos += try_size / BITS_PER_UNIT;

      size -= try_size;
      /* NOTE(review): subsequent pieces use the just-emitted width as
	 their alignment; this presumably assumes the region start itself
	 is sufficiently aligned for the first store -- confirm.  */
      align = try_size;
      /* Halve the store width until it fits in what remains.  SIZE is a
	 multiple of BITS_PER_UNIT, so this terminates at a byte store
	 at the latest.  */
      while (size < try_size)
	try_size /= 2;
    }
  return true;
}
1086 | ||
/* Given a merged store group GROUP output the widened version of it.
   The store chain is against the base object BASE.
   Try store sizes of at most MAX_STORE_BITSIZE bits wide and don't output
   unaligned stores for STRICT_ALIGNMENT targets or if it's too expensive.
   Make sure that the number of statements output is less than the number of
   original statements.  If a better sequence is possible emit it and
   return true.  */

bool
imm_store_chain_info::output_merged_store (tree base, merged_store_group *group)
{
  unsigned HOST_WIDE_INT start_byte_pos = group->start / BITS_PER_UNIT;

  /* Widening fewer than two stores can never reduce the statement
     count.  */
  unsigned int orig_num_stmts = group->stores.length ();
  if (orig_num_stmts < 2)
    return false;

  /* Compute the splitting strategy first; bail out if no valid store
     width exists.  */
  auto_vec<struct split_store *> split_stores;
  split_stores.create (0);
  if (!split_group (group, split_stores))
    return false;

  /* The new sequence is inserted after the last original store of the
     group and takes over its virtual operands.  */
  gimple_stmt_iterator last_gsi = gsi_for_stmt (group->last_stmt);
  gimple_seq seq = NULL;
  unsigned int num_stmts = 0;
  tree last_vdef, new_vuse;
  last_vdef = gimple_vdef (group->last_stmt);
  new_vuse = gimple_vuse (group->last_stmt);

  gimple *stmt = NULL;
  /* The new SSA names created.  Keep track of them so that we can free them
     if we decide to not use the new sequence.  */
  auto_vec<tree> new_ssa_names;
  split_store *split_store;
  unsigned int i;
  bool fail = false;

  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    {
      unsigned HOST_WIDE_INT try_size = split_store->size;
      unsigned HOST_WIDE_INT try_pos = split_store->bytepos;
      unsigned HOST_WIDE_INT align = split_store->align;
      /* Derive the alias type and source location from the original
	 statements this piece replaces.  */
      tree offset_type = get_alias_type_for_stmts (split_store->orig_stmts);
      location_t loc = get_location_for_stmts (split_store->orig_stmts);

      /* Destination: an unsigned integer-typed MEM_REF of try_size bits
	 at byte offset try_pos off the address of BASE.  */
      tree int_type = build_nonstandard_integer_type (try_size, UNSIGNED);
      SET_TYPE_ALIGN (int_type, align);
      tree addr = build_fold_addr_expr (base);
      tree dest = fold_build2 (MEM_REF, int_type, addr,
			       build_int_cst (offset_type, try_pos));

      /* Source: reinterpret the corresponding bytes of the combined
	 value buffer (group->val) that apply_stores produced.  */
      tree src = native_interpret_expr (int_type,
					group->val + try_pos - start_byte_pos,
					group->buf_size);

      stmt = gimple_build_assign (dest, src);
      gimple_set_location (stmt, loc);
      gimple_set_vuse (stmt, new_vuse);
      gimple_seq_add_stmt_without_update (&seq, stmt);

      /* We didn't manage to reduce the number of statements.  Bail out.  */
      if (++num_stmts == orig_num_stmts)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Exceeded original number of stmts (%u)."
				  "  Not profitable to emit new sequence.\n",
		       orig_num_stmts);
	    }
	  unsigned int ssa_count;
	  tree ssa_name;
	  /* Don't forget to cleanup the temporary SSA names.  */
	  FOR_EACH_VEC_ELT (new_ssa_names, ssa_count, ssa_name)
	    release_ssa_name (ssa_name);

	  fail = true;
	  break;
	}

      /* Link the virtual operands: intermediate stores get fresh VDEFs,
	 the final store reuses the last original store's VDEF so that
	 downstream virtual uses stay valid.  */
      tree new_vdef;
      if (i < split_stores.length () - 1)
	{
	  new_vdef = make_ssa_name (gimple_vop (cfun), stmt);
	  new_ssa_names.safe_push (new_vdef);
	}
      else
	new_vdef = last_vdef;

      gimple_set_vdef (stmt, new_vdef);
      SSA_NAME_DEF_STMT (new_vdef) = stmt;
      new_vuse = new_vdef;
    }

  /* The split_store descriptors are no longer needed whether we emit
     the sequence or not.  */
  FOR_EACH_VEC_ELT (split_stores, i, split_store)
    delete split_store;

  if (fail)
    return false;

  gcc_assert (seq);
  if (dump_file)
    {
      fprintf (dump_file,
	       "New sequence of %u stmts to replace old one of %u stmts\n",
	       num_stmts, orig_num_stmts);
      if (dump_flags & TDF_DETAILS)
	print_gimple_seq (dump_file, seq, 0, TDF_VOPS | TDF_MEMSYMS);
    }
  gsi_insert_seq_after (&last_gsi, seq, GSI_SAME_STMT);

  return true;
}
1199 | ||
1200 | /* Process the merged_store_group objects created in the coalescing phase. | |
1201 | The stores are all against the base object BASE. | |
1202 | Try to output the widened stores and delete the original statements if | |
1203 | successful. Return true iff any changes were made. */ | |
1204 | ||
1205 | bool | |
1206 | imm_store_chain_info::output_merged_stores (tree base) | |
1207 | { | |
1208 | unsigned int i; | |
1209 | merged_store_group *merged_store; | |
1210 | bool ret = false; | |
1211 | FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_store) | |
1212 | { | |
1213 | if (output_merged_store (base, merged_store)) | |
1214 | { | |
1215 | unsigned int j; | |
1216 | store_immediate_info *store; | |
1217 | FOR_EACH_VEC_ELT (merged_store->stores, j, store) | |
1218 | { | |
1219 | gimple *stmt = store->stmt; | |
1220 | gimple_stmt_iterator gsi = gsi_for_stmt (stmt); | |
1221 | gsi_remove (&gsi, true); | |
1222 | if (stmt != merged_store->last_stmt) | |
1223 | { | |
1224 | unlink_stmt_vdef (stmt); | |
1225 | release_defs (stmt); | |
1226 | } | |
1227 | } | |
1228 | ret = true; | |
1229 | } | |
1230 | } | |
1231 | if (ret && dump_file) | |
1232 | fprintf (dump_file, "Merging successful!\n"); | |
1233 | ||
1234 | return ret; | |
1235 | } | |
1236 | ||
1237 | /* Coalesce the store_immediate_info objects recorded against the base object | |
1238 | BASE in the first phase and output them. | |
1239 | Delete the allocated structures. | |
1240 | Return true if any changes were made. */ | |
1241 | ||
1242 | bool | |
1243 | imm_store_chain_info::terminate_and_process_chain (tree base) | |
1244 | { | |
1245 | /* Process store chain. */ | |
1246 | bool ret = false; | |
1247 | if (m_store_info.length () > 1) | |
1248 | { | |
1249 | ret = coalesce_immediate_stores (); | |
1250 | if (ret) | |
1251 | ret = output_merged_stores (base); | |
1252 | } | |
1253 | ||
1254 | /* Delete all the entries we allocated ourselves. */ | |
1255 | store_immediate_info *info; | |
1256 | unsigned int i; | |
1257 | FOR_EACH_VEC_ELT (m_store_info, i, info) | |
1258 | delete info; | |
1259 | ||
1260 | merged_store_group *merged_info; | |
1261 | FOR_EACH_VEC_ELT (m_merged_store_groups, i, merged_info) | |
1262 | delete merged_info; | |
1263 | ||
1264 | return ret; | |
1265 | } | |
1266 | ||
1267 | /* Return true iff LHS is a destination potentially interesting for | |
1268 | store merging. In practice these are the codes that get_inner_reference | |
1269 | can process. */ | |
1270 | ||
1271 | static bool | |
1272 | lhs_valid_for_store_merging_p (tree lhs) | |
1273 | { | |
1274 | tree_code code = TREE_CODE (lhs); | |
1275 | ||
1276 | if (code == ARRAY_REF || code == ARRAY_RANGE_REF || code == MEM_REF | |
1277 | || code == COMPONENT_REF || code == BIT_FIELD_REF) | |
1278 | return true; | |
1279 | ||
1280 | return false; | |
1281 | } | |
1282 | ||
1283 | /* Return true if the tree RHS is a constant we want to consider | |
1284 | during store merging. In practice accept all codes that | |
1285 | native_encode_expr accepts. */ | |
1286 | ||
1287 | static bool | |
1288 | rhs_valid_for_store_merging_p (tree rhs) | |
1289 | { | |
1290 | tree type = TREE_TYPE (rhs); | |
1291 | if (TREE_CODE_CLASS (TREE_CODE (rhs)) != tcc_constant | |
1292 | || !can_native_encode_type_p (type)) | |
1293 | return false; | |
1294 | ||
1295 | return true; | |
1296 | } | |
1297 | ||
/* Entry point for the pass.  Go over each basic block recording chains of
   immediate stores.  Upon encountering a terminating statement (as defined
   by stmt_terminates_chain_p) process the recorded stores and emit the widened
   variants.  */

unsigned int
pass_store_merging::execute (function *fun)
{
  basic_block bb;
  /* NOTE(review): orig_stmts is declared but never used in this
     function body -- presumably a leftover; confirm before removing.  */
  hash_set<gimple *> orig_stmts;

  FOR_EACH_BB_FN (bb, fun)
    {
      gimple_stmt_iterator gsi;
      unsigned HOST_WIDE_INT num_statements = 0;
      /* Count the non-debug statements, stopping as soon as we know
	 there are at least two: a block with fewer cannot contain a
	 mergeable pair of stores.  */
      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  if (is_gimple_debug (gsi_stmt (gsi)))
	    continue;

	  if (++num_statements > 2)
	    break;
	}

      if (num_statements < 2)
	continue;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Processing basic block <%d>:\n", bb->index);

      for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);

	  if (gimple_has_volatile_ops (stmt))
	    {
	      /* Terminate all chains.  */
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Volatile access terminates "
				    "all chains\n");
	      terminate_and_process_all_chains ();
	      continue;
	    }

	  if (is_gimple_debug (stmt))
	    continue;

	  /* Candidate store: a non-throwing single assignment with a
	     virtual definition and an LHS get_inner_reference can
	     decompose.  */
	  if (gimple_assign_single_p (stmt) && gimple_vdef (stmt)
	      && !stmt_can_throw_internal (stmt)
	      && lhs_valid_for_store_merging_p (gimple_assign_lhs (stmt)))
	    {
	      tree lhs = gimple_assign_lhs (stmt);
	      tree rhs = gimple_assign_rhs1 (stmt);

	      HOST_WIDE_INT bitsize, bitpos;
	      machine_mode mode;
	      int unsignedp = 0, reversep = 0, volatilep = 0;
	      tree offset, base_addr;
	      base_addr
		= get_inner_reference (lhs, &bitsize, &bitpos, &offset, &mode,
				       &unsignedp, &reversep, &volatilep);
	      /* As a future enhancement we could handle stores with the same
		 base and offset.  */
	      bool invalid = offset || reversep
			     || ((bitsize > MAX_BITSIZE_MODE_ANY_INT)
				 && (TREE_CODE (rhs) != INTEGER_CST))
			     || !rhs_valid_for_store_merging_p (rhs)
			     /* An access may not be volatile itself but
				base_addr may be a volatile decl i.e.
				MEM[&volatile-decl].  The hashing for
				tree_operand_hash won't consider such stores
				equal to each other so we can't track chains
				on them.  */
			     || TREE_THIS_VOLATILE (base_addr);

	      /* In some cases get_inner_reference may return a
		 MEM_REF [ptr + byteoffset].  For the purposes of this pass
		 canonicalize the base_addr to MEM_REF [ptr] and take
		 byteoffset into account in the bitpos.  This occurs in
		 PR 23684 and this way we can catch more chains.  */
	      if (TREE_CODE (base_addr) == MEM_REF
		  && POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (base_addr, 0))))
		{
		  offset_int bit_off, byte_off = mem_ref_offset (base_addr);
		  bit_off = byte_off << LOG2_BITS_PER_UNIT;
		  bit_off += bitpos;
		  /* Only canonicalize when the combined bit offset stays
		     non-negative and representable.  */
		  if (!wi::neg_p (bit_off) && wi::fits_shwi_p (bit_off))
		    {
		      bitpos = bit_off.to_shwi ();
		      base_addr = build2 (MEM_REF, TREE_TYPE (base_addr),
					  TREE_OPERAND (base_addr, 0),
					  build_zero_cst (TREE_TYPE (
					    TREE_OPERAND (base_addr, 1))));
		    }
		  else
		    invalid = true;
		}

	      struct imm_store_chain_info **chain_info
		= m_stores.get (base_addr);

	      if (!invalid)
		{
		  store_immediate_info *info;
		  if (chain_info)
		    {
		      /* Append to the existing chain for this base.  */
		      info = new store_immediate_info (
			bitsize, bitpos, rhs, lhs, stmt,
			(*chain_info)->m_store_info.length ());
		      if (dump_file && (dump_flags & TDF_DETAILS))
			{
			  fprintf (dump_file,
				   "Recording immediate store from stmt:\n");
			  print_gimple_stmt (dump_file, stmt, 0, 0);
			}
		      (*chain_info)->m_store_info.safe_push (info);
		      /* If we reach the limit of stores to merge in a chain
			 terminate and process the chain now.  */
		      if ((*chain_info)->m_store_info.length ()
			  == (unsigned int)
			     PARAM_VALUE (PARAM_MAX_STORES_TO_MERGE))
			{
			  if (dump_file && (dump_flags & TDF_DETAILS))
			    fprintf (dump_file,
				     "Reached maximum number of statements"
				     " to merge:\n");
			  terminate_and_release_chain (base_addr);
			}
		      continue;
		    }

		  /* Store aliases any existing chain?  */
		  terminate_all_aliasing_chains (lhs, base_addr, false, stmt);
		  /* Start a new chain.  */
		  struct imm_store_chain_info *new_chain
		    = new imm_store_chain_info;
		  info = new store_immediate_info (bitsize, bitpos, rhs, lhs,
						   stmt, 0);
		  new_chain->m_store_info.safe_push (info);
		  m_stores.put (base_addr, new_chain);
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file,
			       "Starting new chain with statement:\n");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		      fprintf (dump_file, "The base object is:\n");
		      print_generic_expr (dump_file, base_addr, 0);
		      fprintf (dump_file, "\n");
		    }
		}
	      else
		/* An unsuitable store still clobbers memory: terminate
		   any chains it may alias.  */
		terminate_all_aliasing_chains (lhs, base_addr,
					       offset != NULL_TREE, stmt);

	      continue;
	    }

	  /* Any other statement with a VDEF/VUSE may interfere with the
	     recorded chains; let the alias machinery decide.  */
	  terminate_all_aliasing_chains (NULL_TREE, NULL_TREE, false, stmt);
	}
      /* Chains cannot extend past a basic block boundary.  */
      terminate_and_process_all_chains ();
    }
  return 0;
}
1462 | ||
1463 | } // anon namespace | |
1464 | ||
1465 | /* Construct and return a store merging pass object. */ | |
1466 | ||
1467 | gimple_opt_pass * | |
1468 | make_pass_store_merging (gcc::context *ctxt) | |
1469 | { | |
1470 | return new pass_store_merging (ctxt); | |
1471 | } |