]>
Commit | Line | Data |
---|---|---|
dea61d92 | 1 | /* Loop distribution. |
5624e564 | 2 | Copyright (C) 2006-2015 Free Software Foundation, Inc. |
dea61d92 SP |
3 | Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr> |
4 | and Sebastian Pop <sebastian.pop@amd.com>. | |
5 | ||
6 | This file is part of GCC. | |
b8698a0f | 7 | |
dea61d92 SP |
8 | GCC is free software; you can redistribute it and/or modify it |
9 | under the terms of the GNU General Public License as published by the | |
10 | Free Software Foundation; either version 3, or (at your option) any | |
11 | later version. | |
b8698a0f | 12 | |
dea61d92 SP |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
b8698a0f | 17 | |
dea61d92 SP |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | /* This pass performs loop distribution: for example, the loop | |
23 | ||
24 | |DO I = 2, N | |
25 | | A(I) = B(I) + C | |
26 | | D(I) = A(I-1)*E | |
27 | |ENDDO | |
28 | ||
b8698a0f | 29 | is transformed to |
dea61d92 SP |
30 | |
31 | |DOALL I = 2, N | |
32 | | A(I) = B(I) + C | |
33 | |ENDDO | |
34 | | | |
35 | |DOALL I = 2, N | |
36 | | D(I) = A(I-1)*E | |
37 | |ENDDO | |
38 | ||
39 | This pass uses an RDG, Reduced Dependence Graph built on top of the | |
40 | data dependence relations. The RDG is then topologically sorted to | |
41 | obtain a map of information producers/consumers based on which it | |
42 | generates the new loops. */ | |
43 | ||
44 | #include "config.h" | |
45 | #include "system.h" | |
46 | #include "coretypes.h" | |
40e23961 | 47 | #include "alias.h" |
c7131fb2 | 48 | #include "backend.h" |
9fdcd34e | 49 | #include "cfghooks.h" |
40e23961 | 50 | #include "tree.h" |
c7131fb2 | 51 | #include "gimple.h" |
60393bbc | 52 | #include "hard-reg-set.h" |
c7131fb2 AM |
53 | #include "ssa.h" |
54 | #include "options.h" | |
55 | #include "fold-const.h" | |
60393bbc | 56 | #include "cfganal.h" |
2fb9a547 | 57 | #include "internal-fn.h" |
5be5c238 | 58 | #include "gimple-iterator.h" |
18f429e2 | 59 | #include "gimplify-me.h" |
d8a2d370 | 60 | #include "stor-layout.h" |
442b4905 | 61 | #include "tree-cfg.h" |
e28030cf | 62 | #include "tree-ssa-loop-manip.h" |
442b4905 AM |
63 | #include "tree-ssa-loop.h" |
64 | #include "tree-into-ssa.h" | |
7a300452 | 65 | #include "tree-ssa.h" |
dea61d92 | 66 | #include "cfgloop.h" |
dea61d92 SP |
67 | #include "tree-chrec.h" |
68 | #include "tree-data-ref.h" | |
69 | #include "tree-scalar-evolution.h" | |
70 | #include "tree-pass.h" | |
80ab0b19 | 71 | #include "gimple-pretty-print.h" |
826a536d | 72 | #include "tree-vectorizer.h" |
80ab0b19 RB |
73 | |
74 | ||
75 | /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ | |
526ceb68 | 76 | struct rdg_vertex |
80ab0b19 RB |
77 | { |
78 | /* The statement represented by this vertex. */ | |
355fe088 | 79 | gimple *stmt; |
80ab0b19 RB |
80 | |
81 | /* Vector of data-references in this statement. */ | |
82 | vec<data_reference_p> datarefs; | |
83 | ||
84 | /* True when the statement contains a write to memory. */ | |
85 | bool has_mem_write; | |
86 | ||
87 | /* True when the statement contains a read from memory. */ | |
88 | bool has_mem_reads; | |
526ceb68 | 89 | }; |
80ab0b19 RB |
90 | |
91 | #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt | |
92 | #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs | |
93 | #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write | |
94 | #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads | |
95 | #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I])) | |
96 | #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I])) | |
97 | #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I])) | |
98 | #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I])) | |
99 | ||
100 | /* Data dependence type. */ | |
101 | ||
102 | enum rdg_dep_type | |
103 | { | |
104 | /* Read After Write (RAW). */ | |
105 | flow_dd = 'f', | |
106 | ||
36875e8f RB |
107 | /* Control dependence (execute conditional on). */ |
108 | control_dd = 'c' | |
80ab0b19 RB |
109 | }; |
110 | ||
111 | /* Dependence information attached to an edge of the RDG. */ | |
112 | ||
526ceb68 | 113 | struct rdg_edge |
80ab0b19 RB |
114 | { |
115 | /* Type of the dependence. */ | |
116 | enum rdg_dep_type type; | |
526ceb68 | 117 | }; |
80ab0b19 RB |
118 | |
119 | #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type | |
80ab0b19 | 120 | |
80ab0b19 RB |
121 | /* Dump vertex I in RDG to FILE. */ |
122 | ||
123 | static void | |
124 | dump_rdg_vertex (FILE *file, struct graph *rdg, int i) | |
125 | { | |
126 | struct vertex *v = &(rdg->vertices[i]); | |
127 | struct graph_edge *e; | |
128 | ||
129 | fprintf (file, "(vertex %d: (%s%s) (in:", i, | |
130 | RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "", | |
131 | RDG_MEM_READS_STMT (rdg, i) ? "r" : ""); | |
132 | ||
133 | if (v->pred) | |
134 | for (e = v->pred; e; e = e->pred_next) | |
135 | fprintf (file, " %d", e->src); | |
136 | ||
137 | fprintf (file, ") (out:"); | |
138 | ||
139 | if (v->succ) | |
140 | for (e = v->succ; e; e = e->succ_next) | |
141 | fprintf (file, " %d", e->dest); | |
142 | ||
143 | fprintf (file, ")\n"); | |
144 | print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS); | |
145 | fprintf (file, ")\n"); | |
146 | } | |
147 | ||
148 | /* Call dump_rdg_vertex on stderr. */ | |
149 | ||
150 | DEBUG_FUNCTION void | |
151 | debug_rdg_vertex (struct graph *rdg, int i) | |
152 | { | |
153 | dump_rdg_vertex (stderr, rdg, i); | |
154 | } | |
155 | ||
80ab0b19 RB |
156 | /* Dump the reduced dependence graph RDG to FILE. */ |
157 | ||
158 | static void | |
159 | dump_rdg (FILE *file, struct graph *rdg) | |
160 | { | |
80ab0b19 | 161 | fprintf (file, "(rdg\n"); |
2fd5894f RB |
162 | for (int i = 0; i < rdg->n_vertices; i++) |
163 | dump_rdg_vertex (file, rdg, i); | |
80ab0b19 | 164 | fprintf (file, ")\n"); |
80ab0b19 RB |
165 | } |
166 | ||
167 | /* Call dump_rdg on stderr. */ | |
168 | ||
169 | DEBUG_FUNCTION void | |
170 | debug_rdg (struct graph *rdg) | |
171 | { | |
172 | dump_rdg (stderr, rdg); | |
173 | } | |
174 | ||
175 | static void | |
176 | dot_rdg_1 (FILE *file, struct graph *rdg) | |
177 | { | |
178 | int i; | |
174ec470 RB |
179 | pretty_printer buffer; |
180 | pp_needs_newline (&buffer) = false; | |
181 | buffer.buffer->stream = file; | |
80ab0b19 RB |
182 | |
183 | fprintf (file, "digraph RDG {\n"); | |
184 | ||
185 | for (i = 0; i < rdg->n_vertices; i++) | |
186 | { | |
187 | struct vertex *v = &(rdg->vertices[i]); | |
188 | struct graph_edge *e; | |
189 | ||
174ec470 RB |
190 | fprintf (file, "%d [label=\"[%d] ", i, i); |
191 | pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM); | |
192 | pp_flush (&buffer); | |
193 | fprintf (file, "\"]\n"); | |
194 | ||
80ab0b19 RB |
195 | /* Highlight reads from memory. */ |
196 | if (RDG_MEM_READS_STMT (rdg, i)) | |
197 | fprintf (file, "%d [style=filled, fillcolor=green]\n", i); | |
198 | ||
199 | /* Highlight stores to memory. */ | |
200 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
201 | fprintf (file, "%d [style=filled, fillcolor=red]\n", i); | |
202 | ||
203 | if (v->succ) | |
204 | for (e = v->succ; e; e = e->succ_next) | |
205 | switch (RDGE_TYPE (e)) | |
206 | { | |
80ab0b19 RB |
207 | case flow_dd: |
208 | /* These are the most common dependences: don't print these. */ | |
209 | fprintf (file, "%d -> %d \n", i, e->dest); | |
210 | break; | |
211 | ||
36875e8f RB |
212 | case control_dd: |
213 | fprintf (file, "%d -> %d [label=control] \n", i, e->dest); | |
214 | break; | |
215 | ||
80ab0b19 RB |
216 | default: |
217 | gcc_unreachable (); | |
218 | } | |
219 | } | |
220 | ||
221 | fprintf (file, "}\n\n"); | |
222 | } | |
223 | ||
224 | /* Display the Reduced Dependence Graph using dotty. */ | |
225 | ||
226 | DEBUG_FUNCTION void | |
227 | dot_rdg (struct graph *rdg) | |
228 | { | |
174ec470 | 229 | /* When debugging, you may want to enable the following code. */ |
b6d94045 | 230 | #ifdef HAVE_POPEN |
c3284718 | 231 | FILE *file = popen ("dot -Tx11", "w"); |
174ec470 RB |
232 | if (!file) |
233 | return; | |
80ab0b19 | 234 | dot_rdg_1 (file, rdg); |
174ec470 RB |
235 | fflush (file); |
236 | close (fileno (file)); | |
237 | pclose (file); | |
80ab0b19 RB |
238 | #else |
239 | dot_rdg_1 (stderr, rdg); | |
240 | #endif | |
241 | } | |
242 | ||
243 | /* Returns the index of STMT in RDG. */ | |
244 | ||
245 | static int | |
355fe088 | 246 | rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt) |
80ab0b19 RB |
247 | { |
248 | int index = gimple_uid (stmt); | |
249 | gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt); | |
250 | return index; | |
251 | } | |
252 | ||
80ab0b19 RB |
253 | /* Creates dependence edges in RDG for all the uses of DEF. IDEF is |
254 | the index of DEF in RDG. */ | |
255 | ||
256 | static void | |
257 | create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) | |
258 | { | |
259 | use_operand_p imm_use_p; | |
260 | imm_use_iterator iterator; | |
261 | ||
262 | FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def) | |
263 | { | |
264 | struct graph_edge *e; | |
265 | int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p)); | |
266 | ||
267 | if (use < 0) | |
268 | continue; | |
269 | ||
270 | e = add_edge (rdg, idef, use); | |
271 | e->data = XNEW (struct rdg_edge); | |
272 | RDGE_TYPE (e) = flow_dd; | |
80ab0b19 RB |
273 | } |
274 | } | |
275 | ||
36875e8f RB |
276 | /* Creates an edge for the control dependences of BB to the vertex V. */ |
277 | ||
278 | static void | |
279 | create_edge_for_control_dependence (struct graph *rdg, basic_block bb, | |
280 | int v, control_dependences *cd) | |
281 | { | |
282 | bitmap_iterator bi; | |
283 | unsigned edge_n; | |
284 | EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index), | |
285 | 0, edge_n, bi) | |
286 | { | |
287 | basic_block cond_bb = cd->get_edge (edge_n)->src; | |
355fe088 | 288 | gimple *stmt = last_stmt (cond_bb); |
36875e8f RB |
289 | if (stmt && is_ctrl_stmt (stmt)) |
290 | { | |
291 | struct graph_edge *e; | |
292 | int c = rdg_vertex_for_stmt (rdg, stmt); | |
293 | if (c < 0) | |
294 | continue; | |
295 | ||
296 | e = add_edge (rdg, c, v); | |
297 | e->data = XNEW (struct rdg_edge); | |
298 | RDGE_TYPE (e) = control_dd; | |
36875e8f RB |
299 | } |
300 | } | |
301 | } | |
302 | ||
80ab0b19 RB |
303 | /* Creates the edges of the reduced dependence graph RDG. */ |
304 | ||
305 | static void | |
447f3223 | 306 | create_rdg_flow_edges (struct graph *rdg) |
80ab0b19 RB |
307 | { |
308 | int i; | |
80ab0b19 RB |
309 | def_operand_p def_p; |
310 | ssa_op_iter iter; | |
311 | ||
80ab0b19 RB |
312 | for (i = 0; i < rdg->n_vertices; i++) |
313 | FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), | |
314 | iter, SSA_OP_DEF) | |
315 | create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); | |
447f3223 | 316 | } |
36875e8f | 317 | |
447f3223 RB |
318 | /* Creates the edges of the reduced dependence graph RDG. */ |
319 | ||
320 | static void | |
321 | create_rdg_cd_edges (struct graph *rdg, control_dependences *cd) | |
322 | { | |
323 | int i; | |
324 | ||
325 | for (i = 0; i < rdg->n_vertices; i++) | |
326 | { | |
355fe088 | 327 | gimple *stmt = RDG_STMT (rdg, i); |
447f3223 RB |
328 | if (gimple_code (stmt) == GIMPLE_PHI) |
329 | { | |
330 | edge_iterator ei; | |
331 | edge e; | |
332 | FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) | |
36875e8f | 333 | create_edge_for_control_dependence (rdg, e->src, i, cd); |
447f3223 RB |
334 | } |
335 | else | |
336 | create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); | |
337 | } | |
80ab0b19 RB |
338 | } |
339 | ||
340 | /* Build the vertices of the reduced dependence graph RDG. Return false | |
341 | if that failed. */ | |
342 | ||
343 | static bool | |
355fe088 | 344 | create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop, |
80ab0b19 RB |
345 | vec<data_reference_p> *datarefs) |
346 | { | |
347 | int i; | |
355fe088 | 348 | gimple *stmt; |
80ab0b19 RB |
349 | |
350 | FOR_EACH_VEC_ELT (stmts, i, stmt) | |
351 | { | |
352 | struct vertex *v = &(rdg->vertices[i]); | |
353 | ||
354 | /* Record statement to vertex mapping. */ | |
355 | gimple_set_uid (stmt, i); | |
356 | ||
357 | v->data = XNEW (struct rdg_vertex); | |
358 | RDGV_STMT (v) = stmt; | |
359 | RDGV_DATAREFS (v).create (0); | |
360 | RDGV_HAS_MEM_WRITE (v) = false; | |
361 | RDGV_HAS_MEM_READS (v) = false; | |
362 | if (gimple_code (stmt) == GIMPLE_PHI) | |
363 | continue; | |
364 | ||
365 | unsigned drp = datarefs->length (); | |
366 | if (!find_data_references_in_stmt (loop, stmt, datarefs)) | |
367 | return false; | |
368 | for (unsigned j = drp; j < datarefs->length (); ++j) | |
369 | { | |
370 | data_reference_p dr = (*datarefs)[j]; | |
371 | if (DR_IS_READ (dr)) | |
372 | RDGV_HAS_MEM_READS (v) = true; | |
373 | else | |
374 | RDGV_HAS_MEM_WRITE (v) = true; | |
375 | RDGV_DATAREFS (v).safe_push (dr); | |
376 | } | |
377 | } | |
378 | return true; | |
379 | } | |
380 | ||
2fd5894f | 381 | /* Initialize STMTS with all the statements of LOOP. The order in |
80ab0b19 RB |
382 | which we discover statements is important as |
383 | generate_loops_for_partition is using the same traversal for | |
2fd5894f | 384 | identifying statements in loop copies. */ |
80ab0b19 RB |
385 | |
386 | static void | |
355fe088 | 387 | stmts_from_loop (struct loop *loop, vec<gimple *> *stmts) |
80ab0b19 RB |
388 | { |
389 | unsigned int i; | |
390 | basic_block *bbs = get_loop_body_in_dom_order (loop); | |
391 | ||
392 | for (i = 0; i < loop->num_nodes; i++) | |
393 | { | |
394 | basic_block bb = bbs[i]; | |
80ab0b19 | 395 | |
538dd0b7 DM |
396 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); |
397 | gsi_next (&bsi)) | |
398 | if (!virtual_operand_p (gimple_phi_result (bsi.phi ()))) | |
399 | stmts->safe_push (bsi.phi ()); | |
80ab0b19 | 400 | |
538dd0b7 DM |
401 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); |
402 | gsi_next (&bsi)) | |
80ab0b19 | 403 | { |
355fe088 | 404 | gimple *stmt = gsi_stmt (bsi); |
80ab0b19 RB |
405 | if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt)) |
406 | stmts->safe_push (stmt); | |
407 | } | |
408 | } | |
409 | ||
410 | free (bbs); | |
411 | } | |
412 | ||
80ab0b19 RB |
413 | /* Free the reduced dependence graph RDG. */ |
414 | ||
415 | static void | |
416 | free_rdg (struct graph *rdg) | |
417 | { | |
418 | int i; | |
419 | ||
420 | for (i = 0; i < rdg->n_vertices; i++) | |
421 | { | |
422 | struct vertex *v = &(rdg->vertices[i]); | |
423 | struct graph_edge *e; | |
424 | ||
425 | for (e = v->succ; e; e = e->succ_next) | |
447f3223 | 426 | free (e->data); |
80ab0b19 RB |
427 | |
428 | if (v->data) | |
429 | { | |
430 | gimple_set_uid (RDGV_STMT (v), -1); | |
431 | free_data_refs (RDGV_DATAREFS (v)); | |
432 | free (v->data); | |
433 | } | |
434 | } | |
435 | ||
436 | free_graph (rdg); | |
437 | } | |
438 | ||
439 | /* Build the Reduced Dependence Graph (RDG) with one vertex per | |
97463b2b | 440 | statement of the loop nest LOOP_NEST, and one edge per data dependence or |
80ab0b19 RB |
441 | scalar dependence. */ |
442 | ||
443 | static struct graph * | |
36875e8f | 444 | build_rdg (vec<loop_p> loop_nest, control_dependences *cd) |
80ab0b19 RB |
445 | { |
446 | struct graph *rdg; | |
80ab0b19 | 447 | vec<data_reference_p> datarefs; |
80ab0b19 | 448 | |
97463b2b | 449 | /* Create the RDG vertices from the stmts of the loop nest. */ |
355fe088 | 450 | auto_vec<gimple *, 10> stmts; |
97463b2b | 451 | stmts_from_loop (loop_nest[0], &stmts); |
24f161fd | 452 | rdg = new_graph (stmts.length ()); |
80ab0b19 | 453 | datarefs.create (10); |
97463b2b | 454 | if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs)) |
80ab0b19 | 455 | { |
97463b2b | 456 | datarefs.release (); |
80ab0b19 RB |
457 | free_rdg (rdg); |
458 | return NULL; | |
459 | } | |
460 | stmts.release (); | |
97463b2b | 461 | |
447f3223 RB |
462 | create_rdg_flow_edges (rdg); |
463 | if (cd) | |
464 | create_rdg_cd_edges (rdg, cd); | |
465 | ||
97463b2b | 466 | datarefs.release (); |
80ab0b19 RB |
467 | |
468 | return rdg; | |
469 | } | |
470 | ||
80ab0b19 | 471 | |
dea61d92 | 472 | |
b9fc0497 | 473 | enum partition_kind { |
826a536d | 474 | PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY |
b9fc0497 | 475 | }; |
30d55936 | 476 | |
526ceb68 | 477 | struct partition |
c61f8985 RG |
478 | { |
479 | bitmap stmts; | |
80ab0b19 | 480 | bitmap loops; |
826a536d | 481 | bool reduction_p; |
30d55936 | 482 | enum partition_kind kind; |
d0582dc1 RG |
483 | /* data-references a kind != PKIND_NORMAL partition is about. */ |
484 | data_reference_p main_dr; | |
485 | data_reference_p secondary_dr; | |
818625cf | 486 | tree niter; |
d995e887 | 487 | bool plus_one; |
526ceb68 | 488 | }; |
c61f8985 | 489 | |
c61f8985 RG |
490 | |
491 | /* Allocate and initialize a partition from BITMAP. */ | |
492 | ||
526ceb68 | 493 | static partition * |
80ab0b19 | 494 | partition_alloc (bitmap stmts, bitmap loops) |
c61f8985 | 495 | { |
526ceb68 | 496 | partition *partition = XCNEW (struct partition); |
c61f8985 | 497 | partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL); |
80ab0b19 | 498 | partition->loops = loops ? loops : BITMAP_ALLOC (NULL); |
826a536d | 499 | partition->reduction_p = false; |
30d55936 | 500 | partition->kind = PKIND_NORMAL; |
c61f8985 RG |
501 | return partition; |
502 | } | |
503 | ||
504 | /* Free PARTITION. */ | |
505 | ||
506 | static void | |
526ceb68 | 507 | partition_free (partition *partition) |
c61f8985 RG |
508 | { |
509 | BITMAP_FREE (partition->stmts); | |
80ab0b19 | 510 | BITMAP_FREE (partition->loops); |
c61f8985 RG |
511 | free (partition); |
512 | } | |
513 | ||
30d55936 RG |
514 | /* Returns true if the partition can be generated as a builtin. */ |
515 | ||
516 | static bool | |
526ceb68 | 517 | partition_builtin_p (partition *partition) |
30d55936 | 518 | { |
826a536d | 519 | return partition->kind != PKIND_NORMAL; |
30d55936 | 520 | } |
c61f8985 | 521 | |
826a536d | 522 | /* Returns true if the partition contains a reduction. */ |
7ad672e4 RG |
523 | |
524 | static bool | |
526ceb68 | 525 | partition_reduction_p (partition *partition) |
7ad672e4 | 526 | { |
826a536d | 527 | return partition->reduction_p; |
7ad672e4 RG |
528 | } |
529 | ||
826a536d RB |
530 | /* Merge PARTITION into the partition DEST. */ |
531 | ||
532 | static void | |
526ceb68 | 533 | partition_merge_into (partition *dest, partition *partition) |
826a536d RB |
534 | { |
535 | dest->kind = PKIND_NORMAL; | |
536 | bitmap_ior_into (dest->stmts, partition->stmts); | |
537 | if (partition_reduction_p (partition)) | |
538 | dest->reduction_p = true; | |
539 | } | |
540 | ||
541 | ||
c07a8cb3 RG |
542 | /* Returns true when DEF is an SSA_NAME defined in LOOP and used after |
543 | the LOOP. */ | |
544 | ||
545 | static bool | |
546 | ssa_name_has_uses_outside_loop_p (tree def, loop_p loop) | |
547 | { | |
548 | imm_use_iterator imm_iter; | |
549 | use_operand_p use_p; | |
550 | ||
551 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) | |
e665269a | 552 | { |
355fe088 | 553 | gimple *use_stmt = USE_STMT (use_p); |
e665269a RG |
554 | if (!is_gimple_debug (use_stmt) |
555 | && loop != loop_containing_stmt (use_stmt)) | |
556 | return true; | |
557 | } | |
c07a8cb3 RG |
558 | |
559 | return false; | |
560 | } | |
561 | ||
562 | /* Returns true when STMT defines a scalar variable used after the | |
88af7c1a | 563 | loop LOOP. */ |
c07a8cb3 RG |
564 | |
565 | static bool | |
355fe088 | 566 | stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt) |
c07a8cb3 | 567 | { |
88af7c1a RG |
568 | def_operand_p def_p; |
569 | ssa_op_iter op_iter; | |
c07a8cb3 | 570 | |
9ca86fc3 RG |
571 | if (gimple_code (stmt) == GIMPLE_PHI) |
572 | return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop); | |
573 | ||
88af7c1a RG |
574 | FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) |
575 | if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop)) | |
576 | return true; | |
c07a8cb3 | 577 | |
88af7c1a | 578 | return false; |
c07a8cb3 RG |
579 | } |
580 | ||
dea61d92 SP |
581 | /* Return a copy of LOOP placed before LOOP. */ |
582 | ||
583 | static struct loop * | |
584 | copy_loop_before (struct loop *loop) | |
585 | { | |
586 | struct loop *res; | |
587 | edge preheader = loop_preheader_edge (loop); | |
588 | ||
dea61d92 | 589 | initialize_original_copy_tables (); |
5ce9450f | 590 | res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader); |
30d55936 | 591 | gcc_assert (res != NULL); |
dea61d92 | 592 | free_original_copy_tables (); |
2cfc56b9 | 593 | delete_update_ssa (); |
dea61d92 SP |
594 | |
595 | return res; | |
596 | } | |
597 | ||
598 | /* Creates an empty basic block after LOOP. */ | |
599 | ||
600 | static void | |
601 | create_bb_after_loop (struct loop *loop) | |
602 | { | |
603 | edge exit = single_exit (loop); | |
604 | ||
605 | if (!exit) | |
606 | return; | |
607 | ||
608 | split_edge (exit); | |
609 | } | |
610 | ||
611 | /* Generate code for PARTITION from the code in LOOP. The loop is | |
612 | copied when COPY_P is true. All the statements not flagged in the | |
613 | PARTITION bitmap are removed from the loop or from its copy. The | |
614 | statements are indexed in sequence inside a basic block, and the | |
30d55936 | 615 | basic blocks of a loop are taken in dom order. */ |
dea61d92 | 616 | |
30d55936 | 617 | static void |
526ceb68 | 618 | generate_loops_for_partition (struct loop *loop, partition *partition, |
c61f8985 | 619 | bool copy_p) |
dea61d92 | 620 | { |
2fd5894f | 621 | unsigned i; |
dea61d92 SP |
622 | basic_block *bbs; |
623 | ||
624 | if (copy_p) | |
625 | { | |
626 | loop = copy_loop_before (loop); | |
30d55936 | 627 | gcc_assert (loop != NULL); |
dea61d92 SP |
628 | create_preheader (loop, CP_SIMPLE_PREHEADERS); |
629 | create_bb_after_loop (loop); | |
630 | } | |
631 | ||
2fd5894f | 632 | /* Remove stmts not in the PARTITION bitmap. */ |
dea61d92 SP |
633 | bbs = get_loop_body_in_dom_order (loop); |
634 | ||
b03c3082 | 635 | if (MAY_HAVE_DEBUG_STMTS) |
2fd5894f | 636 | for (i = 0; i < loop->num_nodes; i++) |
b03c3082 JJ |
637 | { |
638 | basic_block bb = bbs[i]; | |
639 | ||
538dd0b7 DM |
640 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); |
641 | gsi_next (&bsi)) | |
2fd5894f | 642 | { |
538dd0b7 | 643 | gphi *phi = bsi.phi (); |
2fd5894f RB |
644 | if (!virtual_operand_p (gimple_phi_result (phi)) |
645 | && !bitmap_bit_p (partition->stmts, gimple_uid (phi))) | |
646 | reset_debug_uses (phi); | |
647 | } | |
b03c3082 | 648 | |
538dd0b7 | 649 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
b03c3082 | 650 | { |
355fe088 | 651 | gimple *stmt = gsi_stmt (bsi); |
b03c3082 JJ |
652 | if (gimple_code (stmt) != GIMPLE_LABEL |
653 | && !is_gimple_debug (stmt) | |
2fd5894f | 654 | && !bitmap_bit_p (partition->stmts, gimple_uid (stmt))) |
b03c3082 JJ |
655 | reset_debug_uses (stmt); |
656 | } | |
657 | } | |
658 | ||
2fd5894f | 659 | for (i = 0; i < loop->num_nodes; i++) |
dea61d92 SP |
660 | { |
661 | basic_block bb = bbs[i]; | |
dea61d92 | 662 | |
538dd0b7 | 663 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);) |
2fd5894f | 664 | { |
538dd0b7 | 665 | gphi *phi = bsi.phi (); |
2fd5894f RB |
666 | if (!virtual_operand_p (gimple_phi_result (phi)) |
667 | && !bitmap_bit_p (partition->stmts, gimple_uid (phi))) | |
2706a615 | 668 | remove_phi_node (&bsi, true); |
2fd5894f RB |
669 | else |
670 | gsi_next (&bsi); | |
671 | } | |
dea61d92 | 672 | |
538dd0b7 | 673 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);) |
2706a615 | 674 | { |
355fe088 | 675 | gimple *stmt = gsi_stmt (bsi); |
b03c3082 JJ |
676 | if (gimple_code (stmt) != GIMPLE_LABEL |
677 | && !is_gimple_debug (stmt) | |
2fd5894f | 678 | && !bitmap_bit_p (partition->stmts, gimple_uid (stmt))) |
2706a615 | 679 | { |
36875e8f RB |
680 | /* Choose an arbitrary path through the empty CFG part |
681 | that this unnecessary control stmt controls. */ | |
538dd0b7 | 682 | if (gcond *cond_stmt = dyn_cast <gcond *> (stmt)) |
36875e8f | 683 | { |
538dd0b7 | 684 | gimple_cond_make_false (cond_stmt); |
36875e8f RB |
685 | update_stmt (stmt); |
686 | } | |
687 | else if (gimple_code (stmt) == GIMPLE_SWITCH) | |
688 | { | |
538dd0b7 | 689 | gswitch *switch_stmt = as_a <gswitch *> (stmt); |
36875e8f | 690 | gimple_switch_set_index |
538dd0b7 | 691 | (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1))); |
36875e8f RB |
692 | update_stmt (stmt); |
693 | } | |
694 | else | |
695 | { | |
696 | unlink_stmt_vdef (stmt); | |
697 | gsi_remove (&bsi, true); | |
698 | release_defs (stmt); | |
699 | continue; | |
700 | } | |
2706a615 | 701 | } |
36875e8f | 702 | gsi_next (&bsi); |
2706a615 | 703 | } |
dea61d92 SP |
704 | } |
705 | ||
706 | free (bbs); | |
dea61d92 SP |
707 | } |
708 | ||
d0582dc1 | 709 | /* Build the size argument for a memory operation call. */ |
3661e899 | 710 | |
d0582dc1 | 711 | static tree |
d995e887 RB |
712 | build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter, |
713 | bool plus_one) | |
3661e899 | 714 | { |
d995e887 RB |
715 | tree size = fold_convert_loc (loc, sizetype, nb_iter); |
716 | if (plus_one) | |
717 | size = size_binop (PLUS_EXPR, size, size_one_node); | |
718 | size = fold_build2_loc (loc, MULT_EXPR, sizetype, size, | |
d0582dc1 | 719 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); |
d995e887 RB |
720 | size = fold_convert_loc (loc, size_type_node, size); |
721 | return size; | |
d0582dc1 RG |
722 | } |
723 | ||
724 | /* Build an address argument for a memory operation call. */ | |
725 | ||
726 | static tree | |
727 | build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes) | |
728 | { | |
729 | tree addr_base; | |
730 | ||
731 | addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); | |
732 | addr_base = fold_convert_loc (loc, sizetype, addr_base); | |
733 | ||
734 | /* Test for a negative stride, iterating over every element. */ | |
735 | if (tree_int_cst_sgn (DR_STEP (dr)) == -1) | |
736 | { | |
737 | addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, | |
738 | fold_convert_loc (loc, sizetype, nb_bytes)); | |
739 | addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, | |
740 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); | |
741 | } | |
742 | ||
743 | return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base); | |
3661e899 TB |
744 | } |
745 | ||
401f3a81 JJ |
746 | /* If VAL memory representation contains the same value in all bytes, |
747 | return that value, otherwise return -1. | |
748 | E.g. for 0x24242424 return 0x24, for IEEE double | |
749 | 747708026454360457216.0 return 0x44, etc. */ | |
750 | ||
751 | static int | |
752 | const_with_all_bytes_same (tree val) | |
753 | { | |
754 | unsigned char buf[64]; | |
755 | int i, len; | |
756 | ||
757 | if (integer_zerop (val) | |
758 | || real_zerop (val) | |
759 | || (TREE_CODE (val) == CONSTRUCTOR | |
760 | && !TREE_CLOBBER_P (val) | |
761 | && CONSTRUCTOR_NELTS (val) == 0)) | |
762 | return 0; | |
763 | ||
764 | if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) | |
765 | return -1; | |
766 | ||
767 | len = native_encode_expr (val, buf, sizeof (buf)); | |
768 | if (len == 0) | |
769 | return -1; | |
770 | for (i = 1; i < len; i++) | |
771 | if (buf[i] != buf[0]) | |
772 | return -1; | |
773 | return buf[0]; | |
774 | } | |
775 | ||
30d55936 | 776 | /* Generate a call to memset for PARTITION in LOOP. */ |
dea61d92 | 777 | |
cfee318d | 778 | static void |
526ceb68 | 779 | generate_memset_builtin (struct loop *loop, partition *partition) |
dea61d92 | 780 | { |
30d55936 | 781 | gimple_stmt_iterator gsi; |
355fe088 | 782 | gimple *stmt, *fn_call; |
818625cf | 783 | tree mem, fn, nb_bytes; |
30d55936 | 784 | location_t loc; |
b6dd5261 | 785 | tree val; |
30d55936 | 786 | |
d0582dc1 | 787 | stmt = DR_STMT (partition->main_dr); |
30d55936 | 788 | loc = gimple_location (stmt); |
30d55936 RG |
789 | |
790 | /* The new statements will be placed before LOOP. */ | |
791 | gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | |
dea61d92 | 792 | |
d995e887 RB |
793 | nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter, |
794 | partition->plus_one); | |
d0582dc1 RG |
795 | nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, |
796 | false, GSI_CONTINUE_LINKING); | |
797 | mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); | |
798 | mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, | |
799 | false, GSI_CONTINUE_LINKING); | |
dea61d92 | 800 | |
b6dd5261 RG |
801 | /* This exactly matches the pattern recognition in classify_partition. */ |
802 | val = gimple_assign_rhs1 (stmt); | |
401f3a81 JJ |
803 | /* Handle constants like 0x15151515 and similarly |
804 | floating point constants etc. where all bytes are the same. */ | |
805 | int bytev = const_with_all_bytes_same (val); | |
806 | if (bytev != -1) | |
807 | val = build_int_cst (integer_type_node, bytev); | |
808 | else if (TREE_CODE (val) == INTEGER_CST) | |
809 | val = fold_convert (integer_type_node, val); | |
810 | else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val))) | |
b6dd5261 | 811 | { |
b731b390 | 812 | tree tem = make_ssa_name (integer_type_node); |
355fe088 | 813 | gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val); |
401f3a81 JJ |
814 | gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING); |
815 | val = tem; | |
b6dd5261 RG |
816 | } |
817 | ||
e79983f4 | 818 | fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); |
b6dd5261 | 819 | fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); |
d0582dc1 | 820 | gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); |
dea61d92 SP |
821 | |
822 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
b6dd5261 RG |
823 | { |
824 | fprintf (dump_file, "generated memset"); | |
401f3a81 | 825 | if (bytev == 0) |
b6dd5261 | 826 | fprintf (dump_file, " zero\n"); |
b6dd5261 RG |
827 | else |
828 | fprintf (dump_file, "\n"); | |
829 | } | |
dea61d92 SP |
830 | } |
831 | ||
d0582dc1 RG |
832 | /* Generate a call to memcpy for PARTITION in LOOP. */ |
833 | ||
834 | static void | |
526ceb68 | 835 | generate_memcpy_builtin (struct loop *loop, partition *partition) |
d0582dc1 RG |
836 | { |
837 | gimple_stmt_iterator gsi; | |
355fe088 | 838 | gimple *stmt, *fn_call; |
818625cf | 839 | tree dest, src, fn, nb_bytes; |
d0582dc1 RG |
840 | location_t loc; |
841 | enum built_in_function kind; | |
842 | ||
843 | stmt = DR_STMT (partition->main_dr); | |
844 | loc = gimple_location (stmt); | |
d0582dc1 RG |
845 | |
846 | /* The new statements will be placed before LOOP. */ | |
847 | gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | |
848 | ||
d995e887 RB |
849 | nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter, |
850 | partition->plus_one); | |
d0582dc1 RG |
851 | nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, |
852 | false, GSI_CONTINUE_LINKING); | |
853 | dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); | |
854 | src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes); | |
855 | if (ptr_derefs_may_alias_p (dest, src)) | |
856 | kind = BUILT_IN_MEMMOVE; | |
857 | else | |
858 | kind = BUILT_IN_MEMCPY; | |
859 | ||
860 | dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE, | |
861 | false, GSI_CONTINUE_LINKING); | |
862 | src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, | |
863 | false, GSI_CONTINUE_LINKING); | |
864 | fn = build_fold_addr_expr (builtin_decl_implicit (kind)); | |
865 | fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); | |
866 | gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); | |
867 | ||
868 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
869 | { | |
870 | if (kind == BUILT_IN_MEMCPY) | |
871 | fprintf (dump_file, "generated memcpy\n"); | |
872 | else | |
873 | fprintf (dump_file, "generated memmove\n"); | |
874 | } | |
875 | } | |
876 | ||
30d55936 | 877 | /* Remove and destroy the loop LOOP. */ |
dea61d92 | 878 | |
30d55936 RG |
879 | static void |
880 | destroy_loop (struct loop *loop) | |
dea61d92 | 881 | { |
30d55936 RG |
882 | unsigned nbbs = loop->num_nodes; |
883 | edge exit = single_exit (loop); | |
884 | basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; | |
dea61d92 | 885 | basic_block *bbs; |
30d55936 | 886 | unsigned i; |
dea61d92 SP |
887 | |
888 | bbs = get_loop_body_in_dom_order (loop); | |
889 | ||
30d55936 RG |
890 | redirect_edge_pred (exit, src); |
891 | exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); | |
892 | exit->flags |= EDGE_FALLTHRU; | |
893 | cancel_loop_tree (loop); | |
894 | rescan_loop_exit (exit, false, true); | |
dea61d92 | 895 | |
30d55936 | 896 | for (i = 0; i < nbbs; i++) |
c014f6f5 RG |
897 | { |
898 | /* We have made sure to not leave any dangling uses of SSA | |
899 | names defined in the loop. With the exception of virtuals. | |
900 | Make sure we replace all uses of virtual defs that will remain | |
901 | outside of the loop with the bare symbol as delete_basic_block | |
902 | will release them. */ | |
538dd0b7 DM |
903 | for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); |
904 | gsi_next (&gsi)) | |
c014f6f5 | 905 | { |
538dd0b7 | 906 | gphi *phi = gsi.phi (); |
ea057359 | 907 | if (virtual_operand_p (gimple_phi_result (phi))) |
c014f6f5 RG |
908 | mark_virtual_phi_result_for_renaming (phi); |
909 | } | |
538dd0b7 DM |
910 | for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); |
911 | gsi_next (&gsi)) | |
c014f6f5 | 912 | { |
355fe088 | 913 | gimple *stmt = gsi_stmt (gsi); |
c014f6f5 RG |
914 | tree vdef = gimple_vdef (stmt); |
915 | if (vdef && TREE_CODE (vdef) == SSA_NAME) | |
916 | mark_virtual_operand_for_renaming (vdef); | |
917 | } | |
918 | delete_basic_block (bbs[i]); | |
919 | } | |
dea61d92 | 920 | free (bbs); |
30d55936 RG |
921 | |
922 | set_immediate_dominator (CDI_DOMINATORS, dest, | |
923 | recompute_dominator (CDI_DOMINATORS, dest)); | |
dea61d92 SP |
924 | } |
925 | ||
30d55936 | 926 | /* Generates code for PARTITION. */ |
dea61d92 | 927 | |
30d55936 | 928 | static void |
d0582dc1 | 929 | generate_code_for_partition (struct loop *loop, |
526ceb68 | 930 | partition *partition, bool copy_p) |
dea61d92 | 931 | { |
30d55936 RG |
932 | switch (partition->kind) |
933 | { | |
826a536d RB |
934 | case PKIND_NORMAL: |
935 | /* Reductions all have to be in the last partition. */ | |
936 | gcc_assert (!partition_reduction_p (partition) | |
937 | || !copy_p); | |
938 | generate_loops_for_partition (loop, partition, copy_p); | |
939 | return; | |
940 | ||
30d55936 | 941 | case PKIND_MEMSET: |
d0582dc1 | 942 | generate_memset_builtin (loop, partition); |
d0582dc1 RG |
943 | break; |
944 | ||
945 | case PKIND_MEMCPY: | |
946 | generate_memcpy_builtin (loop, partition); | |
30d55936 RG |
947 | break; |
948 | ||
949 | default: | |
950 | gcc_unreachable (); | |
951 | } | |
dea61d92 | 952 | |
826a536d RB |
953 | /* Common tail for partitions we turn into a call. If this was the last |
954 | partition for which we generate code, we have to destroy the loop. */ | |
955 | if (!copy_p) | |
956 | destroy_loop (loop); | |
dea61d92 SP |
957 | } |
958 | ||
dea61d92 | 959 | |
24f161fd RB |
960 | /* Returns a partition with all the statements needed for computing |
961 | the vertex V of the RDG, also including the loop exit conditions. */ | |
dea61d92 | 962 | |
526ceb68 | 963 | static partition * |
24f161fd | 964 | build_rdg_partition_for_vertex (struct graph *rdg, int v) |
dea61d92 | 965 | { |
526ceb68 | 966 | partition *partition = partition_alloc (NULL, NULL); |
00f96dc9 | 967 | auto_vec<int, 3> nodes; |
24f161fd | 968 | unsigned i; |
dea61d92 SP |
969 | int x; |
970 | ||
174ec470 | 971 | graphds_dfs (rdg, &v, 1, &nodes, false, NULL); |
dea61d92 | 972 | |
9771b263 | 973 | FOR_EACH_VEC_ELT (nodes, i, x) |
24f161fd RB |
974 | { |
975 | bitmap_set_bit (partition->stmts, x); | |
976 | bitmap_set_bit (partition->loops, | |
977 | loop_containing_stmt (RDG_STMT (rdg, x))->num); | |
978 | } | |
dea61d92 | 979 | |
dea61d92 SP |
980 | return partition; |
981 | } | |
982 | ||
30d55936 RG |
983 | /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. |
984 | For the moment we detect only the memset zero pattern. */ | |
cfee318d | 985 | |
30d55936 | 986 | static void |
526ceb68 | 987 | classify_partition (loop_p loop, struct graph *rdg, partition *partition) |
cfee318d | 988 | { |
cfee318d | 989 | bitmap_iterator bi; |
30d55936 RG |
990 | unsigned i; |
991 | tree nb_iter; | |
d0582dc1 | 992 | data_reference_p single_load, single_store; |
b9fc0497 | 993 | bool volatiles_p = false; |
d995e887 | 994 | bool plus_one = false; |
30d55936 RG |
995 | |
996 | partition->kind = PKIND_NORMAL; | |
d0582dc1 RG |
997 | partition->main_dr = NULL; |
998 | partition->secondary_dr = NULL; | |
818625cf | 999 | partition->niter = NULL_TREE; |
d995e887 | 1000 | partition->plus_one = false; |
30d55936 | 1001 | |
c61f8985 | 1002 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) |
30d55936 | 1003 | { |
355fe088 | 1004 | gimple *stmt = RDG_STMT (rdg, i); |
30d55936 RG |
1005 | |
1006 | if (gimple_has_volatile_ops (stmt)) | |
b9fc0497 | 1007 | volatiles_p = true; |
cfee318d | 1008 | |
826a536d | 1009 | /* If the stmt has uses outside of the loop mark it as reduction. */ |
9ca86fc3 | 1010 | if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) |
30d55936 | 1011 | { |
826a536d | 1012 | partition->reduction_p = true; |
30d55936 RG |
1013 | return; |
1014 | } | |
1015 | } | |
1016 | ||
b9fc0497 RB |
1017 | /* Perform general partition disqualification for builtins. */ |
1018 | if (volatiles_p | |
1019 | || !flag_tree_loop_distribute_patterns) | |
1020 | return; | |
1021 | ||
d0582dc1 RG |
1022 | /* Detect memset and memcpy. */ |
1023 | single_load = NULL; | |
1024 | single_store = NULL; | |
30d55936 RG |
1025 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) |
1026 | { | |
355fe088 | 1027 | gimple *stmt = RDG_STMT (rdg, i); |
d0582dc1 RG |
1028 | data_reference_p dr; |
1029 | unsigned j; | |
30d55936 RG |
1030 | |
1031 | if (gimple_code (stmt) == GIMPLE_PHI) | |
1032 | continue; | |
1033 | ||
1034 | /* Any scalar stmts are ok. */ | |
1035 | if (!gimple_vuse (stmt)) | |
1036 | continue; | |
1037 | ||
d0582dc1 RG |
1038 | /* Otherwise just regular loads/stores. */ |
1039 | if (!gimple_assign_single_p (stmt)) | |
1040 | return; | |
1041 | ||
1042 | /* But exactly one store and/or load. */ | |
9771b263 | 1043 | for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j) |
30d55936 | 1044 | { |
d0582dc1 RG |
1045 | if (DR_IS_READ (dr)) |
1046 | { | |
1047 | if (single_load != NULL) | |
1048 | return; | |
1049 | single_load = dr; | |
1050 | } | |
1051 | else | |
1052 | { | |
1053 | if (single_store != NULL) | |
1054 | return; | |
1055 | single_store = dr; | |
1056 | } | |
30d55936 | 1057 | } |
30d55936 RG |
1058 | } |
1059 | ||
818625cf RB |
1060 | if (!single_store) |
1061 | return; | |
1062 | ||
d995e887 | 1063 | nb_iter = number_of_latch_executions (loop); |
818625cf RB |
1064 | if (!nb_iter || nb_iter == chrec_dont_know) |
1065 | return; | |
d995e887 RB |
1066 | if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, |
1067 | gimple_bb (DR_STMT (single_store)))) | |
1068 | plus_one = true; | |
818625cf | 1069 | |
d0582dc1 RG |
1070 | if (single_store && !single_load) |
1071 | { | |
355fe088 | 1072 | gimple *stmt = DR_STMT (single_store); |
d0582dc1 | 1073 | tree rhs = gimple_assign_rhs1 (stmt); |
401f3a81 JJ |
1074 | if (const_with_all_bytes_same (rhs) == -1 |
1075 | && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs)) | |
1076 | || (TYPE_MODE (TREE_TYPE (rhs)) | |
1077 | != TYPE_MODE (unsigned_char_type_node)))) | |
d0582dc1 RG |
1078 | return; |
1079 | if (TREE_CODE (rhs) == SSA_NAME | |
1080 | && !SSA_NAME_IS_DEFAULT_DEF (rhs) | |
1081 | && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) | |
1082 | return; | |
ca406576 RB |
1083 | if (!adjacent_dr_p (single_store) |
1084 | || !dominated_by_p (CDI_DOMINATORS, | |
1085 | loop->latch, gimple_bb (stmt))) | |
d0582dc1 RG |
1086 | return; |
1087 | partition->kind = PKIND_MEMSET; | |
1088 | partition->main_dr = single_store; | |
818625cf | 1089 | partition->niter = nb_iter; |
d995e887 | 1090 | partition->plus_one = plus_one; |
d0582dc1 RG |
1091 | } |
1092 | else if (single_store && single_load) | |
1093 | { | |
355fe088 TS |
1094 | gimple *store = DR_STMT (single_store); |
1095 | gimple *load = DR_STMT (single_load); | |
d0582dc1 RG |
1096 | /* Direct aggregate copy or via an SSA name temporary. */ |
1097 | if (load != store | |
1098 | && gimple_assign_lhs (load) != gimple_assign_rhs1 (store)) | |
1099 | return; | |
1100 | if (!adjacent_dr_p (single_store) | |
1101 | || !adjacent_dr_p (single_load) | |
1102 | || !operand_equal_p (DR_STEP (single_store), | |
ca406576 RB |
1103 | DR_STEP (single_load), 0) |
1104 | || !dominated_by_p (CDI_DOMINATORS, | |
1105 | loop->latch, gimple_bb (store))) | |
d0582dc1 | 1106 | return; |
f20132e7 RG |
1107 | /* Now check that if there is a dependence this dependence is |
1108 | of a suitable form for memmove. */ | |
6e1aa848 | 1109 | vec<loop_p> loops = vNULL; |
f20132e7 | 1110 | ddr_p ddr; |
9771b263 | 1111 | loops.safe_push (loop); |
f20132e7 RG |
1112 | ddr = initialize_data_dependence_relation (single_load, single_store, |
1113 | loops); | |
1114 | compute_affine_dependence (ddr, loop); | |
f20132e7 RG |
1115 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) |
1116 | { | |
1117 | free_dependence_relation (ddr); | |
9771b263 | 1118 | loops.release (); |
f20132e7 RG |
1119 | return; |
1120 | } | |
1121 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known) | |
1122 | { | |
1123 | if (DDR_NUM_DIST_VECTS (ddr) == 0) | |
1124 | { | |
1125 | free_dependence_relation (ddr); | |
9771b263 | 1126 | loops.release (); |
f20132e7 RG |
1127 | return; |
1128 | } | |
1129 | lambda_vector dist_v; | |
9771b263 | 1130 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
f20132e7 RG |
1131 | { |
1132 | int dist = dist_v[index_in_loop_nest (loop->num, | |
1133 | DDR_LOOP_NEST (ddr))]; | |
1134 | if (dist > 0 && !DDR_REVERSED_P (ddr)) | |
1135 | { | |
1136 | free_dependence_relation (ddr); | |
9771b263 | 1137 | loops.release (); |
f20132e7 RG |
1138 | return; |
1139 | } | |
1140 | } | |
1141 | } | |
b7ce70b3 | 1142 | free_dependence_relation (ddr); |
9771b263 | 1143 | loops.release (); |
d0582dc1 RG |
1144 | partition->kind = PKIND_MEMCPY; |
1145 | partition->main_dr = single_store; | |
1146 | partition->secondary_dr = single_load; | |
818625cf | 1147 | partition->niter = nb_iter; |
d995e887 | 1148 | partition->plus_one = plus_one; |
d0582dc1 | 1149 | } |
cfee318d SP |
1150 | } |
1151 | ||
1fa0c180 RG |
1152 | /* For a data reference REF, return the declaration of its base |
1153 | address or NULL_TREE if the base is not determined. */ | |
1154 | ||
1155 | static tree | |
1156 | ref_base_address (data_reference_p dr) | |
1157 | { | |
1158 | tree base_address = DR_BASE_ADDRESS (dr); | |
1159 | if (base_address | |
1160 | && TREE_CODE (base_address) == ADDR_EXPR) | |
1161 | return TREE_OPERAND (base_address, 0); | |
1162 | ||
1163 | return base_address; | |
1164 | } | |
1165 | ||
cfee318d SP |
1166 | /* Returns true when PARTITION1 and PARTITION2 have similar memory |
1167 | accesses in RDG. */ | |
1168 | ||
1169 | static bool | |
526ceb68 TS |
1170 | similar_memory_accesses (struct graph *rdg, partition *partition1, |
1171 | partition *partition2) | |
cfee318d | 1172 | { |
1fa0c180 | 1173 | unsigned i, j, k, l; |
cfee318d | 1174 | bitmap_iterator bi, bj; |
1fa0c180 RG |
1175 | data_reference_p ref1, ref2; |
1176 | ||
1177 | /* First check whether in the intersection of the two partitions are | |
1178 | any loads or stores. Common loads are the situation that happens | |
1179 | most often. */ | |
1180 | EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi) | |
1181 | if (RDG_MEM_WRITE_STMT (rdg, i) | |
1182 | || RDG_MEM_READS_STMT (rdg, i)) | |
1183 | return true; | |
cfee318d | 1184 | |
1fa0c180 | 1185 | /* Then check all data-references against each other. */ |
c61f8985 | 1186 | EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi) |
cfee318d SP |
1187 | if (RDG_MEM_WRITE_STMT (rdg, i) |
1188 | || RDG_MEM_READS_STMT (rdg, i)) | |
c61f8985 | 1189 | EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj) |
cfee318d SP |
1190 | if (RDG_MEM_WRITE_STMT (rdg, j) |
1191 | || RDG_MEM_READS_STMT (rdg, j)) | |
1fa0c180 | 1192 | { |
9771b263 | 1193 | FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1) |
1fa0c180 RG |
1194 | { |
1195 | tree base1 = ref_base_address (ref1); | |
1196 | if (base1) | |
9771b263 | 1197 | FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2) |
1fa0c180 RG |
1198 | if (base1 == ref_base_address (ref2)) |
1199 | return true; | |
1200 | } | |
1201 | } | |
cfee318d SP |
1202 | |
1203 | return false; | |
1204 | } | |
1205 | ||
dea61d92 SP |
1206 | /* Aggregate several components into a useful partition that is |
1207 | registered in the PARTITIONS vector. Partitions will be | |
1208 | distributed in different loops. */ | |
1209 | ||
1210 | static void | |
83a95546 | 1211 | rdg_build_partitions (struct graph *rdg, |
355fe088 | 1212 | vec<gimple *> starting_stmts, |
526ceb68 | 1213 | vec<partition *> *partitions) |
dea61d92 | 1214 | { |
83a95546 | 1215 | bitmap processed = BITMAP_ALLOC (NULL); |
2fd5894f | 1216 | int i; |
355fe088 | 1217 | gimple *stmt; |
dea61d92 | 1218 | |
2fd5894f | 1219 | FOR_EACH_VEC_ELT (starting_stmts, i, stmt) |
dea61d92 | 1220 | { |
2fd5894f RB |
1221 | int v = rdg_vertex_for_stmt (rdg, stmt); |
1222 | ||
1223 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1224 | fprintf (dump_file, | |
1225 | "ldist asked to generate code for vertex %d\n", v); | |
b8698a0f | 1226 | |
24f161fd RB |
1227 | /* If the vertex is already contained in another partition so |
1228 | is the partition rooted at it. */ | |
dea61d92 SP |
1229 | if (bitmap_bit_p (processed, v)) |
1230 | continue; | |
b8698a0f | 1231 | |
526ceb68 | 1232 | partition *partition = build_rdg_partition_for_vertex (rdg, v); |
24f161fd | 1233 | bitmap_ior_into (processed, partition->stmts); |
dea61d92 | 1234 | |
826a536d | 1235 | if (dump_file && (dump_flags & TDF_DETAILS)) |
dea61d92 | 1236 | { |
826a536d RB |
1237 | fprintf (dump_file, "ldist useful partition:\n"); |
1238 | dump_bitmap (dump_file, partition->stmts); | |
dea61d92 | 1239 | } |
826a536d RB |
1240 | |
1241 | partitions->safe_push (partition); | |
dea61d92 SP |
1242 | } |
1243 | ||
83a95546 RB |
1244 | /* All vertices should have been assigned to at least one partition now, |
1245 | other than vertices belonging to dead code. */ | |
dea61d92 | 1246 | |
83a95546 | 1247 | BITMAP_FREE (processed); |
dea61d92 SP |
1248 | } |
1249 | ||
1250 | /* Dump to FILE the PARTITIONS. */ | |
1251 | ||
1252 | static void | |
526ceb68 | 1253 | dump_rdg_partitions (FILE *file, vec<partition *> partitions) |
dea61d92 SP |
1254 | { |
1255 | int i; | |
526ceb68 | 1256 | partition *partition; |
dea61d92 | 1257 | |
9771b263 | 1258 | FOR_EACH_VEC_ELT (partitions, i, partition) |
c61f8985 | 1259 | debug_bitmap_file (file, partition->stmts); |
dea61d92 SP |
1260 | } |
1261 | ||
1262 | /* Debug PARTITIONS. */ | |
526ceb68 | 1263 | extern void debug_rdg_partitions (vec<partition *> ); |
dea61d92 | 1264 | |
24e47c76 | 1265 | DEBUG_FUNCTION void |
526ceb68 | 1266 | debug_rdg_partitions (vec<partition *> partitions) |
dea61d92 SP |
1267 | { |
1268 | dump_rdg_partitions (stderr, partitions); | |
1269 | } | |
1270 | ||
2b8aee8e SP |
1271 | /* Returns the number of read and write operations in the RDG. */ |
1272 | ||
1273 | static int | |
1274 | number_of_rw_in_rdg (struct graph *rdg) | |
1275 | { | |
1276 | int i, res = 0; | |
1277 | ||
1278 | for (i = 0; i < rdg->n_vertices; i++) | |
1279 | { | |
1280 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1281 | ++res; | |
1282 | ||
1283 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1284 | ++res; | |
1285 | } | |
1286 | ||
1287 | return res; | |
1288 | } | |
1289 | ||
1290 | /* Returns the number of read and write operations in a PARTITION of | |
1291 | the RDG. */ | |
1292 | ||
1293 | static int | |
526ceb68 | 1294 | number_of_rw_in_partition (struct graph *rdg, partition *partition) |
2b8aee8e SP |
1295 | { |
1296 | int res = 0; | |
1297 | unsigned i; | |
1298 | bitmap_iterator ii; | |
1299 | ||
c61f8985 | 1300 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii) |
2b8aee8e SP |
1301 | { |
1302 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1303 | ++res; | |
1304 | ||
1305 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1306 | ++res; | |
1307 | } | |
1308 | ||
1309 | return res; | |
1310 | } | |
1311 | ||
1312 | /* Returns true when one of the PARTITIONS contains all the read or | |
1313 | write operations of RDG. */ | |
1314 | ||
1315 | static bool | |
9771b263 | 1316 | partition_contains_all_rw (struct graph *rdg, |
526ceb68 | 1317 | vec<partition *> partitions) |
2b8aee8e SP |
1318 | { |
1319 | int i; | |
526ceb68 | 1320 | partition *partition; |
2b8aee8e SP |
1321 | int nrw = number_of_rw_in_rdg (rdg); |
1322 | ||
9771b263 | 1323 | FOR_EACH_VEC_ELT (partitions, i, partition) |
2b8aee8e SP |
1324 | if (nrw == number_of_rw_in_partition (rdg, partition)) |
1325 | return true; | |
1326 | ||
1327 | return false; | |
1328 | } | |
1329 | ||
447f3223 RB |
1330 | /* Compute partition dependence created by the data references in DRS1 |
1331 | and DRS2 and modify and return DIR according to that. */ | |
1332 | ||
1333 | static int | |
1334 | pg_add_dependence_edges (struct graph *rdg, vec<loop_p> loops, int dir, | |
1335 | vec<data_reference_p> drs1, | |
1336 | vec<data_reference_p> drs2) | |
1337 | { | |
1338 | data_reference_p dr1, dr2; | |
1339 | ||
1340 | /* dependence direction - 0 is no dependence, -1 is back, | |
1341 | 1 is forth, 2 is both (we can stop then, merging will occur). */ | |
1342 | for (int ii = 0; drs1.iterate (ii, &dr1); ++ii) | |
1343 | for (int jj = 0; drs2.iterate (jj, &dr2); ++jj) | |
1344 | { | |
62e22fcb | 1345 | data_reference_p saved_dr1 = dr1; |
2cf19e26 | 1346 | int this_dir = 1; |
447f3223 RB |
1347 | ddr_p ddr; |
1348 | /* Re-shuffle data-refs to be in dominator order. */ | |
1349 | if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1)) | |
1350 | > rdg_vertex_for_stmt (rdg, DR_STMT (dr2))) | |
1351 | { | |
6b4db501 | 1352 | std::swap (dr1, dr2); |
447f3223 RB |
1353 | this_dir = -this_dir; |
1354 | } | |
1355 | ddr = initialize_data_dependence_relation (dr1, dr2, loops); | |
1356 | compute_affine_dependence (ddr, loops[0]); | |
1357 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) | |
1358 | this_dir = 2; | |
1359 | else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) | |
1360 | { | |
1361 | if (DDR_REVERSED_P (ddr)) | |
1362 | { | |
6b4db501 | 1363 | std::swap (dr1, dr2); |
447f3223 RB |
1364 | this_dir = -this_dir; |
1365 | } | |
1366 | /* Known dependences can still be unordered througout the | |
1367 | iteration space, see gcc.dg/tree-ssa/ldist-16.c. */ | |
2cf19e26 | 1368 | if (DDR_NUM_DIST_VECTS (ddr) != 1) |
447f3223 | 1369 | this_dir = 2; |
2cf19e26 RB |
1370 | /* If the overlap is exact preserve stmt order. */ |
1371 | else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1)) | |
1372 | ; | |
1373 | else | |
1374 | { | |
1375 | /* Else as the distance vector is lexicographic positive | |
1376 | swap the dependence direction. */ | |
1377 | this_dir = -this_dir; | |
1378 | } | |
447f3223 RB |
1379 | } |
1380 | else | |
1381 | this_dir = 0; | |
1382 | free_dependence_relation (ddr); | |
1383 | if (dir == 0) | |
1384 | dir = this_dir; | |
1385 | else if (dir != this_dir) | |
1386 | return 2; | |
62e22fcb RB |
1387 | /* Shuffle "back" dr1. */ |
1388 | dr1 = saved_dr1; | |
447f3223 RB |
1389 | } |
1390 | return dir; | |
1391 | } | |
1392 | ||
1393 | /* Compare postorder number of the partition graph vertices V1 and V2. */ | |
1394 | ||
1395 | static int | |
1396 | pgcmp (const void *v1_, const void *v2_) | |
1397 | { | |
1398 | const vertex *v1 = (const vertex *)v1_; | |
1399 | const vertex *v2 = (const vertex *)v2_; | |
1400 | return v2->post - v1->post; | |
1401 | } | |
2fd5894f RB |
1402 | |
1403 | /* Distributes the code from LOOP in such a way that producer | |
1404 | statements are placed before consumer statements. Tries to separate | |
1405 | only the statements from STMTS into separate loops. | |
1406 | Returns the number of distributed loops. */ | |
dea61d92 SP |
1407 | |
1408 | static int | |
355fe088 | 1409 | distribute_loop (struct loop *loop, vec<gimple *> stmts, |
826a536d | 1410 | control_dependences *cd, int *nb_calls) |
dea61d92 | 1411 | { |
2fd5894f | 1412 | struct graph *rdg; |
526ceb68 | 1413 | partition *partition; |
be6b029b | 1414 | bool any_builtin; |
2fd5894f | 1415 | int i, nbp; |
447f3223 RB |
1416 | graph *pg = NULL; |
1417 | int num_sccs = 1; | |
dea61d92 | 1418 | |
826a536d | 1419 | *nb_calls = 0; |
00f96dc9 | 1420 | auto_vec<loop_p, 3> loop_nest; |
2fd5894f | 1421 | if (!find_loop_nest (loop, &loop_nest)) |
07687835 | 1422 | return 0; |
2fd5894f | 1423 | |
36875e8f | 1424 | rdg = build_rdg (loop_nest, cd); |
2fd5894f RB |
1425 | if (!rdg) |
1426 | { | |
1427 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1428 | fprintf (dump_file, | |
1429 | "Loop %d not distributed: failed to build the RDG.\n", | |
1430 | loop->num); | |
1431 | ||
2fd5894f RB |
1432 | return 0; |
1433 | } | |
1434 | ||
1435 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1436 | dump_rdg (dump_file, rdg); | |
1437 | ||
526ceb68 | 1438 | auto_vec<struct partition *, 3> partitions; |
2fd5894f | 1439 | rdg_build_partitions (rdg, stmts, &partitions); |
dea61d92 | 1440 | |
be6b029b | 1441 | any_builtin = false; |
9771b263 | 1442 | FOR_EACH_VEC_ELT (partitions, i, partition) |
be6b029b RG |
1443 | { |
1444 | classify_partition (loop, rdg, partition); | |
1445 | any_builtin |= partition_builtin_p (partition); | |
1446 | } | |
30d55936 | 1447 | |
447f3223 RB |
1448 | /* If we are only distributing patterns but did not detect any, |
1449 | simply bail out. */ | |
9fed7f3a RB |
1450 | if (!flag_tree_loop_distribution |
1451 | && !any_builtin) | |
1452 | { | |
1453 | nbp = 0; | |
1454 | goto ldist_done; | |
1455 | } | |
1456 | ||
447f3223 RB |
1457 | /* If we are only distributing patterns fuse all partitions that |
1458 | were not classified as builtins. This also avoids chopping | |
1459 | a loop into pieces, separated by builtin calls. That is, we | |
1460 | only want no or a single loop body remaining. */ | |
526ceb68 | 1461 | struct partition *into; |
447f3223 RB |
1462 | if (!flag_tree_loop_distribution) |
1463 | { | |
1464 | for (i = 0; partitions.iterate (i, &into); ++i) | |
1465 | if (!partition_builtin_p (into)) | |
1466 | break; | |
1467 | for (++i; partitions.iterate (i, &partition); ++i) | |
1468 | if (!partition_builtin_p (partition)) | |
1469 | { | |
1470 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1471 | { | |
1472 | fprintf (dump_file, "fusing non-builtin partitions\n"); | |
1473 | dump_bitmap (dump_file, into->stmts); | |
1474 | dump_bitmap (dump_file, partition->stmts); | |
1475 | } | |
1476 | partition_merge_into (into, partition); | |
1477 | partitions.unordered_remove (i); | |
1478 | partition_free (partition); | |
1479 | i--; | |
1480 | } | |
1481 | } | |
1482 | ||
1483 | /* Due to limitations in the transform phase we have to fuse all | |
1484 | reduction partitions into the last partition so the existing | |
1485 | loop will contain all loop-closed PHI nodes. */ | |
1486 | for (i = 0; partitions.iterate (i, &into); ++i) | |
1487 | if (partition_reduction_p (into)) | |
1488 | break; | |
1489 | for (i = i + 1; partitions.iterate (i, &partition); ++i) | |
1490 | if (partition_reduction_p (partition)) | |
1491 | { | |
1492 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1493 | { | |
1494 | fprintf (dump_file, "fusing partitions\n"); | |
1495 | dump_bitmap (dump_file, into->stmts); | |
1496 | dump_bitmap (dump_file, partition->stmts); | |
1497 | fprintf (dump_file, "because they have reductions\n"); | |
1498 | } | |
1499 | partition_merge_into (into, partition); | |
1500 | partitions.unordered_remove (i); | |
1501 | partition_free (partition); | |
1502 | i--; | |
1503 | } | |
1504 | ||
9fed7f3a RB |
1505 | /* Apply our simple cost model - fuse partitions with similar |
1506 | memory accesses. */ | |
9fed7f3a RB |
1507 | for (i = 0; partitions.iterate (i, &into); ++i) |
1508 | { | |
1509 | if (partition_builtin_p (into)) | |
1510 | continue; | |
1511 | for (int j = i + 1; | |
1512 | partitions.iterate (j, &partition); ++j) | |
1513 | { | |
1514 | if (!partition_builtin_p (partition) | |
1515 | && similar_memory_accesses (rdg, into, partition)) | |
1516 | { | |
1517 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1518 | { | |
1519 | fprintf (dump_file, "fusing partitions\n"); | |
1520 | dump_bitmap (dump_file, into->stmts); | |
1521 | dump_bitmap (dump_file, partition->stmts); | |
1522 | fprintf (dump_file, "because they have similar " | |
1523 | "memory accesses\n"); | |
1524 | } | |
826a536d | 1525 | partition_merge_into (into, partition); |
447f3223 | 1526 | partitions.unordered_remove (j); |
9fed7f3a RB |
1527 | partition_free (partition); |
1528 | j--; | |
1529 | } | |
1530 | } | |
1531 | } | |
1532 | ||
447f3223 RB |
1533 | /* Build the partition dependency graph. */ |
1534 | if (partitions.length () > 1) | |
c014f6f5 | 1535 | { |
447f3223 RB |
1536 | pg = new_graph (partitions.length ()); |
1537 | struct pgdata { | |
526ceb68 | 1538 | struct partition *partition; |
447f3223 RB |
1539 | vec<data_reference_p> writes; |
1540 | vec<data_reference_p> reads; | |
1541 | }; | |
1542 | #define PGDATA(i) ((pgdata *)(pg->vertices[i].data)) | |
1543 | for (i = 0; partitions.iterate (i, &partition); ++i) | |
1544 | { | |
1545 | vertex *v = &pg->vertices[i]; | |
1546 | pgdata *data = new pgdata; | |
1547 | data_reference_p dr; | |
1548 | /* FIXME - leaks. */ | |
1549 | v->data = data; | |
1550 | bitmap_iterator bi; | |
1551 | unsigned j; | |
1552 | data->partition = partition; | |
1553 | data->reads = vNULL; | |
1554 | data->writes = vNULL; | |
1555 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, j, bi) | |
1556 | for (int k = 0; RDG_DATAREFS (rdg, j).iterate (k, &dr); ++k) | |
1557 | if (DR_IS_READ (dr)) | |
1558 | data->reads.safe_push (dr); | |
1559 | else | |
1560 | data->writes.safe_push (dr); | |
1561 | } | |
526ceb68 | 1562 | struct partition *partition1, *partition2; |
447f3223 RB |
1563 | for (i = 0; partitions.iterate (i, &partition1); ++i) |
1564 | for (int j = i + 1; partitions.iterate (j, &partition2); ++j) | |
1565 | { | |
1566 | /* dependence direction - 0 is no dependence, -1 is back, | |
1567 | 1 is forth, 2 is both (we can stop then, merging will occur). */ | |
1568 | int dir = 0; | |
1569 | dir = pg_add_dependence_edges (rdg, loop_nest, dir, | |
1570 | PGDATA(i)->writes, | |
1571 | PGDATA(j)->reads); | |
1572 | if (dir != 2) | |
1573 | dir = pg_add_dependence_edges (rdg, loop_nest, dir, | |
1574 | PGDATA(i)->reads, | |
1575 | PGDATA(j)->writes); | |
1576 | if (dir != 2) | |
1577 | dir = pg_add_dependence_edges (rdg, loop_nest, dir, | |
1578 | PGDATA(i)->writes, | |
1579 | PGDATA(j)->writes); | |
1580 | if (dir == 1 || dir == 2) | |
1581 | add_edge (pg, i, j); | |
1582 | if (dir == -1 || dir == 2) | |
1583 | add_edge (pg, j, i); | |
1584 | } | |
1585 | ||
1586 | /* Add edges to the reduction partition (if any) to force it last. */ | |
1587 | unsigned j; | |
1588 | for (j = 0; partitions.iterate (j, &partition); ++j) | |
1589 | if (partition_reduction_p (partition)) | |
1590 | break; | |
1591 | if (j < partitions.length ()) | |
38ad2d07 | 1592 | { |
447f3223 RB |
1593 | for (unsigned i = 0; partitions.iterate (i, &partition); ++i) |
1594 | if (i != j) | |
1595 | add_edge (pg, i, j); | |
1596 | } | |
1597 | ||
1598 | /* Compute partitions we cannot separate and fuse them. */ | |
1599 | num_sccs = graphds_scc (pg, NULL); | |
1600 | for (i = 0; i < num_sccs; ++i) | |
1601 | { | |
526ceb68 | 1602 | struct partition *first; |
447f3223 RB |
1603 | int j; |
1604 | for (j = 0; partitions.iterate (j, &first); ++j) | |
1605 | if (pg->vertices[j].component == i) | |
38ad2d07 | 1606 | break; |
447f3223 RB |
1607 | for (j = j + 1; partitions.iterate (j, &partition); ++j) |
1608 | if (pg->vertices[j].component == i) | |
38ad2d07 | 1609 | { |
447f3223 RB |
1610 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1611 | { | |
1612 | fprintf (dump_file, "fusing partitions\n"); | |
1613 | dump_bitmap (dump_file, first->stmts); | |
1614 | dump_bitmap (dump_file, partition->stmts); | |
1615 | fprintf (dump_file, "because they are in the same " | |
1616 | "dependence SCC\n"); | |
1617 | } | |
1618 | partition_merge_into (first, partition); | |
1619 | partitions[j] = NULL; | |
5eb010bc | 1620 | partition_free (partition); |
447f3223 | 1621 | PGDATA (j)->partition = NULL; |
38ad2d07 | 1622 | } |
38ad2d07 | 1623 | } |
30d55936 | 1624 | |
447f3223 RB |
1625 | /* Now order the remaining nodes in postorder. */ |
1626 | qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp); | |
1627 | partitions.truncate (0); | |
1628 | for (i = 0; i < pg->n_vertices; ++i) | |
b9fc0497 | 1629 | { |
447f3223 RB |
1630 | pgdata *data = PGDATA (i); |
1631 | if (data->partition) | |
1632 | partitions.safe_push (data->partition); | |
1633 | data->reads.release (); | |
1634 | data->writes.release (); | |
1635 | delete data; | |
b9fc0497 | 1636 | } |
447f3223 RB |
1637 | gcc_assert (partitions.length () == (unsigned)num_sccs); |
1638 | free_graph (pg); | |
b9fc0497 RB |
1639 | } |
1640 | ||
9771b263 | 1641 | nbp = partitions.length (); |
a4293fa6 | 1642 | if (nbp == 0 |
9771b263 DN |
1643 | || (nbp == 1 && !partition_builtin_p (partitions[0])) |
1644 | || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) | |
c014f6f5 RG |
1645 | { |
1646 | nbp = 0; | |
1647 | goto ldist_done; | |
1648 | } | |
dea61d92 SP |
1649 | |
1650 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1651 | dump_rdg_partitions (dump_file, partitions); | |
1652 | ||
9771b263 | 1653 | FOR_EACH_VEC_ELT (partitions, i, partition) |
826a536d RB |
1654 | { |
1655 | if (partition_builtin_p (partition)) | |
1656 | (*nb_calls)++; | |
1657 | generate_code_for_partition (loop, partition, i < nbp - 1); | |
1658 | } | |
dea61d92 | 1659 | |
dea61d92 SP |
1660 | ldist_done: |
1661 | ||
9771b263 | 1662 | FOR_EACH_VEC_ELT (partitions, i, partition) |
c61f8985 | 1663 | partition_free (partition); |
dea61d92 | 1664 | |
dea61d92 | 1665 | free_rdg (rdg); |
826a536d | 1666 | return nbp - *nb_calls; |
dea61d92 SP |
1667 | } |
1668 | ||
1669 | /* Distribute all loops in the current function. */ | |
1670 | ||
be55bfe6 TS |
1671 | namespace { |
1672 | ||
1673 | const pass_data pass_data_loop_distribution = | |
1674 | { | |
1675 | GIMPLE_PASS, /* type */ | |
1676 | "ldist", /* name */ | |
1677 | OPTGROUP_LOOP, /* optinfo_flags */ | |
be55bfe6 TS |
1678 | TV_TREE_LOOP_DISTRIBUTION, /* tv_id */ |
1679 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1680 | 0, /* properties_provided */ | |
1681 | 0, /* properties_destroyed */ | |
1682 | 0, /* todo_flags_start */ | |
3bea341f | 1683 | 0, /* todo_flags_finish */ |
be55bfe6 TS |
1684 | }; |
1685 | ||
1686 | class pass_loop_distribution : public gimple_opt_pass | |
1687 | { | |
1688 | public: | |
1689 | pass_loop_distribution (gcc::context *ctxt) | |
1690 | : gimple_opt_pass (pass_data_loop_distribution, ctxt) | |
1691 | {} | |
1692 | ||
1693 | /* opt_pass methods: */ | |
1694 | virtual bool gate (function *) | |
1695 | { | |
1696 | return flag_tree_loop_distribution | |
1697 | || flag_tree_loop_distribute_patterns; | |
1698 | } | |
1699 | ||
1700 | virtual unsigned int execute (function *); | |
1701 | ||
1702 | }; // class pass_loop_distribution | |
1703 | ||
1704 | unsigned int | |
1705 | pass_loop_distribution::execute (function *fun) | |
dea61d92 SP |
1706 | { |
1707 | struct loop *loop; | |
c014f6f5 | 1708 | bool changed = false; |
1fa0c180 | 1709 | basic_block bb; |
36875e8f | 1710 | control_dependences *cd = NULL; |
1fa0c180 | 1711 | |
be55bfe6 | 1712 | FOR_ALL_BB_FN (bb, fun) |
1fa0c180 RG |
1713 | { |
1714 | gimple_stmt_iterator gsi; | |
1715 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1716 | gimple_set_uid (gsi_stmt (gsi), -1); | |
1717 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1718 | gimple_set_uid (gsi_stmt (gsi), -1); | |
1719 | } | |
dea61d92 | 1720 | |
c014f6f5 RG |
1721 | /* We can at the moment only distribute non-nested loops, thus restrict |
1722 | walking to innermost loops. */ | |
f0bd40b1 | 1723 | FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) |
dea61d92 | 1724 | { |
355fe088 | 1725 | auto_vec<gimple *> work_list; |
be6b029b | 1726 | basic_block *bbs; |
0e20c89f | 1727 | int num = loop->num; |
be6b029b | 1728 | unsigned int i; |
a3357f7d RG |
1729 | |
1730 | /* If the loop doesn't have a single exit we will fail anyway, | |
1731 | so do that early. */ | |
1732 | if (!single_exit (loop)) | |
1733 | continue; | |
dea61d92 | 1734 | |
f56f2d33 JH |
1735 | /* Only optimize hot loops. */ |
1736 | if (!optimize_loop_for_speed_p (loop)) | |
1737 | continue; | |
1738 | ||
be6b029b RG |
1739 | /* Initialize the worklist with stmts we seed the partitions with. */ |
1740 | bbs = get_loop_body_in_dom_order (loop); | |
1741 | for (i = 0; i < loop->num_nodes; ++i) | |
1742 | { | |
538dd0b7 DM |
1743 | for (gphi_iterator gsi = gsi_start_phis (bbs[i]); |
1744 | !gsi_end_p (gsi); | |
1745 | gsi_next (&gsi)) | |
deb6c11a | 1746 | { |
538dd0b7 | 1747 | gphi *phi = gsi.phi (); |
deb6c11a RB |
1748 | if (virtual_operand_p (gimple_phi_result (phi))) |
1749 | continue; | |
1750 | /* Distribute stmts which have defs that are used outside of | |
be55bfe6 | 1751 | the loop. */ |
deb6c11a RB |
1752 | if (!stmt_has_scalar_dependences_outside_loop (loop, phi)) |
1753 | continue; | |
1754 | work_list.safe_push (phi); | |
1755 | } | |
538dd0b7 DM |
1756 | for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); |
1757 | !gsi_end_p (gsi); | |
1758 | gsi_next (&gsi)) | |
be6b029b | 1759 | { |
355fe088 | 1760 | gimple *stmt = gsi_stmt (gsi); |
83a95546 RB |
1761 | |
1762 | /* If there is a stmt with side-effects bail out - we | |
be55bfe6 | 1763 | cannot and should not distribute this loop. */ |
83a95546 RB |
1764 | if (gimple_has_side_effects (stmt)) |
1765 | { | |
1766 | work_list.truncate (0); | |
1767 | goto out; | |
1768 | } | |
1769 | ||
b9fc0497 | 1770 | /* Distribute stmts which have defs that are used outside of |
be55bfe6 | 1771 | the loop. */ |
b9fc0497 RB |
1772 | if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) |
1773 | ; | |
1774 | /* Otherwise only distribute stores for now. */ | |
e179190c | 1775 | else if (!gimple_vdef (stmt)) |
be6b029b RG |
1776 | continue; |
1777 | ||
9771b263 | 1778 | work_list.safe_push (stmt); |
be6b029b RG |
1779 | } |
1780 | } | |
83a95546 | 1781 | out: |
be6b029b | 1782 | free (bbs); |
c014f6f5 | 1783 | |
826a536d RB |
1784 | int nb_generated_loops = 0; |
1785 | int nb_generated_calls = 0; | |
1786 | location_t loc = find_loop_location (loop); | |
9771b263 | 1787 | if (work_list.length () > 0) |
36875e8f RB |
1788 | { |
1789 | if (!cd) | |
1790 | { | |
ca406576 | 1791 | calculate_dominance_info (CDI_DOMINATORS); |
36875e8f RB |
1792 | calculate_dominance_info (CDI_POST_DOMINATORS); |
1793 | cd = new control_dependences (create_edge_list ()); | |
1794 | free_dominance_info (CDI_POST_DOMINATORS); | |
1795 | } | |
826a536d RB |
1796 | nb_generated_loops = distribute_loop (loop, work_list, cd, |
1797 | &nb_generated_calls); | |
36875e8f | 1798 | } |
c014f6f5 | 1799 | |
826a536d | 1800 | if (nb_generated_loops + nb_generated_calls > 0) |
dea61d92 | 1801 | { |
826a536d RB |
1802 | changed = true; |
1803 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, | |
1804 | loc, "Loop %d distributed: split to %d loops " | |
1805 | "and %d library calls.\n", | |
1806 | num, nb_generated_loops, nb_generated_calls); | |
dea61d92 | 1807 | } |
826a536d RB |
1808 | else if (dump_file && (dump_flags & TDF_DETAILS)) |
1809 | fprintf (dump_file, "Loop %d is the same.\n", num); | |
dea61d92 SP |
1810 | } |
1811 | ||
36875e8f RB |
1812 | if (cd) |
1813 | delete cd; | |
1814 | ||
c014f6f5 RG |
1815 | if (changed) |
1816 | { | |
d0ed943c RB |
1817 | /* Cached scalar evolutions now may refer to wrong or non-existing |
1818 | loops. */ | |
1819 | scev_reset_htab (); | |
be55bfe6 | 1820 | mark_virtual_operands_for_renaming (fun); |
c014f6f5 RG |
1821 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
1822 | } | |
1823 | ||
b2b29377 | 1824 | checking_verify_loop_structure (); |
c014f6f5 | 1825 | |
5006671f | 1826 | return 0; |
dea61d92 SP |
1827 | } |
1828 | ||
27a4cd48 DM |
1829 | } // anon namespace |
1830 | ||
1831 | gimple_opt_pass * | |
1832 | make_pass_loop_distribution (gcc::context *ctxt) | |
1833 | { | |
1834 | return new pass_loop_distribution (ctxt); | |
1835 | } |