]>
Commit | Line | Data |
---|---|---|
dea61d92 | 1 | /* Loop distribution. |
d1e082c2 | 2 | Copyright (C) 2006-2013 Free Software Foundation, Inc. |
dea61d92 SP |
3 | Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr> |
4 | and Sebastian Pop <sebastian.pop@amd.com>. | |
5 | ||
6 | This file is part of GCC. | |
b8698a0f | 7 | |
dea61d92 SP |
8 | GCC is free software; you can redistribute it and/or modify it |
9 | under the terms of the GNU General Public License as published by the | |
10 | Free Software Foundation; either version 3, or (at your option) any | |
11 | later version. | |
b8698a0f | 12 | |
dea61d92 SP |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
b8698a0f | 17 | |
dea61d92 SP |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | /* This pass performs loop distribution: for example, the loop | |
23 | ||
24 | |DO I = 2, N | |
25 | | A(I) = B(I) + C | |
26 | | D(I) = A(I-1)*E | |
27 | |ENDDO | |
28 | ||
b8698a0f | 29 | is transformed to |
dea61d92 SP |
30 | |
31 | |DOALL I = 2, N | |
32 | | A(I) = B(I) + C | |
33 | |ENDDO | |
34 | | | |
35 | |DOALL I = 2, N | |
36 | | D(I) = A(I-1)*E | |
37 | |ENDDO | |
38 | ||
39 | This pass uses an RDG, Reduced Dependence Graph built on top of the | |
40 | data dependence relations. The RDG is then topologically sorted to | |
41 | obtain a map of information producers/consumers based on which it | |
42 | generates the new loops. */ | |
43 | ||
44 | #include "config.h" | |
45 | #include "system.h" | |
46 | #include "coretypes.h" | |
7a300452 | 47 | #include "tree-ssa.h" |
dea61d92 | 48 | #include "cfgloop.h" |
dea61d92 SP |
49 | #include "tree-chrec.h" |
50 | #include "tree-data-ref.h" | |
51 | #include "tree-scalar-evolution.h" | |
52 | #include "tree-pass.h" | |
80ab0b19 RB |
53 | #include "gimple-pretty-print.h" |
54 | ||
55 | ||
56 | /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ | |
57 | typedef struct rdg_vertex | |
58 | { | |
59 | /* The statement represented by this vertex. */ | |
60 | gimple stmt; | |
61 | ||
62 | /* Vector of data-references in this statement. */ | |
63 | vec<data_reference_p> datarefs; | |
64 | ||
65 | /* True when the statement contains a write to memory. */ | |
66 | bool has_mem_write; | |
67 | ||
68 | /* True when the statement contains a read from memory. */ | |
69 | bool has_mem_reads; | |
70 | } *rdg_vertex_p; | |
71 | ||
72 | #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt | |
73 | #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs | |
74 | #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write | |
75 | #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads | |
76 | #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I])) | |
77 | #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I])) | |
78 | #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I])) | |
79 | #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I])) | |
80 | ||
81 | /* Data dependence type. */ | |
82 | ||
83 | enum rdg_dep_type | |
84 | { | |
85 | /* Read After Write (RAW). */ | |
86 | flow_dd = 'f', | |
87 | ||
88 | /* Write After Read (WAR). */ | |
89 | anti_dd = 'a', | |
90 | ||
91 | /* Write After Write (WAW). */ | |
92 | output_dd = 'o', | |
93 | ||
94 | /* Read After Read (RAR). */ | |
36875e8f RB |
95 | input_dd = 'i', |
96 | ||
97 | /* Control dependence (execute conditional on). */ | |
98 | control_dd = 'c' | |
80ab0b19 RB |
99 | }; |
100 | ||
101 | /* Dependence information attached to an edge of the RDG. */ | |
102 | ||
103 | typedef struct rdg_edge | |
104 | { | |
105 | /* Type of the dependence. */ | |
106 | enum rdg_dep_type type; | |
107 | ||
108 | /* Levels of the dependence: the depth of the loops that carry the | |
109 | dependence. */ | |
110 | unsigned level; | |
111 | ||
112 | /* Dependence relation between data dependences, NULL when one of | |
113 | the vertices is a scalar. */ | |
114 | ddr_p relation; | |
115 | } *rdg_edge_p; | |
116 | ||
117 | #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type | |
118 | #define RDGE_LEVEL(E) ((struct rdg_edge *) ((E)->data))->level | |
119 | #define RDGE_RELATION(E) ((struct rdg_edge *) ((E)->data))->relation | |
120 | ||
80ab0b19 RB |
121 | /* Dump vertex I in RDG to FILE. */ |
122 | ||
123 | static void | |
124 | dump_rdg_vertex (FILE *file, struct graph *rdg, int i) | |
125 | { | |
126 | struct vertex *v = &(rdg->vertices[i]); | |
127 | struct graph_edge *e; | |
128 | ||
129 | fprintf (file, "(vertex %d: (%s%s) (in:", i, | |
130 | RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "", | |
131 | RDG_MEM_READS_STMT (rdg, i) ? "r" : ""); | |
132 | ||
133 | if (v->pred) | |
134 | for (e = v->pred; e; e = e->pred_next) | |
135 | fprintf (file, " %d", e->src); | |
136 | ||
137 | fprintf (file, ") (out:"); | |
138 | ||
139 | if (v->succ) | |
140 | for (e = v->succ; e; e = e->succ_next) | |
141 | fprintf (file, " %d", e->dest); | |
142 | ||
143 | fprintf (file, ")\n"); | |
144 | print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS); | |
145 | fprintf (file, ")\n"); | |
146 | } | |
147 | ||
148 | /* Call dump_rdg_vertex on stderr. */ | |
149 | ||
150 | DEBUG_FUNCTION void | |
151 | debug_rdg_vertex (struct graph *rdg, int i) | |
152 | { | |
153 | dump_rdg_vertex (stderr, rdg, i); | |
154 | } | |
155 | ||
80ab0b19 RB |
156 | /* Dump the reduced dependence graph RDG to FILE. */ |
157 | ||
158 | static void | |
159 | dump_rdg (FILE *file, struct graph *rdg) | |
160 | { | |
80ab0b19 | 161 | fprintf (file, "(rdg\n"); |
2fd5894f RB |
162 | for (int i = 0; i < rdg->n_vertices; i++) |
163 | dump_rdg_vertex (file, rdg, i); | |
80ab0b19 | 164 | fprintf (file, ")\n"); |
80ab0b19 RB |
165 | } |
166 | ||
167 | /* Call dump_rdg on stderr. */ | |
168 | ||
169 | DEBUG_FUNCTION void | |
170 | debug_rdg (struct graph *rdg) | |
171 | { | |
172 | dump_rdg (stderr, rdg); | |
173 | } | |
174 | ||
175 | static void | |
176 | dot_rdg_1 (FILE *file, struct graph *rdg) | |
177 | { | |
178 | int i; | |
174ec470 RB |
179 | pretty_printer buffer; |
180 | pp_needs_newline (&buffer) = false; | |
181 | buffer.buffer->stream = file; | |
80ab0b19 RB |
182 | |
183 | fprintf (file, "digraph RDG {\n"); | |
184 | ||
185 | for (i = 0; i < rdg->n_vertices; i++) | |
186 | { | |
187 | struct vertex *v = &(rdg->vertices[i]); | |
188 | struct graph_edge *e; | |
189 | ||
174ec470 RB |
190 | fprintf (file, "%d [label=\"[%d] ", i, i); |
191 | pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM); | |
192 | pp_flush (&buffer); | |
193 | fprintf (file, "\"]\n"); | |
194 | ||
80ab0b19 RB |
195 | /* Highlight reads from memory. */ |
196 | if (RDG_MEM_READS_STMT (rdg, i)) | |
197 | fprintf (file, "%d [style=filled, fillcolor=green]\n", i); | |
198 | ||
199 | /* Highlight stores to memory. */ | |
200 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
201 | fprintf (file, "%d [style=filled, fillcolor=red]\n", i); | |
202 | ||
203 | if (v->succ) | |
204 | for (e = v->succ; e; e = e->succ_next) | |
205 | switch (RDGE_TYPE (e)) | |
206 | { | |
207 | case input_dd: | |
208 | fprintf (file, "%d -> %d [label=input] \n", i, e->dest); | |
209 | break; | |
210 | ||
211 | case output_dd: | |
212 | fprintf (file, "%d -> %d [label=output] \n", i, e->dest); | |
213 | break; | |
214 | ||
215 | case flow_dd: | |
216 | /* These are the most common dependences: don't print these. */ | |
217 | fprintf (file, "%d -> %d \n", i, e->dest); | |
218 | break; | |
219 | ||
220 | case anti_dd: | |
221 | fprintf (file, "%d -> %d [label=anti] \n", i, e->dest); | |
222 | break; | |
223 | ||
36875e8f RB |
224 | case control_dd: |
225 | fprintf (file, "%d -> %d [label=control] \n", i, e->dest); | |
226 | break; | |
227 | ||
80ab0b19 RB |
228 | default: |
229 | gcc_unreachable (); | |
230 | } | |
231 | } | |
232 | ||
233 | fprintf (file, "}\n\n"); | |
234 | } | |
235 | ||
236 | /* Display the Reduced Dependence Graph using dotty. */ | |
237 | ||
238 | DEBUG_FUNCTION void | |
239 | dot_rdg (struct graph *rdg) | |
240 | { | |
174ec470 RB |
241 | /* When debugging, you may want to enable the following code. */ |
242 | #if 1 | |
243 | FILE *file = popen("dot -Tx11", "w"); | |
244 | if (!file) | |
245 | return; | |
80ab0b19 | 246 | dot_rdg_1 (file, rdg); |
174ec470 RB |
247 | fflush (file); |
248 | close (fileno (file)); | |
249 | pclose (file); | |
80ab0b19 RB |
250 | #else |
251 | dot_rdg_1 (stderr, rdg); | |
252 | #endif | |
253 | } | |
254 | ||
255 | /* Returns the index of STMT in RDG. */ | |
256 | ||
257 | static int | |
258 | rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple stmt) | |
259 | { | |
260 | int index = gimple_uid (stmt); | |
261 | gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt); | |
262 | return index; | |
263 | } | |
264 | ||
265 | /* Creates an edge in RDG for each distance vector from DDR. The | |
266 | order that we keep track of in the RDG is the order in which | |
267 | statements have to be executed. */ | |
268 | ||
269 | static void | |
270 | create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr) | |
271 | { | |
272 | struct graph_edge *e; | |
273 | int va, vb; | |
274 | data_reference_p dra = DDR_A (ddr); | |
275 | data_reference_p drb = DDR_B (ddr); | |
276 | unsigned level = ddr_dependence_level (ddr); | |
277 | ||
278 | /* For non scalar dependences, when the dependence is REVERSED, | |
279 | statement B has to be executed before statement A. */ | |
280 | if (level > 0 | |
281 | && !DDR_REVERSED_P (ddr)) | |
282 | { | |
283 | data_reference_p tmp = dra; | |
284 | dra = drb; | |
285 | drb = tmp; | |
286 | } | |
287 | ||
288 | va = rdg_vertex_for_stmt (rdg, DR_STMT (dra)); | |
289 | vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb)); | |
290 | ||
291 | if (va < 0 || vb < 0) | |
292 | return; | |
293 | ||
294 | e = add_edge (rdg, va, vb); | |
295 | e->data = XNEW (struct rdg_edge); | |
296 | ||
297 | RDGE_LEVEL (e) = level; | |
298 | RDGE_RELATION (e) = ddr; | |
299 | ||
300 | /* Determines the type of the data dependence. */ | |
301 | if (DR_IS_READ (dra) && DR_IS_READ (drb)) | |
302 | RDGE_TYPE (e) = input_dd; | |
303 | else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)) | |
304 | RDGE_TYPE (e) = output_dd; | |
305 | else if (DR_IS_WRITE (dra) && DR_IS_READ (drb)) | |
306 | RDGE_TYPE (e) = flow_dd; | |
307 | else if (DR_IS_READ (dra) && DR_IS_WRITE (drb)) | |
308 | RDGE_TYPE (e) = anti_dd; | |
309 | } | |
310 | ||
311 | /* Creates dependence edges in RDG for all the uses of DEF. IDEF is | |
312 | the index of DEF in RDG. */ | |
313 | ||
314 | static void | |
315 | create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) | |
316 | { | |
317 | use_operand_p imm_use_p; | |
318 | imm_use_iterator iterator; | |
319 | ||
320 | FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def) | |
321 | { | |
322 | struct graph_edge *e; | |
323 | int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p)); | |
324 | ||
325 | if (use < 0) | |
326 | continue; | |
327 | ||
328 | e = add_edge (rdg, idef, use); | |
329 | e->data = XNEW (struct rdg_edge); | |
330 | RDGE_TYPE (e) = flow_dd; | |
331 | RDGE_RELATION (e) = NULL; | |
332 | } | |
333 | } | |
334 | ||
36875e8f RB |
335 | /* Creates an edge for the control dependences of BB to the vertex V. */ |
336 | ||
337 | static void | |
338 | create_edge_for_control_dependence (struct graph *rdg, basic_block bb, | |
339 | int v, control_dependences *cd) | |
340 | { | |
341 | bitmap_iterator bi; | |
342 | unsigned edge_n; | |
343 | EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index), | |
344 | 0, edge_n, bi) | |
345 | { | |
346 | basic_block cond_bb = cd->get_edge (edge_n)->src; | |
347 | gimple stmt = last_stmt (cond_bb); | |
348 | if (stmt && is_ctrl_stmt (stmt)) | |
349 | { | |
350 | struct graph_edge *e; | |
351 | int c = rdg_vertex_for_stmt (rdg, stmt); | |
352 | if (c < 0) | |
353 | continue; | |
354 | ||
355 | e = add_edge (rdg, c, v); | |
356 | e->data = XNEW (struct rdg_edge); | |
357 | RDGE_TYPE (e) = control_dd; | |
358 | RDGE_RELATION (e) = NULL; | |
359 | } | |
360 | } | |
361 | } | |
362 | ||
80ab0b19 RB |
363 | /* Creates the edges of the reduced dependence graph RDG. */ |
364 | ||
365 | static void | |
36875e8f | 366 | create_rdg_edges (struct graph *rdg, vec<ddr_p> ddrs, control_dependences *cd) |
80ab0b19 RB |
367 | { |
368 | int i; | |
369 | struct data_dependence_relation *ddr; | |
370 | def_operand_p def_p; | |
371 | ssa_op_iter iter; | |
372 | ||
373 | FOR_EACH_VEC_ELT (ddrs, i, ddr) | |
374 | if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) | |
375 | create_rdg_edge_for_ddr (rdg, ddr); | |
97463b2b RB |
376 | else |
377 | free_dependence_relation (ddr); | |
80ab0b19 RB |
378 | |
379 | for (i = 0; i < rdg->n_vertices; i++) | |
380 | FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), | |
381 | iter, SSA_OP_DEF) | |
382 | create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); | |
36875e8f RB |
383 | |
384 | if (cd) | |
385 | for (i = 0; i < rdg->n_vertices; i++) | |
386 | { | |
387 | gimple stmt = RDG_STMT (rdg, i); | |
388 | if (gimple_code (stmt) == GIMPLE_PHI) | |
389 | { | |
390 | edge_iterator ei; | |
391 | edge e; | |
392 | FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) | |
393 | create_edge_for_control_dependence (rdg, e->src, i, cd); | |
394 | } | |
395 | else | |
396 | create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); | |
397 | } | |
80ab0b19 RB |
398 | } |
399 | ||
400 | /* Build the vertices of the reduced dependence graph RDG. Return false | |
401 | if that failed. */ | |
402 | ||
403 | static bool | |
404 | create_rdg_vertices (struct graph *rdg, vec<gimple> stmts, loop_p loop, | |
405 | vec<data_reference_p> *datarefs) | |
406 | { | |
407 | int i; | |
408 | gimple stmt; | |
409 | ||
410 | FOR_EACH_VEC_ELT (stmts, i, stmt) | |
411 | { | |
412 | struct vertex *v = &(rdg->vertices[i]); | |
413 | ||
414 | /* Record statement to vertex mapping. */ | |
415 | gimple_set_uid (stmt, i); | |
416 | ||
417 | v->data = XNEW (struct rdg_vertex); | |
418 | RDGV_STMT (v) = stmt; | |
419 | RDGV_DATAREFS (v).create (0); | |
420 | RDGV_HAS_MEM_WRITE (v) = false; | |
421 | RDGV_HAS_MEM_READS (v) = false; | |
422 | if (gimple_code (stmt) == GIMPLE_PHI) | |
423 | continue; | |
424 | ||
425 | unsigned drp = datarefs->length (); | |
426 | if (!find_data_references_in_stmt (loop, stmt, datarefs)) | |
427 | return false; | |
428 | for (unsigned j = drp; j < datarefs->length (); ++j) | |
429 | { | |
430 | data_reference_p dr = (*datarefs)[j]; | |
431 | if (DR_IS_READ (dr)) | |
432 | RDGV_HAS_MEM_READS (v) = true; | |
433 | else | |
434 | RDGV_HAS_MEM_WRITE (v) = true; | |
435 | RDGV_DATAREFS (v).safe_push (dr); | |
436 | } | |
437 | } | |
438 | return true; | |
439 | } | |
440 | ||
2fd5894f | 441 | /* Initialize STMTS with all the statements of LOOP. The order in |
80ab0b19 RB |
442 | which we discover statements is important as |
443 | generate_loops_for_partition is using the same traversal for | |
2fd5894f | 444 | identifying statements in loop copies. */ |
80ab0b19 RB |
445 | |
446 | static void | |
447 | stmts_from_loop (struct loop *loop, vec<gimple> *stmts) | |
448 | { | |
449 | unsigned int i; | |
450 | basic_block *bbs = get_loop_body_in_dom_order (loop); | |
451 | ||
452 | for (i = 0; i < loop->num_nodes; i++) | |
453 | { | |
454 | basic_block bb = bbs[i]; | |
455 | gimple_stmt_iterator bsi; | |
456 | gimple stmt; | |
457 | ||
458 | for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
83a95546 RB |
459 | if (!virtual_operand_p (gimple_phi_result (gsi_stmt (bsi)))) |
460 | stmts->safe_push (gsi_stmt (bsi)); | |
80ab0b19 RB |
461 | |
462 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
463 | { | |
464 | stmt = gsi_stmt (bsi); | |
465 | if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt)) | |
466 | stmts->safe_push (stmt); | |
467 | } | |
468 | } | |
469 | ||
470 | free (bbs); | |
471 | } | |
472 | ||
80ab0b19 RB |
473 | /* Build the Reduced Dependence Graph (RDG) with one vertex per |
474 | statement of the loop nest, and one edge per data dependence or | |
475 | scalar dependence. */ | |
476 | ||
477 | struct graph * | |
478 | build_empty_rdg (int n_stmts) | |
479 | { | |
480 | struct graph *rdg = new_graph (n_stmts); | |
481 | return rdg; | |
482 | } | |
483 | ||
484 | /* Free the reduced dependence graph RDG. */ | |
485 | ||
486 | static void | |
487 | free_rdg (struct graph *rdg) | |
488 | { | |
489 | int i; | |
490 | ||
491 | for (i = 0; i < rdg->n_vertices; i++) | |
492 | { | |
493 | struct vertex *v = &(rdg->vertices[i]); | |
494 | struct graph_edge *e; | |
495 | ||
496 | for (e = v->succ; e; e = e->succ_next) | |
497 | { | |
498 | free_dependence_relation (RDGE_RELATION (e)); | |
499 | free (e->data); | |
500 | } | |
501 | ||
502 | if (v->data) | |
503 | { | |
504 | gimple_set_uid (RDGV_STMT (v), -1); | |
505 | free_data_refs (RDGV_DATAREFS (v)); | |
506 | free (v->data); | |
507 | } | |
508 | } | |
509 | ||
510 | free_graph (rdg); | |
511 | } | |
512 | ||
513 | /* Build the Reduced Dependence Graph (RDG) with one vertex per | |
97463b2b | 514 | statement of the loop nest LOOP_NEST, and one edge per data dependence or |
80ab0b19 RB |
515 | scalar dependence. */ |
516 | ||
517 | static struct graph * | |
36875e8f | 518 | build_rdg (vec<loop_p> loop_nest, control_dependences *cd) |
80ab0b19 RB |
519 | { |
520 | struct graph *rdg; | |
80ab0b19 RB |
521 | vec<gimple> stmts; |
522 | vec<data_reference_p> datarefs; | |
523 | vec<ddr_p> dependence_relations; | |
524 | ||
97463b2b | 525 | /* Create the RDG vertices from the stmts of the loop nest. */ |
80ab0b19 | 526 | stmts.create (10); |
97463b2b | 527 | stmts_from_loop (loop_nest[0], &stmts); |
80ab0b19 RB |
528 | rdg = build_empty_rdg (stmts.length ()); |
529 | datarefs.create (10); | |
97463b2b | 530 | if (!create_rdg_vertices (rdg, stmts, loop_nest[0], &datarefs)) |
80ab0b19 RB |
531 | { |
532 | stmts.release (); | |
97463b2b | 533 | datarefs.release (); |
80ab0b19 RB |
534 | free_rdg (rdg); |
535 | return NULL; | |
536 | } | |
537 | stmts.release (); | |
97463b2b RB |
538 | |
539 | /* Create the RDG edges from the data dependences in the loop nest. */ | |
80ab0b19 RB |
540 | dependence_relations.create (100); |
541 | if (!compute_all_dependences (datarefs, &dependence_relations, loop_nest, | |
542 | false) | |
543 | || !known_dependences_p (dependence_relations)) | |
544 | { | |
97463b2b | 545 | free_dependence_relations (dependence_relations); |
80ab0b19 | 546 | datarefs.release (); |
80ab0b19 RB |
547 | free_rdg (rdg); |
548 | return NULL; | |
549 | } | |
36875e8f | 550 | create_rdg_edges (rdg, dependence_relations, cd); |
80ab0b19 | 551 | dependence_relations.release (); |
97463b2b | 552 | datarefs.release (); |
80ab0b19 RB |
553 | |
554 | return rdg; | |
555 | } | |
556 | ||
80ab0b19 | 557 | |
dea61d92 | 558 | |
b9fc0497 RB |
559 | enum partition_kind { |
560 | PKIND_NORMAL, PKIND_REDUCTION, PKIND_MEMSET, PKIND_MEMCPY | |
561 | }; | |
30d55936 | 562 | |
c61f8985 RG |
563 | typedef struct partition_s |
564 | { | |
565 | bitmap stmts; | |
80ab0b19 | 566 | bitmap loops; |
7ad672e4 | 567 | bool has_writes; |
30d55936 | 568 | enum partition_kind kind; |
d0582dc1 RG |
569 | /* data-references a kind != PKIND_NORMAL partition is about. */ |
570 | data_reference_p main_dr; | |
571 | data_reference_p secondary_dr; | |
c61f8985 RG |
572 | } *partition_t; |
573 | ||
c61f8985 RG |
574 | |
575 | /* Allocate and initialize a partition from BITMAP. */ | |
576 | ||
577 | static partition_t | |
80ab0b19 | 578 | partition_alloc (bitmap stmts, bitmap loops) |
c61f8985 RG |
579 | { |
580 | partition_t partition = XCNEW (struct partition_s); | |
581 | partition->stmts = stmts ? stmts : BITMAP_ALLOC (NULL); | |
80ab0b19 | 582 | partition->loops = loops ? loops : BITMAP_ALLOC (NULL); |
7ad672e4 | 583 | partition->has_writes = false; |
30d55936 | 584 | partition->kind = PKIND_NORMAL; |
c61f8985 RG |
585 | return partition; |
586 | } | |
587 | ||
588 | /* Free PARTITION. */ | |
589 | ||
590 | static void | |
591 | partition_free (partition_t partition) | |
592 | { | |
593 | BITMAP_FREE (partition->stmts); | |
80ab0b19 | 594 | BITMAP_FREE (partition->loops); |
c61f8985 RG |
595 | free (partition); |
596 | } | |
597 | ||
30d55936 RG |
598 | /* Returns true if the partition can be generated as a builtin. */ |
599 | ||
600 | static bool | |
601 | partition_builtin_p (partition_t partition) | |
602 | { | |
b9fc0497 | 603 | return partition->kind > PKIND_REDUCTION; |
30d55936 | 604 | } |
c61f8985 | 605 | |
7ad672e4 RG |
606 | /* Returns true if the partition has an writes. */ |
607 | ||
608 | static bool | |
609 | partition_has_writes (partition_t partition) | |
610 | { | |
611 | return partition->has_writes; | |
612 | } | |
613 | ||
c07a8cb3 RG |
614 | /* Returns true when DEF is an SSA_NAME defined in LOOP and used after |
615 | the LOOP. */ | |
616 | ||
617 | static bool | |
618 | ssa_name_has_uses_outside_loop_p (tree def, loop_p loop) | |
619 | { | |
620 | imm_use_iterator imm_iter; | |
621 | use_operand_p use_p; | |
622 | ||
623 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) | |
e665269a RG |
624 | { |
625 | gimple use_stmt = USE_STMT (use_p); | |
626 | if (!is_gimple_debug (use_stmt) | |
627 | && loop != loop_containing_stmt (use_stmt)) | |
628 | return true; | |
629 | } | |
c07a8cb3 RG |
630 | |
631 | return false; | |
632 | } | |
633 | ||
634 | /* Returns true when STMT defines a scalar variable used after the | |
88af7c1a | 635 | loop LOOP. */ |
c07a8cb3 RG |
636 | |
637 | static bool | |
88af7c1a | 638 | stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple stmt) |
c07a8cb3 | 639 | { |
88af7c1a RG |
640 | def_operand_p def_p; |
641 | ssa_op_iter op_iter; | |
c07a8cb3 | 642 | |
9ca86fc3 RG |
643 | if (gimple_code (stmt) == GIMPLE_PHI) |
644 | return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop); | |
645 | ||
88af7c1a RG |
646 | FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) |
647 | if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop)) | |
648 | return true; | |
c07a8cb3 | 649 | |
88af7c1a | 650 | return false; |
c07a8cb3 RG |
651 | } |
652 | ||
dea61d92 SP |
653 | /* Return a copy of LOOP placed before LOOP. */ |
654 | ||
655 | static struct loop * | |
656 | copy_loop_before (struct loop *loop) | |
657 | { | |
658 | struct loop *res; | |
659 | edge preheader = loop_preheader_edge (loop); | |
660 | ||
dea61d92 SP |
661 | initialize_original_copy_tables (); |
662 | res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, preheader); | |
30d55936 | 663 | gcc_assert (res != NULL); |
dea61d92 | 664 | free_original_copy_tables (); |
2cfc56b9 | 665 | delete_update_ssa (); |
dea61d92 SP |
666 | |
667 | return res; | |
668 | } | |
669 | ||
670 | /* Creates an empty basic block after LOOP. */ | |
671 | ||
672 | static void | |
673 | create_bb_after_loop (struct loop *loop) | |
674 | { | |
675 | edge exit = single_exit (loop); | |
676 | ||
677 | if (!exit) | |
678 | return; | |
679 | ||
680 | split_edge (exit); | |
681 | } | |
682 | ||
683 | /* Generate code for PARTITION from the code in LOOP. The loop is | |
684 | copied when COPY_P is true. All the statements not flagged in the | |
685 | PARTITION bitmap are removed from the loop or from its copy. The | |
686 | statements are indexed in sequence inside a basic block, and the | |
30d55936 | 687 | basic blocks of a loop are taken in dom order. */ |
dea61d92 | 688 | |
30d55936 | 689 | static void |
c61f8985 RG |
690 | generate_loops_for_partition (struct loop *loop, partition_t partition, |
691 | bool copy_p) | |
dea61d92 | 692 | { |
2fd5894f | 693 | unsigned i; |
726a989a | 694 | gimple_stmt_iterator bsi; |
dea61d92 SP |
695 | basic_block *bbs; |
696 | ||
697 | if (copy_p) | |
698 | { | |
699 | loop = copy_loop_before (loop); | |
30d55936 | 700 | gcc_assert (loop != NULL); |
dea61d92 SP |
701 | create_preheader (loop, CP_SIMPLE_PREHEADERS); |
702 | create_bb_after_loop (loop); | |
703 | } | |
704 | ||
2fd5894f | 705 | /* Remove stmts not in the PARTITION bitmap. */ |
dea61d92 SP |
706 | bbs = get_loop_body_in_dom_order (loop); |
707 | ||
b03c3082 | 708 | if (MAY_HAVE_DEBUG_STMTS) |
2fd5894f | 709 | for (i = 0; i < loop->num_nodes; i++) |
b03c3082 JJ |
710 | { |
711 | basic_block bb = bbs[i]; | |
712 | ||
713 | for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
2fd5894f RB |
714 | { |
715 | gimple phi = gsi_stmt (bsi); | |
716 | if (!virtual_operand_p (gimple_phi_result (phi)) | |
717 | && !bitmap_bit_p (partition->stmts, gimple_uid (phi))) | |
718 | reset_debug_uses (phi); | |
719 | } | |
b03c3082 JJ |
720 | |
721 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
722 | { | |
723 | gimple stmt = gsi_stmt (bsi); | |
724 | if (gimple_code (stmt) != GIMPLE_LABEL | |
725 | && !is_gimple_debug (stmt) | |
2fd5894f | 726 | && !bitmap_bit_p (partition->stmts, gimple_uid (stmt))) |
b03c3082 JJ |
727 | reset_debug_uses (stmt); |
728 | } | |
729 | } | |
730 | ||
2fd5894f | 731 | for (i = 0; i < loop->num_nodes; i++) |
dea61d92 SP |
732 | { |
733 | basic_block bb = bbs[i]; | |
dea61d92 | 734 | |
726a989a | 735 | for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);) |
2fd5894f RB |
736 | { |
737 | gimple phi = gsi_stmt (bsi); | |
738 | if (!virtual_operand_p (gimple_phi_result (phi)) | |
739 | && !bitmap_bit_p (partition->stmts, gimple_uid (phi))) | |
2706a615 | 740 | remove_phi_node (&bsi, true); |
2fd5894f RB |
741 | else |
742 | gsi_next (&bsi); | |
743 | } | |
dea61d92 | 744 | |
726a989a | 745 | for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);) |
2706a615 RG |
746 | { |
747 | gimple stmt = gsi_stmt (bsi); | |
b03c3082 JJ |
748 | if (gimple_code (stmt) != GIMPLE_LABEL |
749 | && !is_gimple_debug (stmt) | |
2fd5894f | 750 | && !bitmap_bit_p (partition->stmts, gimple_uid (stmt))) |
2706a615 | 751 | { |
36875e8f RB |
752 | /* Choose an arbitrary path through the empty CFG part |
753 | that this unnecessary control stmt controls. */ | |
754 | if (gimple_code (stmt) == GIMPLE_COND) | |
755 | { | |
756 | gimple_cond_make_false (stmt); | |
757 | update_stmt (stmt); | |
758 | } | |
759 | else if (gimple_code (stmt) == GIMPLE_SWITCH) | |
760 | { | |
761 | gimple_switch_set_index | |
762 | (stmt, CASE_LOW (gimple_switch_label (stmt, 1))); | |
763 | update_stmt (stmt); | |
764 | } | |
765 | else | |
766 | { | |
767 | unlink_stmt_vdef (stmt); | |
768 | gsi_remove (&bsi, true); | |
769 | release_defs (stmt); | |
770 | continue; | |
771 | } | |
2706a615 | 772 | } |
36875e8f | 773 | gsi_next (&bsi); |
2706a615 | 774 | } |
dea61d92 SP |
775 | } |
776 | ||
777 | free (bbs); | |
dea61d92 SP |
778 | } |
779 | ||
d0582dc1 | 780 | /* Build the size argument for a memory operation call. */ |
3661e899 | 781 | |
d0582dc1 RG |
782 | static tree |
783 | build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter) | |
3661e899 | 784 | { |
d0582dc1 RG |
785 | tree size; |
786 | size = fold_build2_loc (loc, MULT_EXPR, sizetype, | |
787 | fold_convert_loc (loc, sizetype, nb_iter), | |
788 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); | |
789 | return fold_convert_loc (loc, size_type_node, size); | |
790 | } | |
791 | ||
792 | /* Build an address argument for a memory operation call. */ | |
793 | ||
794 | static tree | |
795 | build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes) | |
796 | { | |
797 | tree addr_base; | |
798 | ||
799 | addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); | |
800 | addr_base = fold_convert_loc (loc, sizetype, addr_base); | |
801 | ||
802 | /* Test for a negative stride, iterating over every element. */ | |
803 | if (tree_int_cst_sgn (DR_STEP (dr)) == -1) | |
804 | { | |
805 | addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, | |
806 | fold_convert_loc (loc, sizetype, nb_bytes)); | |
807 | addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, | |
808 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); | |
809 | } | |
810 | ||
811 | return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base); | |
3661e899 TB |
812 | } |
813 | ||
401f3a81 JJ |
814 | /* If VAL memory representation contains the same value in all bytes, |
815 | return that value, otherwise return -1. | |
816 | E.g. for 0x24242424 return 0x24, for IEEE double | |
817 | 747708026454360457216.0 return 0x44, etc. */ | |
818 | ||
819 | static int | |
820 | const_with_all_bytes_same (tree val) | |
821 | { | |
822 | unsigned char buf[64]; | |
823 | int i, len; | |
824 | ||
825 | if (integer_zerop (val) | |
826 | || real_zerop (val) | |
827 | || (TREE_CODE (val) == CONSTRUCTOR | |
828 | && !TREE_CLOBBER_P (val) | |
829 | && CONSTRUCTOR_NELTS (val) == 0)) | |
830 | return 0; | |
831 | ||
832 | if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) | |
833 | return -1; | |
834 | ||
835 | len = native_encode_expr (val, buf, sizeof (buf)); | |
836 | if (len == 0) | |
837 | return -1; | |
838 | for (i = 1; i < len; i++) | |
839 | if (buf[i] != buf[0]) | |
840 | return -1; | |
841 | return buf[0]; | |
842 | } | |
843 | ||
30d55936 | 844 | /* Generate a call to memset for PARTITION in LOOP. */ |
dea61d92 | 845 | |
cfee318d | 846 | static void |
d0582dc1 | 847 | generate_memset_builtin (struct loop *loop, partition_t partition) |
dea61d92 | 848 | { |
30d55936 RG |
849 | gimple_stmt_iterator gsi; |
850 | gimple stmt, fn_call; | |
d0582dc1 | 851 | tree nb_iter, mem, fn, nb_bytes; |
30d55936 | 852 | location_t loc; |
b6dd5261 | 853 | tree val; |
30d55936 | 854 | |
d0582dc1 | 855 | stmt = DR_STMT (partition->main_dr); |
30d55936 | 856 | loc = gimple_location (stmt); |
30d55936 RG |
857 | if (gimple_bb (stmt) == loop->latch) |
858 | nb_iter = number_of_latch_executions (loop); | |
859 | else | |
860 | nb_iter = number_of_exit_cond_executions (loop); | |
861 | ||
862 | /* The new statements will be placed before LOOP. */ | |
863 | gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | |
dea61d92 | 864 | |
d0582dc1 RG |
865 | nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); |
866 | nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, | |
867 | false, GSI_CONTINUE_LINKING); | |
868 | mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); | |
869 | mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, | |
870 | false, GSI_CONTINUE_LINKING); | |
dea61d92 | 871 | |
b6dd5261 RG |
872 | /* This exactly matches the pattern recognition in classify_partition. */ |
873 | val = gimple_assign_rhs1 (stmt); | |
401f3a81 JJ |
874 | /* Handle constants like 0x15151515 and similarly |
875 | floating point constants etc. where all bytes are the same. */ | |
876 | int bytev = const_with_all_bytes_same (val); | |
877 | if (bytev != -1) | |
878 | val = build_int_cst (integer_type_node, bytev); | |
879 | else if (TREE_CODE (val) == INTEGER_CST) | |
880 | val = fold_convert (integer_type_node, val); | |
881 | else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val))) | |
b6dd5261 | 882 | { |
401f3a81 JJ |
883 | gimple cstmt; |
884 | tree tem = make_ssa_name (integer_type_node, NULL); | |
885 | cstmt = gimple_build_assign_with_ops (NOP_EXPR, tem, val, NULL_TREE); | |
886 | gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING); | |
887 | val = tem; | |
b6dd5261 RG |
888 | } |
889 | ||
e79983f4 | 890 | fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); |
b6dd5261 | 891 | fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); |
d0582dc1 | 892 | gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); |
dea61d92 SP |
893 | |
894 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
b6dd5261 RG |
895 | { |
896 | fprintf (dump_file, "generated memset"); | |
401f3a81 | 897 | if (bytev == 0) |
b6dd5261 | 898 | fprintf (dump_file, " zero\n"); |
b6dd5261 RG |
899 | else |
900 | fprintf (dump_file, "\n"); | |
901 | } | |
dea61d92 SP |
902 | } |
903 | ||
d0582dc1 RG |
904 | /* Generate a call to memcpy for PARTITION in LOOP. */ |
905 | ||
906 | static void | |
907 | generate_memcpy_builtin (struct loop *loop, partition_t partition) | |
908 | { | |
909 | gimple_stmt_iterator gsi; | |
910 | gimple stmt, fn_call; | |
911 | tree nb_iter, dest, src, fn, nb_bytes; | |
912 | location_t loc; | |
913 | enum built_in_function kind; | |
914 | ||
915 | stmt = DR_STMT (partition->main_dr); | |
916 | loc = gimple_location (stmt); | |
917 | if (gimple_bb (stmt) == loop->latch) | |
918 | nb_iter = number_of_latch_executions (loop); | |
919 | else | |
920 | nb_iter = number_of_exit_cond_executions (loop); | |
921 | ||
922 | /* The new statements will be placed before LOOP. */ | |
923 | gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | |
924 | ||
925 | nb_bytes = build_size_arg_loc (loc, partition->main_dr, nb_iter); | |
926 | nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, | |
927 | false, GSI_CONTINUE_LINKING); | |
928 | dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); | |
929 | src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes); | |
930 | if (ptr_derefs_may_alias_p (dest, src)) | |
931 | kind = BUILT_IN_MEMMOVE; | |
932 | else | |
933 | kind = BUILT_IN_MEMCPY; | |
934 | ||
935 | dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE, | |
936 | false, GSI_CONTINUE_LINKING); | |
937 | src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, | |
938 | false, GSI_CONTINUE_LINKING); | |
939 | fn = build_fold_addr_expr (builtin_decl_implicit (kind)); | |
940 | fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); | |
941 | gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); | |
942 | ||
943 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
944 | { | |
945 | if (kind == BUILT_IN_MEMCPY) | |
946 | fprintf (dump_file, "generated memcpy\n"); | |
947 | else | |
948 | fprintf (dump_file, "generated memmove\n"); | |
949 | } | |
950 | } | |
951 | ||
30d55936 | 952 | /* Remove and destroy the loop LOOP. */ |
dea61d92 | 953 | |
30d55936 RG |
954 | static void |
955 | destroy_loop (struct loop *loop) | |
dea61d92 | 956 | { |
30d55936 RG |
957 | unsigned nbbs = loop->num_nodes; |
958 | edge exit = single_exit (loop); | |
959 | basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; | |
dea61d92 | 960 | basic_block *bbs; |
30d55936 | 961 | unsigned i; |
dea61d92 SP |
962 | |
963 | bbs = get_loop_body_in_dom_order (loop); | |
964 | ||
30d55936 RG |
965 | redirect_edge_pred (exit, src); |
966 | exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); | |
967 | exit->flags |= EDGE_FALLTHRU; | |
968 | cancel_loop_tree (loop); | |
969 | rescan_loop_exit (exit, false, true); | |
dea61d92 | 970 | |
30d55936 | 971 | for (i = 0; i < nbbs; i++) |
c014f6f5 RG |
972 | { |
973 | /* We have made sure to not leave any dangling uses of SSA | |
974 | names defined in the loop. With the exception of virtuals. | |
975 | Make sure we replace all uses of virtual defs that will remain | |
976 | outside of the loop with the bare symbol as delete_basic_block | |
977 | will release them. */ | |
978 | gimple_stmt_iterator gsi; | |
979 | for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | |
980 | { | |
981 | gimple phi = gsi_stmt (gsi); | |
ea057359 | 982 | if (virtual_operand_p (gimple_phi_result (phi))) |
c014f6f5 RG |
983 | mark_virtual_phi_result_for_renaming (phi); |
984 | } | |
985 | for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) | |
986 | { | |
987 | gimple stmt = gsi_stmt (gsi); | |
988 | tree vdef = gimple_vdef (stmt); | |
989 | if (vdef && TREE_CODE (vdef) == SSA_NAME) | |
990 | mark_virtual_operand_for_renaming (vdef); | |
991 | } | |
992 | delete_basic_block (bbs[i]); | |
993 | } | |
dea61d92 | 994 | free (bbs); |
30d55936 RG |
995 | |
996 | set_immediate_dominator (CDI_DOMINATORS, dest, | |
997 | recompute_dominator (CDI_DOMINATORS, dest)); | |
dea61d92 SP |
998 | } |
999 | ||
30d55936 | 1000 | /* Generates code for PARTITION. */ |
dea61d92 | 1001 | |
30d55936 | 1002 | static void |
d0582dc1 | 1003 | generate_code_for_partition (struct loop *loop, |
be6b029b | 1004 | partition_t partition, bool copy_p) |
dea61d92 | 1005 | { |
30d55936 RG |
1006 | switch (partition->kind) |
1007 | { | |
1008 | case PKIND_MEMSET: | |
d0582dc1 RG |
1009 | generate_memset_builtin (loop, partition); |
1010 | /* If this is the last partition for which we generate code, we have | |
1011 | to destroy the loop. */ | |
1012 | if (!copy_p) | |
1013 | destroy_loop (loop); | |
1014 | break; | |
1015 | ||
1016 | case PKIND_MEMCPY: | |
1017 | generate_memcpy_builtin (loop, partition); | |
30d55936 RG |
1018 | /* If this is the last partition for which we generate code, we have |
1019 | to destroy the loop. */ | |
1020 | if (!copy_p) | |
1021 | destroy_loop (loop); | |
1022 | break; | |
1023 | ||
b9fc0497 RB |
1024 | case PKIND_REDUCTION: |
1025 | /* Reductions all have to be in the last partition. */ | |
1026 | gcc_assert (!copy_p); | |
30d55936 RG |
1027 | case PKIND_NORMAL: |
1028 | generate_loops_for_partition (loop, partition, copy_p); | |
1029 | break; | |
1030 | ||
1031 | default: | |
1032 | gcc_unreachable (); | |
1033 | } | |
dea61d92 SP |
1034 | } |
1035 | ||
1036 | ||
1037 | /* Returns true if the node V of RDG cannot be recomputed. */ | |
1038 | ||
1039 | static bool | |
1040 | rdg_cannot_recompute_vertex_p (struct graph *rdg, int v) | |
1041 | { | |
1042 | if (RDG_MEM_WRITE_STMT (rdg, v)) | |
1043 | return true; | |
1044 | ||
1045 | return false; | |
1046 | } | |
1047 | ||
1048 | /* Returns true when the vertex V has already been generated in the | |
1049 | current partition (V is in PROCESSED), or when V belongs to another | |
1050 | partition and cannot be recomputed (V is not in REMAINING_STMTS). */ | |
1051 | ||
1052 | static inline bool | |
1053 | already_processed_vertex_p (bitmap processed, int v) | |
1054 | { | |
174ec470 | 1055 | return bitmap_bit_p (processed, v); |
dea61d92 SP |
1056 | } |
1057 | ||
c61f8985 | 1058 | static void rdg_flag_vertex_and_dependent (struct graph *, int, partition_t, |
80ab0b19 | 1059 | bitmap); |
dea61d92 | 1060 | |
dea61d92 SP |
1061 | /* Flag V from RDG as part of PARTITION, and also flag its loop number |
1062 | in LOOPS. */ | |
1063 | ||
1064 | static void | |
80ab0b19 | 1065 | rdg_flag_vertex (struct graph *rdg, int v, partition_t partition) |
dea61d92 SP |
1066 | { |
1067 | struct loop *loop; | |
1068 | ||
c61f8985 | 1069 | if (!bitmap_set_bit (partition->stmts, v)) |
dea61d92 SP |
1070 | return; |
1071 | ||
1072 | loop = loop_containing_stmt (RDG_STMT (rdg, v)); | |
80ab0b19 | 1073 | bitmap_set_bit (partition->loops, loop->num); |
dea61d92 SP |
1074 | |
1075 | if (rdg_cannot_recompute_vertex_p (rdg, v)) | |
174ec470 | 1076 | partition->has_writes = true; |
dea61d92 SP |
1077 | } |
1078 | ||
1079 | /* Flag in the bitmap PARTITION the vertex V and all its predecessors. | |
fa10beec | 1080 | Also flag their loop number in LOOPS. */ |
dea61d92 SP |
1081 | |
1082 | static void | |
c61f8985 | 1083 | rdg_flag_vertex_and_dependent (struct graph *rdg, int v, partition_t partition, |
80ab0b19 | 1084 | bitmap processed) |
dea61d92 SP |
1085 | { |
1086 | unsigned i; | |
9771b263 DN |
1087 | vec<int> nodes; |
1088 | nodes.create (3); | |
dea61d92 SP |
1089 | int x; |
1090 | ||
174ec470 | 1091 | graphds_dfs (rdg, &v, 1, &nodes, false, NULL); |
dea61d92 | 1092 | |
9771b263 | 1093 | FOR_EACH_VEC_ELT (nodes, i, x) |
174ec470 RB |
1094 | if (bitmap_set_bit (processed, x)) |
1095 | rdg_flag_vertex (rdg, x, partition); | |
dea61d92 | 1096 | |
9771b263 | 1097 | nodes.release (); |
dea61d92 SP |
1098 | } |
1099 | ||
83a95546 RB |
1100 | /* Returns a partition with all the statements needed for computing |
1101 | the vertex V of the RDG, also including the loop exit conditions. */ | |
dea61d92 | 1102 | |
c61f8985 | 1103 | static partition_t |
83a95546 | 1104 | build_rdg_partition_for_vertex (struct graph *rdg, int v) |
dea61d92 | 1105 | { |
80ab0b19 | 1106 | partition_t partition = partition_alloc (NULL, NULL); |
dea61d92 | 1107 | bitmap processed = BITMAP_ALLOC (NULL); |
83a95546 | 1108 | rdg_flag_vertex_and_dependent (rdg, v, partition, processed); |
dea61d92 | 1109 | BITMAP_FREE (processed); |
dea61d92 SP |
1110 | return partition; |
1111 | } | |
1112 | ||
30d55936 RG |
1113 | /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. |
1114 | For the moment we detect only the memset zero pattern. */ | |
cfee318d | 1115 | |
30d55936 RG |
1116 | static void |
1117 | classify_partition (loop_p loop, struct graph *rdg, partition_t partition) | |
cfee318d | 1118 | { |
cfee318d | 1119 | bitmap_iterator bi; |
30d55936 RG |
1120 | unsigned i; |
1121 | tree nb_iter; | |
d0582dc1 | 1122 | data_reference_p single_load, single_store; |
b9fc0497 | 1123 | bool volatiles_p = false; |
30d55936 RG |
1124 | |
1125 | partition->kind = PKIND_NORMAL; | |
d0582dc1 RG |
1126 | partition->main_dr = NULL; |
1127 | partition->secondary_dr = NULL; | |
30d55936 | 1128 | |
c61f8985 | 1129 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) |
30d55936 RG |
1130 | { |
1131 | gimple stmt = RDG_STMT (rdg, i); | |
1132 | ||
1133 | if (gimple_has_volatile_ops (stmt)) | |
b9fc0497 | 1134 | volatiles_p = true; |
cfee318d | 1135 | |
30d55936 RG |
1136 | /* If the stmt has uses outside of the loop fail. |
1137 | ??? If the stmt is generated in another partition that | |
1138 | is not created as builtin we can ignore this. */ | |
9ca86fc3 | 1139 | if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) |
30d55936 RG |
1140 | { |
1141 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1142 | fprintf (dump_file, "not generating builtin, partition has " | |
1143 | "scalar uses outside of the loop\n"); | |
b9fc0497 | 1144 | partition->kind = PKIND_REDUCTION; |
30d55936 RG |
1145 | return; |
1146 | } | |
1147 | } | |
1148 | ||
b9fc0497 RB |
1149 | /* Perform general partition disqualification for builtins. */ |
1150 | if (volatiles_p | |
1151 | || !flag_tree_loop_distribute_patterns) | |
1152 | return; | |
1153 | ||
1154 | nb_iter = number_of_exit_cond_executions (loop); | |
1155 | if (!nb_iter || nb_iter == chrec_dont_know) | |
1156 | return; | |
1157 | ||
d0582dc1 RG |
1158 | /* Detect memset and memcpy. */ |
1159 | single_load = NULL; | |
1160 | single_store = NULL; | |
30d55936 RG |
1161 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) |
1162 | { | |
1163 | gimple stmt = RDG_STMT (rdg, i); | |
d0582dc1 RG |
1164 | data_reference_p dr; |
1165 | unsigned j; | |
30d55936 RG |
1166 | |
1167 | if (gimple_code (stmt) == GIMPLE_PHI) | |
1168 | continue; | |
1169 | ||
1170 | /* Any scalar stmts are ok. */ | |
1171 | if (!gimple_vuse (stmt)) | |
1172 | continue; | |
1173 | ||
d0582dc1 RG |
1174 | /* Otherwise just regular loads/stores. */ |
1175 | if (!gimple_assign_single_p (stmt)) | |
1176 | return; | |
1177 | ||
1178 | /* But exactly one store and/or load. */ | |
9771b263 | 1179 | for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j) |
30d55936 | 1180 | { |
d0582dc1 RG |
1181 | if (DR_IS_READ (dr)) |
1182 | { | |
1183 | if (single_load != NULL) | |
1184 | return; | |
1185 | single_load = dr; | |
1186 | } | |
1187 | else | |
1188 | { | |
1189 | if (single_store != NULL) | |
1190 | return; | |
1191 | single_store = dr; | |
1192 | } | |
30d55936 | 1193 | } |
30d55936 RG |
1194 | } |
1195 | ||
d0582dc1 RG |
1196 | if (single_store && !single_load) |
1197 | { | |
1198 | gimple stmt = DR_STMT (single_store); | |
1199 | tree rhs = gimple_assign_rhs1 (stmt); | |
401f3a81 JJ |
1200 | if (const_with_all_bytes_same (rhs) == -1 |
1201 | && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs)) | |
1202 | || (TYPE_MODE (TREE_TYPE (rhs)) | |
1203 | != TYPE_MODE (unsigned_char_type_node)))) | |
d0582dc1 RG |
1204 | return; |
1205 | if (TREE_CODE (rhs) == SSA_NAME | |
1206 | && !SSA_NAME_IS_DEFAULT_DEF (rhs) | |
1207 | && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) | |
1208 | return; | |
1209 | if (!adjacent_dr_p (single_store)) | |
1210 | return; | |
1211 | partition->kind = PKIND_MEMSET; | |
1212 | partition->main_dr = single_store; | |
1213 | } | |
1214 | else if (single_store && single_load) | |
1215 | { | |
1216 | gimple store = DR_STMT (single_store); | |
1217 | gimple load = DR_STMT (single_load); | |
1218 | /* Direct aggregate copy or via an SSA name temporary. */ | |
1219 | if (load != store | |
1220 | && gimple_assign_lhs (load) != gimple_assign_rhs1 (store)) | |
1221 | return; | |
1222 | if (!adjacent_dr_p (single_store) | |
1223 | || !adjacent_dr_p (single_load) | |
1224 | || !operand_equal_p (DR_STEP (single_store), | |
1225 | DR_STEP (single_load), 0)) | |
1226 | return; | |
f20132e7 RG |
1227 | /* Now check that if there is a dependence this dependence is |
1228 | of a suitable form for memmove. */ | |
6e1aa848 | 1229 | vec<loop_p> loops = vNULL; |
f20132e7 | 1230 | ddr_p ddr; |
9771b263 | 1231 | loops.safe_push (loop); |
f20132e7 RG |
1232 | ddr = initialize_data_dependence_relation (single_load, single_store, |
1233 | loops); | |
1234 | compute_affine_dependence (ddr, loop); | |
f20132e7 RG |
1235 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) |
1236 | { | |
1237 | free_dependence_relation (ddr); | |
9771b263 | 1238 | loops.release (); |
f20132e7 RG |
1239 | return; |
1240 | } | |
1241 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known) | |
1242 | { | |
1243 | if (DDR_NUM_DIST_VECTS (ddr) == 0) | |
1244 | { | |
1245 | free_dependence_relation (ddr); | |
9771b263 | 1246 | loops.release (); |
f20132e7 RG |
1247 | return; |
1248 | } | |
1249 | lambda_vector dist_v; | |
9771b263 | 1250 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
f20132e7 RG |
1251 | { |
1252 | int dist = dist_v[index_in_loop_nest (loop->num, | |
1253 | DDR_LOOP_NEST (ddr))]; | |
1254 | if (dist > 0 && !DDR_REVERSED_P (ddr)) | |
1255 | { | |
1256 | free_dependence_relation (ddr); | |
9771b263 | 1257 | loops.release (); |
f20132e7 RG |
1258 | return; |
1259 | } | |
1260 | } | |
1261 | } | |
b7ce70b3 | 1262 | free_dependence_relation (ddr); |
9771b263 | 1263 | loops.release (); |
d0582dc1 RG |
1264 | partition->kind = PKIND_MEMCPY; |
1265 | partition->main_dr = single_store; | |
1266 | partition->secondary_dr = single_load; | |
1267 | } | |
cfee318d SP |
1268 | } |
1269 | ||
1fa0c180 RG |
1270 | /* For a data reference REF, return the declaration of its base |
1271 | address or NULL_TREE if the base is not determined. */ | |
1272 | ||
1273 | static tree | |
1274 | ref_base_address (data_reference_p dr) | |
1275 | { | |
1276 | tree base_address = DR_BASE_ADDRESS (dr); | |
1277 | if (base_address | |
1278 | && TREE_CODE (base_address) == ADDR_EXPR) | |
1279 | return TREE_OPERAND (base_address, 0); | |
1280 | ||
1281 | return base_address; | |
1282 | } | |
1283 | ||
cfee318d SP |
1284 | /* Returns true when PARTITION1 and PARTITION2 have similar memory |
1285 | accesses in RDG. */ | |
1286 | ||
1287 | static bool | |
c61f8985 RG |
1288 | similar_memory_accesses (struct graph *rdg, partition_t partition1, |
1289 | partition_t partition2) | |
cfee318d | 1290 | { |
1fa0c180 | 1291 | unsigned i, j, k, l; |
cfee318d | 1292 | bitmap_iterator bi, bj; |
1fa0c180 RG |
1293 | data_reference_p ref1, ref2; |
1294 | ||
1295 | /* First check whether in the intersection of the two partitions are | |
1296 | any loads or stores. Common loads are the situation that happens | |
1297 | most often. */ | |
1298 | EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi) | |
1299 | if (RDG_MEM_WRITE_STMT (rdg, i) | |
1300 | || RDG_MEM_READS_STMT (rdg, i)) | |
1301 | return true; | |
cfee318d | 1302 | |
1fa0c180 | 1303 | /* Then check all data-references against each other. */ |
c61f8985 | 1304 | EXECUTE_IF_SET_IN_BITMAP (partition1->stmts, 0, i, bi) |
cfee318d SP |
1305 | if (RDG_MEM_WRITE_STMT (rdg, i) |
1306 | || RDG_MEM_READS_STMT (rdg, i)) | |
c61f8985 | 1307 | EXECUTE_IF_SET_IN_BITMAP (partition2->stmts, 0, j, bj) |
cfee318d SP |
1308 | if (RDG_MEM_WRITE_STMT (rdg, j) |
1309 | || RDG_MEM_READS_STMT (rdg, j)) | |
1fa0c180 | 1310 | { |
9771b263 | 1311 | FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, i), k, ref1) |
1fa0c180 RG |
1312 | { |
1313 | tree base1 = ref_base_address (ref1); | |
1314 | if (base1) | |
9771b263 | 1315 | FOR_EACH_VEC_ELT (RDG_DATAREFS (rdg, j), l, ref2) |
1fa0c180 RG |
1316 | if (base1 == ref_base_address (ref2)) |
1317 | return true; | |
1318 | } | |
1319 | } | |
cfee318d SP |
1320 | |
1321 | return false; | |
1322 | } | |
1323 | ||
dea61d92 SP |
1324 | /* Aggregate several components into a useful partition that is |
1325 | registered in the PARTITIONS vector. Partitions will be | |
1326 | distributed in different loops. */ | |
1327 | ||
1328 | static void | |
83a95546 | 1329 | rdg_build_partitions (struct graph *rdg, |
2fd5894f | 1330 | vec<gimple> starting_stmts, |
83a95546 | 1331 | vec<partition_t> *partitions) |
dea61d92 | 1332 | { |
83a95546 | 1333 | bitmap processed = BITMAP_ALLOC (NULL); |
2fd5894f RB |
1334 | int i; |
1335 | gimple stmt; | |
80ab0b19 | 1336 | partition_t partition = partition_alloc (NULL, NULL); |
dea61d92 | 1337 | |
2fd5894f | 1338 | FOR_EACH_VEC_ELT (starting_stmts, i, stmt) |
dea61d92 | 1339 | { |
c61f8985 | 1340 | partition_t np; |
2fd5894f RB |
1341 | int v = rdg_vertex_for_stmt (rdg, stmt); |
1342 | ||
1343 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1344 | fprintf (dump_file, | |
1345 | "ldist asked to generate code for vertex %d\n", v); | |
b8698a0f | 1346 | |
dea61d92 SP |
1347 | if (bitmap_bit_p (processed, v)) |
1348 | continue; | |
b8698a0f | 1349 | |
83a95546 | 1350 | np = build_rdg_partition_for_vertex (rdg, v); |
c61f8985 | 1351 | bitmap_ior_into (partition->stmts, np->stmts); |
7ad672e4 | 1352 | partition->has_writes = partition_has_writes (np); |
c61f8985 RG |
1353 | bitmap_ior_into (processed, np->stmts); |
1354 | partition_free (np); | |
dea61d92 | 1355 | |
7ad672e4 | 1356 | if (partition_has_writes (partition)) |
dea61d92 SP |
1357 | { |
1358 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1359 | { | |
1360 | fprintf (dump_file, "ldist useful partition:\n"); | |
c61f8985 | 1361 | dump_bitmap (dump_file, partition->stmts); |
dea61d92 SP |
1362 | } |
1363 | ||
9771b263 | 1364 | partitions->safe_push (partition); |
80ab0b19 | 1365 | partition = partition_alloc (NULL, NULL); |
dea61d92 SP |
1366 | } |
1367 | } | |
1368 | ||
83a95546 RB |
1369 | /* All vertices should have been assigned to at least one partition now, |
1370 | other than vertices belonging to dead code. */ | |
dea61d92 | 1371 | |
83a95546 RB |
1372 | if (!bitmap_empty_p (partition->stmts)) |
1373 | { | |
1374 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1375 | { | |
1376 | fprintf (dump_file, "remaining partition:\n"); | |
1377 | dump_bitmap (dump_file, partition->stmts); | |
1378 | } | |
dea61d92 | 1379 | |
83a95546 | 1380 | partitions->safe_push (partition); |
dea61d92 | 1381 | } |
dea61d92 | 1382 | else |
c61f8985 | 1383 | partition_free (partition); |
83a95546 RB |
1384 | |
1385 | BITMAP_FREE (processed); | |
dea61d92 SP |
1386 | } |
1387 | ||
1388 | /* Dump to FILE the PARTITIONS. */ | |
1389 | ||
1390 | static void | |
9771b263 | 1391 | dump_rdg_partitions (FILE *file, vec<partition_t> partitions) |
dea61d92 SP |
1392 | { |
1393 | int i; | |
c61f8985 | 1394 | partition_t partition; |
dea61d92 | 1395 | |
9771b263 | 1396 | FOR_EACH_VEC_ELT (partitions, i, partition) |
c61f8985 | 1397 | debug_bitmap_file (file, partition->stmts); |
dea61d92 SP |
1398 | } |
1399 | ||
1400 | /* Debug PARTITIONS. */ | |
9771b263 | 1401 | extern void debug_rdg_partitions (vec<partition_t> ); |
dea61d92 | 1402 | |
24e47c76 | 1403 | DEBUG_FUNCTION void |
9771b263 | 1404 | debug_rdg_partitions (vec<partition_t> partitions) |
dea61d92 SP |
1405 | { |
1406 | dump_rdg_partitions (stderr, partitions); | |
1407 | } | |
1408 | ||
2b8aee8e SP |
1409 | /* Returns the number of read and write operations in the RDG. */ |
1410 | ||
1411 | static int | |
1412 | number_of_rw_in_rdg (struct graph *rdg) | |
1413 | { | |
1414 | int i, res = 0; | |
1415 | ||
1416 | for (i = 0; i < rdg->n_vertices; i++) | |
1417 | { | |
1418 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1419 | ++res; | |
1420 | ||
1421 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1422 | ++res; | |
1423 | } | |
1424 | ||
1425 | return res; | |
1426 | } | |
1427 | ||
1428 | /* Returns the number of read and write operations in a PARTITION of | |
1429 | the RDG. */ | |
1430 | ||
1431 | static int | |
c61f8985 | 1432 | number_of_rw_in_partition (struct graph *rdg, partition_t partition) |
2b8aee8e SP |
1433 | { |
1434 | int res = 0; | |
1435 | unsigned i; | |
1436 | bitmap_iterator ii; | |
1437 | ||
c61f8985 | 1438 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii) |
2b8aee8e SP |
1439 | { |
1440 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1441 | ++res; | |
1442 | ||
1443 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1444 | ++res; | |
1445 | } | |
1446 | ||
1447 | return res; | |
1448 | } | |
1449 | ||
1450 | /* Returns true when one of the PARTITIONS contains all the read or | |
1451 | write operations of RDG. */ | |
1452 | ||
1453 | static bool | |
9771b263 DN |
1454 | partition_contains_all_rw (struct graph *rdg, |
1455 | vec<partition_t> partitions) | |
2b8aee8e SP |
1456 | { |
1457 | int i; | |
c61f8985 | 1458 | partition_t partition; |
2b8aee8e SP |
1459 | int nrw = number_of_rw_in_rdg (rdg); |
1460 | ||
9771b263 | 1461 | FOR_EACH_VEC_ELT (partitions, i, partition) |
2b8aee8e SP |
1462 | if (nrw == number_of_rw_in_partition (rdg, partition)) |
1463 | return true; | |
1464 | ||
1465 | return false; | |
1466 | } | |
1467 | ||
2fd5894f RB |
1468 | |
1469 | /* Distributes the code from LOOP in such a way that producer | |
1470 | statements are placed before consumer statements. Tries to separate | |
1471 | only the statements from STMTS into separate loops. | |
1472 | Returns the number of distributed loops. */ | |
dea61d92 SP |
1473 | |
1474 | static int | |
36875e8f RB |
1475 | distribute_loop (struct loop *loop, vec<gimple> stmts, |
1476 | control_dependences *cd) | |
dea61d92 | 1477 | { |
2fd5894f RB |
1478 | struct graph *rdg; |
1479 | vec<loop_p> loop_nest; | |
9771b263 | 1480 | vec<partition_t> partitions; |
c61f8985 | 1481 | partition_t partition; |
be6b029b | 1482 | bool any_builtin; |
2fd5894f | 1483 | int i, nbp; |
dea61d92 | 1484 | |
2fd5894f RB |
1485 | loop_nest.create (3); |
1486 | if (!find_loop_nest (loop, &loop_nest)) | |
1487 | { | |
1488 | loop_nest.release (); | |
1489 | return 0; | |
1490 | } | |
1491 | ||
36875e8f | 1492 | rdg = build_rdg (loop_nest, cd); |
2fd5894f RB |
1493 | if (!rdg) |
1494 | { | |
1495 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1496 | fprintf (dump_file, | |
1497 | "Loop %d not distributed: failed to build the RDG.\n", | |
1498 | loop->num); | |
1499 | ||
1500 | loop_nest.release (); | |
1501 | return 0; | |
1502 | } | |
1503 | ||
1504 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1505 | dump_rdg (dump_file, rdg); | |
1506 | ||
1507 | partitions.create (3); | |
1508 | rdg_build_partitions (rdg, stmts, &partitions); | |
dea61d92 | 1509 | |
be6b029b | 1510 | any_builtin = false; |
9771b263 | 1511 | FOR_EACH_VEC_ELT (partitions, i, partition) |
be6b029b RG |
1512 | { |
1513 | classify_partition (loop, rdg, partition); | |
1514 | any_builtin |= partition_builtin_p (partition); | |
1515 | } | |
30d55936 | 1516 | |
c014f6f5 RG |
1517 | /* If we are only distributing patterns fuse all partitions that |
1518 | were not properly classified as builtins. Else fuse partitions | |
1519 | with similar memory accesses. */ | |
1520 | if (!flag_tree_loop_distribution) | |
1521 | { | |
1522 | partition_t into; | |
be6b029b RG |
1523 | /* If we did not detect any builtin simply bail out. */ |
1524 | if (!any_builtin) | |
1525 | { | |
1526 | nbp = 0; | |
1527 | goto ldist_done; | |
1528 | } | |
38ad2d07 RG |
1529 | /* Only fuse adjacent non-builtin partitions, see PR53616. |
1530 | ??? Use dependence information to improve partition ordering. */ | |
1531 | i = 0; | |
1532 | do | |
1533 | { | |
9771b263 | 1534 | for (; partitions.iterate (i, &into); ++i) |
38ad2d07 RG |
1535 | if (!partition_builtin_p (into)) |
1536 | break; | |
9771b263 | 1537 | for (++i; partitions.iterate (i, &partition); ++i) |
38ad2d07 RG |
1538 | if (!partition_builtin_p (partition)) |
1539 | { | |
1540 | bitmap_ior_into (into->stmts, partition->stmts); | |
b9fc0497 RB |
1541 | if (partition->kind == PKIND_REDUCTION) |
1542 | into->kind = PKIND_REDUCTION; | |
9771b263 | 1543 | partitions.ordered_remove (i); |
5eb010bc | 1544 | partition_free (partition); |
38ad2d07 RG |
1545 | i--; |
1546 | } | |
1547 | else | |
1548 | break; | |
1549 | } | |
9771b263 | 1550 | while ((unsigned) i < partitions.length ()); |
c014f6f5 RG |
1551 | } |
1552 | else | |
1553 | { | |
1554 | partition_t into; | |
1555 | int j; | |
9771b263 | 1556 | for (i = 0; partitions.iterate (i, &into); ++i) |
c014f6f5 RG |
1557 | { |
1558 | if (partition_builtin_p (into)) | |
1559 | continue; | |
1560 | for (j = i + 1; | |
9771b263 | 1561 | partitions.iterate (j, &partition); ++j) |
c014f6f5 RG |
1562 | { |
1563 | if (!partition_builtin_p (partition) | |
c014f6f5 RG |
1564 | && similar_memory_accesses (rdg, into, partition)) |
1565 | { | |
1566 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1567 | { | |
1568 | fprintf (dump_file, "fusing partitions\n"); | |
1569 | dump_bitmap (dump_file, into->stmts); | |
1570 | dump_bitmap (dump_file, partition->stmts); | |
1571 | fprintf (dump_file, "because they have similar " | |
1572 | "memory accesses\n"); | |
1573 | } | |
1574 | bitmap_ior_into (into->stmts, partition->stmts); | |
b9fc0497 RB |
1575 | if (partition->kind == PKIND_REDUCTION) |
1576 | into->kind = PKIND_REDUCTION; | |
9771b263 | 1577 | partitions.ordered_remove (j); |
5eb010bc | 1578 | partition_free (partition); |
c014f6f5 RG |
1579 | j--; |
1580 | } | |
1581 | } | |
1582 | } | |
1583 | } | |
30d55936 | 1584 | |
b9fc0497 RB |
1585 | /* Fuse all reduction partitions into the last. */ |
1586 | if (partitions.length () > 1) | |
1587 | { | |
1588 | partition_t into = partitions.last (); | |
1589 | for (i = partitions.length () - 2; i >= 0; --i) | |
1590 | { | |
1591 | partition_t what = partitions[i]; | |
1592 | if (what->kind == PKIND_REDUCTION) | |
1593 | { | |
1594 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1595 | { | |
1596 | fprintf (dump_file, "fusing partitions\n"); | |
1597 | dump_bitmap (dump_file, into->stmts); | |
1598 | dump_bitmap (dump_file, what->stmts); | |
1599 | fprintf (dump_file, "because the latter has reductions\n"); | |
1600 | } | |
1601 | bitmap_ior_into (into->stmts, what->stmts); | |
1602 | into->kind = PKIND_REDUCTION; | |
1603 | partitions.ordered_remove (i); | |
5eb010bc | 1604 | partition_free (what); |
b9fc0497 RB |
1605 | } |
1606 | } | |
1607 | } | |
1608 | ||
9771b263 | 1609 | nbp = partitions.length (); |
a4293fa6 | 1610 | if (nbp == 0 |
9771b263 DN |
1611 | || (nbp == 1 && !partition_builtin_p (partitions[0])) |
1612 | || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) | |
c014f6f5 RG |
1613 | { |
1614 | nbp = 0; | |
1615 | goto ldist_done; | |
1616 | } | |
dea61d92 SP |
1617 | |
1618 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1619 | dump_rdg_partitions (dump_file, partitions); | |
1620 | ||
9771b263 | 1621 | FOR_EACH_VEC_ELT (partitions, i, partition) |
d0582dc1 | 1622 | generate_code_for_partition (loop, partition, i < nbp - 1); |
dea61d92 | 1623 | |
dea61d92 SP |
1624 | ldist_done: |
1625 | ||
9771b263 | 1626 | FOR_EACH_VEC_ELT (partitions, i, partition) |
c61f8985 | 1627 | partition_free (partition); |
9771b263 | 1628 | partitions.release (); |
dea61d92 | 1629 | |
dea61d92 | 1630 | free_rdg (rdg); |
97463b2b | 1631 | loop_nest.release (); |
2fd5894f | 1632 | return nbp; |
dea61d92 SP |
1633 | } |
1634 | ||
1635 | /* Distribute all loops in the current function. */ | |
1636 | ||
1637 | static unsigned int | |
1638 | tree_loop_distribution (void) | |
1639 | { | |
1640 | struct loop *loop; | |
1641 | loop_iterator li; | |
c014f6f5 | 1642 | bool changed = false; |
1fa0c180 | 1643 | basic_block bb; |
36875e8f | 1644 | control_dependences *cd = NULL; |
1fa0c180 RG |
1645 | |
1646 | FOR_ALL_BB (bb) | |
1647 | { | |
1648 | gimple_stmt_iterator gsi; | |
1649 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1650 | gimple_set_uid (gsi_stmt (gsi), -1); | |
1651 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1652 | gimple_set_uid (gsi_stmt (gsi), -1); | |
1653 | } | |
dea61d92 | 1654 | |
c014f6f5 RG |
1655 | /* We can at the moment only distribute non-nested loops, thus restrict |
1656 | walking to innermost loops. */ | |
1657 | FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) | |
dea61d92 | 1658 | { |
6e1aa848 | 1659 | vec<gimple> work_list = vNULL; |
be6b029b | 1660 | basic_block *bbs; |
0e20c89f | 1661 | int num = loop->num; |
c014f6f5 | 1662 | int nb_generated_loops = 0; |
be6b029b | 1663 | unsigned int i; |
a3357f7d RG |
1664 | |
1665 | /* If the loop doesn't have a single exit we will fail anyway, | |
1666 | so do that early. */ | |
1667 | if (!single_exit (loop)) | |
1668 | continue; | |
dea61d92 | 1669 | |
f56f2d33 JH |
1670 | /* Only optimize hot loops. */ |
1671 | if (!optimize_loop_for_speed_p (loop)) | |
1672 | continue; | |
1673 | ||
be6b029b RG |
1674 | /* Initialize the worklist with stmts we seed the partitions with. */ |
1675 | bbs = get_loop_body_in_dom_order (loop); | |
1676 | for (i = 0; i < loop->num_nodes; ++i) | |
1677 | { | |
1678 | gimple_stmt_iterator gsi; | |
deb6c11a RB |
1679 | for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) |
1680 | { | |
1681 | gimple phi = gsi_stmt (gsi); | |
1682 | if (virtual_operand_p (gimple_phi_result (phi))) | |
1683 | continue; | |
1684 | /* Distribute stmts which have defs that are used outside of | |
1685 | the loop. */ | |
1686 | if (!stmt_has_scalar_dependences_outside_loop (loop, phi)) | |
1687 | continue; | |
1688 | work_list.safe_push (phi); | |
1689 | } | |
be6b029b RG |
1690 | for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi)) |
1691 | { | |
1692 | gimple stmt = gsi_stmt (gsi); | |
83a95546 RB |
1693 | |
1694 | /* If there is a stmt with side-effects bail out - we | |
1695 | cannot and should not distribute this loop. */ | |
1696 | if (gimple_has_side_effects (stmt)) | |
1697 | { | |
1698 | work_list.truncate (0); | |
1699 | goto out; | |
1700 | } | |
1701 | ||
b9fc0497 RB |
1702 | /* Distribute stmts which have defs that are used outside of |
1703 | the loop. */ | |
1704 | if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) | |
1705 | ; | |
1706 | /* Otherwise only distribute stores for now. */ | |
1707 | else if (!gimple_assign_single_p (stmt) | |
1708 | || is_gimple_reg (gimple_assign_lhs (stmt))) | |
be6b029b RG |
1709 | continue; |
1710 | ||
9771b263 | 1711 | work_list.safe_push (stmt); |
be6b029b RG |
1712 | } |
1713 | } | |
83a95546 | 1714 | out: |
be6b029b | 1715 | free (bbs); |
c014f6f5 | 1716 | |
9771b263 | 1717 | if (work_list.length () > 0) |
36875e8f RB |
1718 | { |
1719 | if (!cd) | |
1720 | { | |
1721 | calculate_dominance_info (CDI_POST_DOMINATORS); | |
1722 | cd = new control_dependences (create_edge_list ()); | |
1723 | free_dominance_info (CDI_POST_DOMINATORS); | |
1724 | } | |
1725 | nb_generated_loops = distribute_loop (loop, work_list, cd); | |
1726 | } | |
c014f6f5 RG |
1727 | |
1728 | if (nb_generated_loops > 0) | |
1729 | changed = true; | |
dea61d92 SP |
1730 | |
1731 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1732 | { | |
1733 | if (nb_generated_loops > 1) | |
1734 | fprintf (dump_file, "Loop %d distributed: split to %d loops.\n", | |
0e20c89f | 1735 | num, nb_generated_loops); |
dea61d92 | 1736 | else |
0e20c89f | 1737 | fprintf (dump_file, "Loop %d is the same.\n", num); |
dea61d92 SP |
1738 | } |
1739 | ||
9771b263 | 1740 | work_list.release (); |
dea61d92 SP |
1741 | } |
1742 | ||
36875e8f RB |
1743 | if (cd) |
1744 | delete cd; | |
1745 | ||
c014f6f5 RG |
1746 | if (changed) |
1747 | { | |
525174a2 | 1748 | mark_virtual_operands_for_renaming (cfun); |
c014f6f5 RG |
1749 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
1750 | } | |
1751 | ||
1752 | #ifdef ENABLE_CHECKING | |
1753 | verify_loop_structure (); | |
1754 | #endif | |
1755 | ||
5006671f | 1756 | return 0; |
dea61d92 SP |
1757 | } |
1758 | ||
1759 | static bool | |
1760 | gate_tree_loop_distribution (void) | |
1761 | { | |
20769d5e SP |
1762 | return flag_tree_loop_distribution |
1763 | || flag_tree_loop_distribute_patterns; | |
dea61d92 SP |
1764 | } |
1765 | ||
27a4cd48 DM |
1766 | namespace { |
1767 | ||
1768 | const pass_data pass_data_loop_distribution = | |
dea61d92 | 1769 | { |
27a4cd48 DM |
1770 | GIMPLE_PASS, /* type */ |
1771 | "ldist", /* name */ | |
1772 | OPTGROUP_LOOP, /* optinfo_flags */ | |
1773 | true, /* has_gate */ | |
1774 | true, /* has_execute */ | |
1775 | TV_TREE_LOOP_DISTRIBUTION, /* tv_id */ | |
1776 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1777 | 0, /* properties_provided */ | |
1778 | 0, /* properties_destroyed */ | |
1779 | 0, /* todo_flags_start */ | |
1780 | TODO_verify_ssa, /* todo_flags_finish */ | |
dea61d92 | 1781 | }; |
27a4cd48 DM |
1782 | |
1783 | class pass_loop_distribution : public gimple_opt_pass | |
1784 | { | |
1785 | public: | |
1786 | pass_loop_distribution(gcc::context *ctxt) | |
1787 | : gimple_opt_pass(pass_data_loop_distribution, ctxt) | |
1788 | {} | |
1789 | ||
1790 | /* opt_pass methods: */ | |
1791 | bool gate () { return gate_tree_loop_distribution (); } | |
1792 | unsigned int execute () { return tree_loop_distribution (); } | |
1793 | ||
1794 | }; // class pass_loop_distribution | |
1795 | ||
1796 | } // anon namespace | |
1797 | ||
1798 | gimple_opt_pass * | |
1799 | make_pass_loop_distribution (gcc::context *ctxt) | |
1800 | { | |
1801 | return new pass_loop_distribution (ctxt); | |
1802 | } |