]>
Commit | Line | Data |
---|---|---|
dea61d92 | 1 | /* Loop distribution. |
cbe34bb5 | 2 | Copyright (C) 2006-2017 Free Software Foundation, Inc. |
dea61d92 SP |
3 | Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr> |
4 | and Sebastian Pop <sebastian.pop@amd.com>. | |
5 | ||
6 | This file is part of GCC. | |
b8698a0f | 7 | |
dea61d92 SP |
8 | GCC is free software; you can redistribute it and/or modify it |
9 | under the terms of the GNU General Public License as published by the | |
10 | Free Software Foundation; either version 3, or (at your option) any | |
11 | later version. | |
b8698a0f | 12 | |
dea61d92 SP |
13 | GCC is distributed in the hope that it will be useful, but WITHOUT |
14 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 | FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 | for more details. | |
b8698a0f | 17 | |
dea61d92 SP |
18 | You should have received a copy of the GNU General Public License |
19 | along with GCC; see the file COPYING3. If not see | |
20 | <http://www.gnu.org/licenses/>. */ | |
21 | ||
22 | /* This pass performs loop distribution: for example, the loop | |
23 | ||
24 | |DO I = 2, N | |
25 | | A(I) = B(I) + C | |
26 | | D(I) = A(I-1)*E | |
27 | |ENDDO | |
28 | ||
b8698a0f | 29 | is transformed to |
dea61d92 SP |
30 | |
31 | |DOALL I = 2, N | |
32 | | A(I) = B(I) + C | |
33 | |ENDDO | |
34 | | | |
35 | |DOALL I = 2, N | |
36 | | D(I) = A(I-1)*E | |
37 | |ENDDO | |
38 | ||
39 | This pass uses an RDG, Reduced Dependence Graph built on top of the | |
40 | data dependence relations. The RDG is then topologically sorted to | |
41 | obtain a map of information producers/consumers based on which it | |
42 | generates the new loops. */ | |
43 | ||
44 | #include "config.h" | |
45 | #include "system.h" | |
46 | #include "coretypes.h" | |
c7131fb2 | 47 | #include "backend.h" |
40e23961 | 48 | #include "tree.h" |
c7131fb2 | 49 | #include "gimple.h" |
957060b5 AM |
50 | #include "cfghooks.h" |
51 | #include "tree-pass.h" | |
c7131fb2 | 52 | #include "ssa.h" |
957060b5 | 53 | #include "gimple-pretty-print.h" |
c7131fb2 | 54 | #include "fold-const.h" |
60393bbc | 55 | #include "cfganal.h" |
5be5c238 | 56 | #include "gimple-iterator.h" |
18f429e2 | 57 | #include "gimplify-me.h" |
d8a2d370 | 58 | #include "stor-layout.h" |
442b4905 | 59 | #include "tree-cfg.h" |
e28030cf | 60 | #include "tree-ssa-loop-manip.h" |
442b4905 AM |
61 | #include "tree-ssa-loop.h" |
62 | #include "tree-into-ssa.h" | |
7a300452 | 63 | #include "tree-ssa.h" |
dea61d92 | 64 | #include "cfgloop.h" |
dea61d92 | 65 | #include "tree-scalar-evolution.h" |
9fafb14a | 66 | #include "params.h" |
826a536d | 67 | #include "tree-vectorizer.h" |
80ab0b19 RB |
68 | |
69 | ||
9fafb14a BC |
70 | #define MAX_DATAREFS_NUM \ |
71 | ((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) | |
72 | ||
17c5cbdf BC |
73 | /* Hashtable helpers. */ |
74 | ||
75 | struct ddr_hasher : nofree_ptr_hash <struct data_dependence_relation> | |
76 | { | |
77 | static inline hashval_t hash (const data_dependence_relation *); | |
78 | static inline bool equal (const data_dependence_relation *, | |
79 | const data_dependence_relation *); | |
80 | }; | |
81 | ||
82 | /* Hash function for data dependence. */ | |
83 | ||
84 | inline hashval_t | |
85 | ddr_hasher::hash (const data_dependence_relation *ddr) | |
86 | { | |
87 | inchash::hash h; | |
88 | h.add_ptr (DDR_A (ddr)); | |
89 | h.add_ptr (DDR_B (ddr)); | |
90 | return h.end (); | |
91 | } | |
92 | ||
93 | /* Hash table equality function for data dependence. */ | |
94 | ||
95 | inline bool | |
96 | ddr_hasher::equal (const data_dependence_relation *ddr1, | |
97 | const data_dependence_relation *ddr2) | |
98 | { | |
99 | return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2)); | |
100 | } | |
101 | ||
4084ea5f BC |
102 | /* The loop (nest) to be distributed. */ |
103 | static vec<loop_p> loop_nest; | |
104 | ||
9fafb14a BC |
105 | /* Vector of data references in the loop to be distributed. */ |
106 | static vec<data_reference_p> datarefs_vec; | |
107 | ||
108 | /* Store index of data reference in aux field. */ | |
109 | #define DR_INDEX(dr) ((uintptr_t) (dr)->aux) | |
110 | ||
17c5cbdf BC |
111 | /* Hash table for data dependence relation in the loop to be distributed. */ |
112 | static hash_table<ddr_hasher> ddrs_table (389); | |
113 | ||
114 | ||
80ab0b19 | 115 | /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ |
526ceb68 | 116 | struct rdg_vertex |
80ab0b19 RB |
117 | { |
118 | /* The statement represented by this vertex. */ | |
355fe088 | 119 | gimple *stmt; |
80ab0b19 RB |
120 | |
121 | /* Vector of data-references in this statement. */ | |
122 | vec<data_reference_p> datarefs; | |
123 | ||
124 | /* True when the statement contains a write to memory. */ | |
125 | bool has_mem_write; | |
126 | ||
127 | /* True when the statement contains a read from memory. */ | |
128 | bool has_mem_reads; | |
526ceb68 | 129 | }; |
80ab0b19 RB |
130 | |
131 | #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt | |
132 | #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs | |
133 | #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write | |
134 | #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads | |
135 | #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I])) | |
136 | #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I])) | |
137 | #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I])) | |
138 | #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I])) | |
139 | ||
140 | /* Data dependence type. */ | |
141 | ||
142 | enum rdg_dep_type | |
143 | { | |
144 | /* Read After Write (RAW). */ | |
145 | flow_dd = 'f', | |
146 | ||
36875e8f RB |
147 | /* Control dependence (execute conditional on). */ |
148 | control_dd = 'c' | |
80ab0b19 RB |
149 | }; |
150 | ||
151 | /* Dependence information attached to an edge of the RDG. */ | |
152 | ||
526ceb68 | 153 | struct rdg_edge |
80ab0b19 RB |
154 | { |
155 | /* Type of the dependence. */ | |
156 | enum rdg_dep_type type; | |
526ceb68 | 157 | }; |
80ab0b19 RB |
158 | |
159 | #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type | |
80ab0b19 | 160 | |
80ab0b19 RB |
161 | /* Dump vertex I in RDG to FILE. */ |
162 | ||
163 | static void | |
164 | dump_rdg_vertex (FILE *file, struct graph *rdg, int i) | |
165 | { | |
166 | struct vertex *v = &(rdg->vertices[i]); | |
167 | struct graph_edge *e; | |
168 | ||
169 | fprintf (file, "(vertex %d: (%s%s) (in:", i, | |
170 | RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "", | |
171 | RDG_MEM_READS_STMT (rdg, i) ? "r" : ""); | |
172 | ||
173 | if (v->pred) | |
174 | for (e = v->pred; e; e = e->pred_next) | |
175 | fprintf (file, " %d", e->src); | |
176 | ||
177 | fprintf (file, ") (out:"); | |
178 | ||
179 | if (v->succ) | |
180 | for (e = v->succ; e; e = e->succ_next) | |
181 | fprintf (file, " %d", e->dest); | |
182 | ||
183 | fprintf (file, ")\n"); | |
184 | print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS); | |
185 | fprintf (file, ")\n"); | |
186 | } | |
187 | ||
188 | /* Call dump_rdg_vertex on stderr. */ | |
189 | ||
190 | DEBUG_FUNCTION void | |
191 | debug_rdg_vertex (struct graph *rdg, int i) | |
192 | { | |
193 | dump_rdg_vertex (stderr, rdg, i); | |
194 | } | |
195 | ||
80ab0b19 RB |
196 | /* Dump the reduced dependence graph RDG to FILE. */ |
197 | ||
198 | static void | |
199 | dump_rdg (FILE *file, struct graph *rdg) | |
200 | { | |
80ab0b19 | 201 | fprintf (file, "(rdg\n"); |
2fd5894f RB |
202 | for (int i = 0; i < rdg->n_vertices; i++) |
203 | dump_rdg_vertex (file, rdg, i); | |
80ab0b19 | 204 | fprintf (file, ")\n"); |
80ab0b19 RB |
205 | } |
206 | ||
207 | /* Call dump_rdg on stderr. */ | |
208 | ||
209 | DEBUG_FUNCTION void | |
210 | debug_rdg (struct graph *rdg) | |
211 | { | |
212 | dump_rdg (stderr, rdg); | |
213 | } | |
214 | ||
215 | static void | |
216 | dot_rdg_1 (FILE *file, struct graph *rdg) | |
217 | { | |
218 | int i; | |
174ec470 RB |
219 | pretty_printer buffer; |
220 | pp_needs_newline (&buffer) = false; | |
221 | buffer.buffer->stream = file; | |
80ab0b19 RB |
222 | |
223 | fprintf (file, "digraph RDG {\n"); | |
224 | ||
225 | for (i = 0; i < rdg->n_vertices; i++) | |
226 | { | |
227 | struct vertex *v = &(rdg->vertices[i]); | |
228 | struct graph_edge *e; | |
229 | ||
174ec470 RB |
230 | fprintf (file, "%d [label=\"[%d] ", i, i); |
231 | pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM); | |
232 | pp_flush (&buffer); | |
233 | fprintf (file, "\"]\n"); | |
234 | ||
80ab0b19 RB |
235 | /* Highlight reads from memory. */ |
236 | if (RDG_MEM_READS_STMT (rdg, i)) | |
237 | fprintf (file, "%d [style=filled, fillcolor=green]\n", i); | |
238 | ||
239 | /* Highlight stores to memory. */ | |
240 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
241 | fprintf (file, "%d [style=filled, fillcolor=red]\n", i); | |
242 | ||
243 | if (v->succ) | |
244 | for (e = v->succ; e; e = e->succ_next) | |
245 | switch (RDGE_TYPE (e)) | |
246 | { | |
80ab0b19 RB |
247 | case flow_dd: |
248 | /* These are the most common dependences: don't print these. */ | |
249 | fprintf (file, "%d -> %d \n", i, e->dest); | |
250 | break; | |
251 | ||
36875e8f RB |
252 | case control_dd: |
253 | fprintf (file, "%d -> %d [label=control] \n", i, e->dest); | |
254 | break; | |
255 | ||
80ab0b19 RB |
256 | default: |
257 | gcc_unreachable (); | |
258 | } | |
259 | } | |
260 | ||
261 | fprintf (file, "}\n\n"); | |
262 | } | |
263 | ||
264 | /* Display the Reduced Dependence Graph using dotty. */ | |
265 | ||
266 | DEBUG_FUNCTION void | |
267 | dot_rdg (struct graph *rdg) | |
268 | { | |
174ec470 | 269 | /* When debugging, you may want to enable the following code. */ |
b6d94045 | 270 | #ifdef HAVE_POPEN |
c3284718 | 271 | FILE *file = popen ("dot -Tx11", "w"); |
174ec470 RB |
272 | if (!file) |
273 | return; | |
80ab0b19 | 274 | dot_rdg_1 (file, rdg); |
174ec470 RB |
275 | fflush (file); |
276 | close (fileno (file)); | |
277 | pclose (file); | |
80ab0b19 RB |
278 | #else |
279 | dot_rdg_1 (stderr, rdg); | |
280 | #endif | |
281 | } | |
282 | ||
283 | /* Returns the index of STMT in RDG. */ | |
284 | ||
285 | static int | |
355fe088 | 286 | rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt) |
80ab0b19 RB |
287 | { |
288 | int index = gimple_uid (stmt); | |
289 | gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt); | |
290 | return index; | |
291 | } | |
292 | ||
80ab0b19 RB |
293 | /* Creates dependence edges in RDG for all the uses of DEF. IDEF is |
294 | the index of DEF in RDG. */ | |
295 | ||
296 | static void | |
297 | create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) | |
298 | { | |
299 | use_operand_p imm_use_p; | |
300 | imm_use_iterator iterator; | |
301 | ||
302 | FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def) | |
303 | { | |
304 | struct graph_edge *e; | |
305 | int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p)); | |
306 | ||
307 | if (use < 0) | |
308 | continue; | |
309 | ||
310 | e = add_edge (rdg, idef, use); | |
311 | e->data = XNEW (struct rdg_edge); | |
312 | RDGE_TYPE (e) = flow_dd; | |
80ab0b19 RB |
313 | } |
314 | } | |
315 | ||
36875e8f RB |
316 | /* Creates an edge for the control dependences of BB to the vertex V. */ |
317 | ||
318 | static void | |
319 | create_edge_for_control_dependence (struct graph *rdg, basic_block bb, | |
320 | int v, control_dependences *cd) | |
321 | { | |
322 | bitmap_iterator bi; | |
323 | unsigned edge_n; | |
324 | EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index), | |
325 | 0, edge_n, bi) | |
326 | { | |
30fd2977 | 327 | basic_block cond_bb = cd->get_edge_src (edge_n); |
355fe088 | 328 | gimple *stmt = last_stmt (cond_bb); |
36875e8f RB |
329 | if (stmt && is_ctrl_stmt (stmt)) |
330 | { | |
331 | struct graph_edge *e; | |
332 | int c = rdg_vertex_for_stmt (rdg, stmt); | |
333 | if (c < 0) | |
334 | continue; | |
335 | ||
336 | e = add_edge (rdg, c, v); | |
337 | e->data = XNEW (struct rdg_edge); | |
338 | RDGE_TYPE (e) = control_dd; | |
36875e8f RB |
339 | } |
340 | } | |
341 | } | |
342 | ||
80ab0b19 RB |
343 | /* Creates the edges of the reduced dependence graph RDG. */ |
344 | ||
345 | static void | |
447f3223 | 346 | create_rdg_flow_edges (struct graph *rdg) |
80ab0b19 RB |
347 | { |
348 | int i; | |
80ab0b19 RB |
349 | def_operand_p def_p; |
350 | ssa_op_iter iter; | |
351 | ||
80ab0b19 RB |
352 | for (i = 0; i < rdg->n_vertices; i++) |
353 | FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), | |
354 | iter, SSA_OP_DEF) | |
355 | create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); | |
447f3223 | 356 | } |
36875e8f | 357 | |
447f3223 RB |
358 | /* Creates the edges of the reduced dependence graph RDG. */ |
359 | ||
360 | static void | |
b71b7a8e | 361 | create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop) |
447f3223 RB |
362 | { |
363 | int i; | |
364 | ||
365 | for (i = 0; i < rdg->n_vertices; i++) | |
366 | { | |
355fe088 | 367 | gimple *stmt = RDG_STMT (rdg, i); |
447f3223 RB |
368 | if (gimple_code (stmt) == GIMPLE_PHI) |
369 | { | |
370 | edge_iterator ei; | |
371 | edge e; | |
372 | FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) | |
b71b7a8e | 373 | if (flow_bb_inside_loop_p (loop, e->src)) |
36875e8f | 374 | create_edge_for_control_dependence (rdg, e->src, i, cd); |
447f3223 RB |
375 | } |
376 | else | |
377 | create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); | |
378 | } | |
80ab0b19 RB |
379 | } |
380 | ||
381 | /* Build the vertices of the reduced dependence graph RDG. Return false | |
382 | if that failed. */ | |
383 | ||
384 | static bool | |
9fafb14a | 385 | create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop) |
80ab0b19 RB |
386 | { |
387 | int i; | |
355fe088 | 388 | gimple *stmt; |
80ab0b19 RB |
389 | |
390 | FOR_EACH_VEC_ELT (stmts, i, stmt) | |
391 | { | |
392 | struct vertex *v = &(rdg->vertices[i]); | |
393 | ||
394 | /* Record statement to vertex mapping. */ | |
395 | gimple_set_uid (stmt, i); | |
396 | ||
397 | v->data = XNEW (struct rdg_vertex); | |
398 | RDGV_STMT (v) = stmt; | |
399 | RDGV_DATAREFS (v).create (0); | |
400 | RDGV_HAS_MEM_WRITE (v) = false; | |
401 | RDGV_HAS_MEM_READS (v) = false; | |
402 | if (gimple_code (stmt) == GIMPLE_PHI) | |
403 | continue; | |
404 | ||
9fafb14a BC |
405 | unsigned drp = datarefs_vec.length (); |
406 | if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec)) | |
80ab0b19 | 407 | return false; |
9fafb14a | 408 | for (unsigned j = drp; j < datarefs_vec.length (); ++j) |
80ab0b19 | 409 | { |
9fafb14a | 410 | data_reference_p dr = datarefs_vec[j]; |
80ab0b19 RB |
411 | if (DR_IS_READ (dr)) |
412 | RDGV_HAS_MEM_READS (v) = true; | |
413 | else | |
414 | RDGV_HAS_MEM_WRITE (v) = true; | |
415 | RDGV_DATAREFS (v).safe_push (dr); | |
416 | } | |
417 | } | |
418 | return true; | |
419 | } | |
420 | ||
3be57c56 BC |
421 | /* Array mapping basic block's index to its topological order. */ |
422 | static int *bb_top_order_index; | |
423 | /* And size of the array. */ | |
424 | static int bb_top_order_index_size; | |
425 | ||
426 | /* If X has a smaller topological sort number than Y, returns -1; | |
427 | if greater, returns 1. */ | |
428 | ||
429 | static int | |
430 | bb_top_order_cmp (const void *x, const void *y) | |
431 | { | |
432 | basic_block bb1 = *(const basic_block *) x; | |
433 | basic_block bb2 = *(const basic_block *) y; | |
434 | ||
435 | gcc_assert (bb1->index < bb_top_order_index_size | |
436 | && bb2->index < bb_top_order_index_size); | |
437 | gcc_assert (bb1 == bb2 | |
438 | || bb_top_order_index[bb1->index] | |
439 | != bb_top_order_index[bb2->index]); | |
440 | ||
441 | return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]); | |
442 | } | |
443 | ||
444 | /* Initialize STMTS with all the statements of LOOP. We use topological | |
445 | order to discover all statements. The order is important because | |
446 | generate_loops_for_partition is using the same traversal for identifying | |
447 | statements in loop copies. */ | |
80ab0b19 RB |
448 | |
449 | static void | |
355fe088 | 450 | stmts_from_loop (struct loop *loop, vec<gimple *> *stmts) |
80ab0b19 RB |
451 | { |
452 | unsigned int i; | |
3be57c56 | 453 | basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp); |
80ab0b19 RB |
454 | |
455 | for (i = 0; i < loop->num_nodes; i++) | |
456 | { | |
457 | basic_block bb = bbs[i]; | |
80ab0b19 | 458 | |
538dd0b7 DM |
459 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); |
460 | gsi_next (&bsi)) | |
461 | if (!virtual_operand_p (gimple_phi_result (bsi.phi ()))) | |
462 | stmts->safe_push (bsi.phi ()); | |
80ab0b19 | 463 | |
538dd0b7 DM |
464 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); |
465 | gsi_next (&bsi)) | |
80ab0b19 | 466 | { |
355fe088 | 467 | gimple *stmt = gsi_stmt (bsi); |
80ab0b19 RB |
468 | if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt)) |
469 | stmts->safe_push (stmt); | |
470 | } | |
471 | } | |
472 | ||
473 | free (bbs); | |
474 | } | |
475 | ||
80ab0b19 RB |
476 | /* Free the reduced dependence graph RDG. */ |
477 | ||
478 | static void | |
479 | free_rdg (struct graph *rdg) | |
480 | { | |
481 | int i; | |
482 | ||
483 | for (i = 0; i < rdg->n_vertices; i++) | |
484 | { | |
485 | struct vertex *v = &(rdg->vertices[i]); | |
486 | struct graph_edge *e; | |
487 | ||
488 | for (e = v->succ; e; e = e->succ_next) | |
447f3223 | 489 | free (e->data); |
80ab0b19 RB |
490 | |
491 | if (v->data) | |
492 | { | |
493 | gimple_set_uid (RDGV_STMT (v), -1); | |
9fafb14a | 494 | (RDGV_DATAREFS (v)).release (); |
80ab0b19 RB |
495 | free (v->data); |
496 | } | |
497 | } | |
498 | ||
499 | free_graph (rdg); | |
500 | } | |
501 | ||
4084ea5f BC |
502 | /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of |
503 | LOOP, and one edge per flow dependence or control dependence from control | |
9fafb14a BC |
504 | dependence CD. During visiting each statement, data references are also |
505 | collected and recorded in global data DATAREFS_VEC. */ | |
80ab0b19 RB |
506 | |
507 | static struct graph * | |
4084ea5f | 508 | build_rdg (struct loop *loop, control_dependences *cd) |
80ab0b19 RB |
509 | { |
510 | struct graph *rdg; | |
80ab0b19 | 511 | |
97463b2b | 512 | /* Create the RDG vertices from the stmts of the loop nest. */ |
355fe088 | 513 | auto_vec<gimple *, 10> stmts; |
4084ea5f | 514 | stmts_from_loop (loop, &stmts); |
24f161fd | 515 | rdg = new_graph (stmts.length ()); |
9fafb14a | 516 | if (!create_rdg_vertices (rdg, stmts, loop)) |
80ab0b19 | 517 | { |
80ab0b19 RB |
518 | free_rdg (rdg); |
519 | return NULL; | |
520 | } | |
521 | stmts.release (); | |
97463b2b | 522 | |
447f3223 RB |
523 | create_rdg_flow_edges (rdg); |
524 | if (cd) | |
4084ea5f | 525 | create_rdg_cd_edges (rdg, cd, loop); |
447f3223 | 526 | |
80ab0b19 RB |
527 | return rdg; |
528 | } | |
529 | ||
80ab0b19 | 530 | |
f1eb4621 | 531 | /* Kind of distributed loop. */ |
b9fc0497 | 532 | enum partition_kind { |
510d73a0 | 533 | PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE |
b9fc0497 | 534 | }; |
30d55936 | 535 | |
f1eb4621 BC |
536 | /* Type of distributed loop. */ |
537 | enum partition_type { | |
538 | /* The distributed loop can be executed parallelly. */ | |
539 | PTYPE_PARALLEL = 0, | |
540 | /* The distributed loop has to be executed sequentially. */ | |
541 | PTYPE_SEQUENTIAL | |
542 | }; | |
543 | ||
a7a44c07 | 544 | /* Partition for loop distribution. */ |
526ceb68 | 545 | struct partition |
c61f8985 | 546 | { |
a7a44c07 | 547 | /* Statements of the partition. */ |
c61f8985 | 548 | bitmap stmts; |
a7a44c07 | 549 | /* Loops of the partition. */ |
80ab0b19 | 550 | bitmap loops; |
a7a44c07 | 551 | /* True if the partition defines variable which is used outside of loop. */ |
826a536d | 552 | bool reduction_p; |
a7a44c07 BC |
553 | /* For builtin partition, true if it executes one iteration more than |
554 | number of loop (latch) iterations. */ | |
34e82342 | 555 | bool plus_one; |
30d55936 | 556 | enum partition_kind kind; |
f1eb4621 | 557 | enum partition_type type; |
d0582dc1 RG |
558 | /* data-references a kind != PKIND_NORMAL partition is about. */ |
559 | data_reference_p main_dr; | |
560 | data_reference_p secondary_dr; | |
a7a44c07 | 561 | /* Number of loop (latch) iterations. */ |
818625cf | 562 | tree niter; |
a7a44c07 BC |
563 | /* Data references in the partition. */ |
564 | bitmap datarefs; | |
526ceb68 | 565 | }; |
c61f8985 | 566 | |
c61f8985 RG |
567 | |
568 | /* Allocate and initialize a partition from BITMAP. */ | |
569 | ||
526ceb68 | 570 | static partition * |
a7a44c07 | 571 | partition_alloc (void) |
c61f8985 | 572 | { |
526ceb68 | 573 | partition *partition = XCNEW (struct partition); |
a7a44c07 BC |
574 | partition->stmts = BITMAP_ALLOC (NULL); |
575 | partition->loops = BITMAP_ALLOC (NULL); | |
826a536d | 576 | partition->reduction_p = false; |
30d55936 | 577 | partition->kind = PKIND_NORMAL; |
a7a44c07 | 578 | partition->datarefs = BITMAP_ALLOC (NULL); |
c61f8985 RG |
579 | return partition; |
580 | } | |
581 | ||
582 | /* Free PARTITION. */ | |
583 | ||
584 | static void | |
526ceb68 | 585 | partition_free (partition *partition) |
c61f8985 RG |
586 | { |
587 | BITMAP_FREE (partition->stmts); | |
80ab0b19 | 588 | BITMAP_FREE (partition->loops); |
a7a44c07 | 589 | BITMAP_FREE (partition->datarefs); |
c61f8985 RG |
590 | free (partition); |
591 | } | |
592 | ||
30d55936 RG |
593 | /* Returns true if the partition can be generated as a builtin. */ |
594 | ||
595 | static bool | |
526ceb68 | 596 | partition_builtin_p (partition *partition) |
30d55936 | 597 | { |
826a536d | 598 | return partition->kind != PKIND_NORMAL; |
30d55936 | 599 | } |
c61f8985 | 600 | |
826a536d | 601 | /* Returns true if the partition contains a reduction. */ |
7ad672e4 RG |
602 | |
603 | static bool | |
526ceb68 | 604 | partition_reduction_p (partition *partition) |
7ad672e4 | 605 | { |
826a536d | 606 | return partition->reduction_p; |
7ad672e4 RG |
607 | } |
608 | ||
821dbeef BC |
609 | /* Partitions are fused because of different reasons. */ |
610 | enum fuse_type | |
611 | { | |
612 | FUSE_NON_BUILTIN = 0, | |
613 | FUSE_REDUCTION = 1, | |
614 | FUSE_SHARE_REF = 2, | |
615 | FUSE_SAME_SCC = 3, | |
616 | FUSE_FINALIZE = 4 | |
617 | }; | |
618 | ||
619 | /* Description on different fusing reason. */ | |
620 | static const char *fuse_message[] = { | |
621 | "they are non-builtins", | |
622 | "they have reductions", | |
623 | "they have shared memory refs", | |
624 | "they are in the same dependence scc", | |
625 | "there is no point to distribute loop"}; | |
626 | ||
826a536d | 627 | static void |
f1eb4621 | 628 | update_type_for_merge (struct graph *, partition *, partition *); |
821dbeef | 629 | |
f1eb4621 BC |
630 | /* Merge PARTITION into the partition DEST. RDG is the reduced dependence |
631 | graph and we update type for result partition if it is non-NULL. */ | |
a7a44c07 | 632 | |
f1eb4621 BC |
633 | static void |
634 | partition_merge_into (struct graph *rdg, partition *dest, | |
635 | partition *partition, enum fuse_type ft) | |
636 | { | |
821dbeef BC |
637 | if (dump_file && (dump_flags & TDF_DETAILS)) |
638 | { | |
639 | fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]); | |
640 | fprintf (dump_file, " Part 1: "); | |
641 | dump_bitmap (dump_file, dest->stmts); | |
642 | fprintf (dump_file, " Part 2: "); | |
643 | dump_bitmap (dump_file, partition->stmts); | |
644 | } | |
f1eb4621 BC |
645 | |
646 | dest->kind = PKIND_NORMAL; | |
647 | if (dest->type == PTYPE_PARALLEL) | |
648 | dest->type = partition->type; | |
649 | ||
650 | bitmap_ior_into (dest->stmts, partition->stmts); | |
651 | if (partition_reduction_p (partition)) | |
652 | dest->reduction_p = true; | |
653 | ||
654 | /* Further check if any data dependence prevents us from executing the | |
655 | new partition parallelly. */ | |
656 | if (dest->type == PTYPE_PARALLEL && rdg != NULL) | |
657 | update_type_for_merge (rdg, dest, partition); | |
658 | ||
659 | bitmap_ior_into (dest->datarefs, partition->datarefs); | |
826a536d RB |
660 | } |
661 | ||
662 | ||
c07a8cb3 RG |
663 | /* Returns true when DEF is an SSA_NAME defined in LOOP and used after |
664 | the LOOP. */ | |
665 | ||
666 | static bool | |
667 | ssa_name_has_uses_outside_loop_p (tree def, loop_p loop) | |
668 | { | |
669 | imm_use_iterator imm_iter; | |
670 | use_operand_p use_p; | |
671 | ||
672 | FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) | |
e665269a | 673 | { |
355fe088 | 674 | gimple *use_stmt = USE_STMT (use_p); |
e665269a RG |
675 | if (!is_gimple_debug (use_stmt) |
676 | && loop != loop_containing_stmt (use_stmt)) | |
677 | return true; | |
678 | } | |
c07a8cb3 RG |
679 | |
680 | return false; | |
681 | } | |
682 | ||
683 | /* Returns true when STMT defines a scalar variable used after the | |
88af7c1a | 684 | loop LOOP. */ |
c07a8cb3 RG |
685 | |
686 | static bool | |
355fe088 | 687 | stmt_has_scalar_dependences_outside_loop (loop_p loop, gimple *stmt) |
c07a8cb3 | 688 | { |
88af7c1a RG |
689 | def_operand_p def_p; |
690 | ssa_op_iter op_iter; | |
c07a8cb3 | 691 | |
9ca86fc3 RG |
692 | if (gimple_code (stmt) == GIMPLE_PHI) |
693 | return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt), loop); | |
694 | ||
88af7c1a RG |
695 | FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) |
696 | if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p), loop)) | |
697 | return true; | |
c07a8cb3 | 698 | |
88af7c1a | 699 | return false; |
c07a8cb3 RG |
700 | } |
701 | ||
dea61d92 SP |
702 | /* Return a copy of LOOP placed before LOOP. */ |
703 | ||
704 | static struct loop * | |
705 | copy_loop_before (struct loop *loop) | |
706 | { | |
707 | struct loop *res; | |
708 | edge preheader = loop_preheader_edge (loop); | |
709 | ||
dea61d92 | 710 | initialize_original_copy_tables (); |
5ce9450f | 711 | res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader); |
30d55936 | 712 | gcc_assert (res != NULL); |
dea61d92 | 713 | free_original_copy_tables (); |
2cfc56b9 | 714 | delete_update_ssa (); |
dea61d92 SP |
715 | |
716 | return res; | |
717 | } | |
718 | ||
719 | /* Creates an empty basic block after LOOP. */ | |
720 | ||
721 | static void | |
722 | create_bb_after_loop (struct loop *loop) | |
723 | { | |
724 | edge exit = single_exit (loop); | |
725 | ||
726 | if (!exit) | |
727 | return; | |
728 | ||
729 | split_edge (exit); | |
730 | } | |
731 | ||
732 | /* Generate code for PARTITION from the code in LOOP. The loop is | |
733 | copied when COPY_P is true. All the statements not flagged in the | |
734 | PARTITION bitmap are removed from the loop or from its copy. The | |
735 | statements are indexed in sequence inside a basic block, and the | |
30d55936 | 736 | basic blocks of a loop are taken in dom order. */ |
dea61d92 | 737 | |
30d55936 | 738 | static void |
526ceb68 | 739 | generate_loops_for_partition (struct loop *loop, partition *partition, |
c61f8985 | 740 | bool copy_p) |
dea61d92 | 741 | { |
2fd5894f | 742 | unsigned i; |
dea61d92 SP |
743 | basic_block *bbs; |
744 | ||
745 | if (copy_p) | |
746 | { | |
747 | loop = copy_loop_before (loop); | |
30d55936 | 748 | gcc_assert (loop != NULL); |
dea61d92 SP |
749 | create_preheader (loop, CP_SIMPLE_PREHEADERS); |
750 | create_bb_after_loop (loop); | |
751 | } | |
752 | ||
2fd5894f | 753 | /* Remove stmts not in the PARTITION bitmap. */ |
dea61d92 SP |
754 | bbs = get_loop_body_in_dom_order (loop); |
755 | ||
b03c3082 | 756 | if (MAY_HAVE_DEBUG_STMTS) |
2fd5894f | 757 | for (i = 0; i < loop->num_nodes; i++) |
b03c3082 JJ |
758 | { |
759 | basic_block bb = bbs[i]; | |
760 | ||
538dd0b7 DM |
761 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); |
762 | gsi_next (&bsi)) | |
2fd5894f | 763 | { |
538dd0b7 | 764 | gphi *phi = bsi.phi (); |
2fd5894f RB |
765 | if (!virtual_operand_p (gimple_phi_result (phi)) |
766 | && !bitmap_bit_p (partition->stmts, gimple_uid (phi))) | |
767 | reset_debug_uses (phi); | |
768 | } | |
b03c3082 | 769 | |
538dd0b7 | 770 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) |
b03c3082 | 771 | { |
355fe088 | 772 | gimple *stmt = gsi_stmt (bsi); |
b03c3082 JJ |
773 | if (gimple_code (stmt) != GIMPLE_LABEL |
774 | && !is_gimple_debug (stmt) | |
2fd5894f | 775 | && !bitmap_bit_p (partition->stmts, gimple_uid (stmt))) |
b03c3082 JJ |
776 | reset_debug_uses (stmt); |
777 | } | |
778 | } | |
779 | ||
2fd5894f | 780 | for (i = 0; i < loop->num_nodes; i++) |
dea61d92 SP |
781 | { |
782 | basic_block bb = bbs[i]; | |
dea61d92 | 783 | |
538dd0b7 | 784 | for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);) |
2fd5894f | 785 | { |
538dd0b7 | 786 | gphi *phi = bsi.phi (); |
2fd5894f RB |
787 | if (!virtual_operand_p (gimple_phi_result (phi)) |
788 | && !bitmap_bit_p (partition->stmts, gimple_uid (phi))) | |
2706a615 | 789 | remove_phi_node (&bsi, true); |
2fd5894f RB |
790 | else |
791 | gsi_next (&bsi); | |
792 | } | |
dea61d92 | 793 | |
538dd0b7 | 794 | for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);) |
2706a615 | 795 | { |
355fe088 | 796 | gimple *stmt = gsi_stmt (bsi); |
b03c3082 JJ |
797 | if (gimple_code (stmt) != GIMPLE_LABEL |
798 | && !is_gimple_debug (stmt) | |
2fd5894f | 799 | && !bitmap_bit_p (partition->stmts, gimple_uid (stmt))) |
2706a615 | 800 | { |
36875e8f RB |
801 | /* Choose an arbitrary path through the empty CFG part |
802 | that this unnecessary control stmt controls. */ | |
538dd0b7 | 803 | if (gcond *cond_stmt = dyn_cast <gcond *> (stmt)) |
36875e8f | 804 | { |
538dd0b7 | 805 | gimple_cond_make_false (cond_stmt); |
36875e8f RB |
806 | update_stmt (stmt); |
807 | } | |
808 | else if (gimple_code (stmt) == GIMPLE_SWITCH) | |
809 | { | |
538dd0b7 | 810 | gswitch *switch_stmt = as_a <gswitch *> (stmt); |
36875e8f | 811 | gimple_switch_set_index |
538dd0b7 | 812 | (switch_stmt, CASE_LOW (gimple_switch_label (switch_stmt, 1))); |
36875e8f RB |
813 | update_stmt (stmt); |
814 | } | |
815 | else | |
816 | { | |
817 | unlink_stmt_vdef (stmt); | |
818 | gsi_remove (&bsi, true); | |
819 | release_defs (stmt); | |
820 | continue; | |
821 | } | |
2706a615 | 822 | } |
36875e8f | 823 | gsi_next (&bsi); |
2706a615 | 824 | } |
dea61d92 SP |
825 | } |
826 | ||
827 | free (bbs); | |
dea61d92 SP |
828 | } |
829 | ||
d0582dc1 | 830 | /* Build the size argument for a memory operation call. */ |
3661e899 | 831 | |
d0582dc1 | 832 | static tree |
d995e887 RB |
833 | build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter, |
834 | bool plus_one) | |
3661e899 | 835 | { |
d995e887 RB |
836 | tree size = fold_convert_loc (loc, sizetype, nb_iter); |
837 | if (plus_one) | |
838 | size = size_binop (PLUS_EXPR, size, size_one_node); | |
839 | size = fold_build2_loc (loc, MULT_EXPR, sizetype, size, | |
d0582dc1 | 840 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); |
d995e887 RB |
841 | size = fold_convert_loc (loc, size_type_node, size); |
842 | return size; | |
d0582dc1 RG |
843 | } |
844 | ||
845 | /* Build an address argument for a memory operation call. */ | |
846 | ||
847 | static tree | |
848 | build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes) | |
849 | { | |
850 | tree addr_base; | |
851 | ||
852 | addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); | |
853 | addr_base = fold_convert_loc (loc, sizetype, addr_base); | |
854 | ||
855 | /* Test for a negative stride, iterating over every element. */ | |
856 | if (tree_int_cst_sgn (DR_STEP (dr)) == -1) | |
857 | { | |
858 | addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, | |
859 | fold_convert_loc (loc, sizetype, nb_bytes)); | |
860 | addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, | |
861 | TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); | |
862 | } | |
863 | ||
864 | return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base); | |
3661e899 TB |
865 | } |
866 | ||
401f3a81 JJ |
867 | /* If VAL memory representation contains the same value in all bytes, |
868 | return that value, otherwise return -1. | |
869 | E.g. for 0x24242424 return 0x24, for IEEE double | |
870 | 747708026454360457216.0 return 0x44, etc. */ | |
871 | ||
872 | static int | |
873 | const_with_all_bytes_same (tree val) | |
874 | { | |
875 | unsigned char buf[64]; | |
876 | int i, len; | |
877 | ||
878 | if (integer_zerop (val) | |
401f3a81 JJ |
879 | || (TREE_CODE (val) == CONSTRUCTOR |
880 | && !TREE_CLOBBER_P (val) | |
881 | && CONSTRUCTOR_NELTS (val) == 0)) | |
882 | return 0; | |
883 | ||
9e207d6f JJ |
884 | if (real_zerop (val)) |
885 | { | |
886 | /* Only return 0 for +0.0, not for -0.0, which doesn't have | |
887 | an all bytes same memory representation. Don't transform | |
888 | -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS. */ | |
889 | switch (TREE_CODE (val)) | |
890 | { | |
891 | case REAL_CST: | |
892 | if (!real_isneg (TREE_REAL_CST_PTR (val))) | |
893 | return 0; | |
894 | break; | |
895 | case COMPLEX_CST: | |
896 | if (!const_with_all_bytes_same (TREE_REALPART (val)) | |
897 | && !const_with_all_bytes_same (TREE_IMAGPART (val))) | |
898 | return 0; | |
899 | break; | |
900 | case VECTOR_CST: | |
901 | unsigned int j; | |
902 | for (j = 0; j < VECTOR_CST_NELTS (val); ++j) | |
66088065 | 903 | if (const_with_all_bytes_same (VECTOR_CST_ELT (val, j))) |
9e207d6f JJ |
904 | break; |
905 | if (j == VECTOR_CST_NELTS (val)) | |
906 | return 0; | |
907 | break; | |
908 | default: | |
909 | break; | |
910 | } | |
911 | } | |
912 | ||
401f3a81 JJ |
913 | if (CHAR_BIT != 8 || BITS_PER_UNIT != 8) |
914 | return -1; | |
915 | ||
916 | len = native_encode_expr (val, buf, sizeof (buf)); | |
917 | if (len == 0) | |
918 | return -1; | |
919 | for (i = 1; i < len; i++) | |
920 | if (buf[i] != buf[0]) | |
921 | return -1; | |
922 | return buf[0]; | |
923 | } | |
924 | ||
30d55936 | 925 | /* Generate a call to memset for PARTITION in LOOP. */ |
dea61d92 | 926 | |
cfee318d | 927 | static void |
526ceb68 | 928 | generate_memset_builtin (struct loop *loop, partition *partition) |
dea61d92 | 929 | { |
30d55936 | 930 | gimple_stmt_iterator gsi; |
355fe088 | 931 | gimple *stmt, *fn_call; |
818625cf | 932 | tree mem, fn, nb_bytes; |
30d55936 | 933 | location_t loc; |
b6dd5261 | 934 | tree val; |
30d55936 | 935 | |
d0582dc1 | 936 | stmt = DR_STMT (partition->main_dr); |
30d55936 | 937 | loc = gimple_location (stmt); |
30d55936 RG |
938 | |
939 | /* The new statements will be placed before LOOP. */ | |
940 | gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | |
dea61d92 | 941 | |
d995e887 RB |
942 | nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter, |
943 | partition->plus_one); | |
d0582dc1 RG |
944 | nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, |
945 | false, GSI_CONTINUE_LINKING); | |
946 | mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); | |
947 | mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, | |
948 | false, GSI_CONTINUE_LINKING); | |
dea61d92 | 949 | |
b6dd5261 RG |
950 | /* This exactly matches the pattern recognition in classify_partition. */ |
951 | val = gimple_assign_rhs1 (stmt); | |
401f3a81 JJ |
952 | /* Handle constants like 0x15151515 and similarly |
953 | floating point constants etc. where all bytes are the same. */ | |
954 | int bytev = const_with_all_bytes_same (val); | |
955 | if (bytev != -1) | |
956 | val = build_int_cst (integer_type_node, bytev); | |
957 | else if (TREE_CODE (val) == INTEGER_CST) | |
958 | val = fold_convert (integer_type_node, val); | |
959 | else if (!useless_type_conversion_p (integer_type_node, TREE_TYPE (val))) | |
b6dd5261 | 960 | { |
b731b390 | 961 | tree tem = make_ssa_name (integer_type_node); |
355fe088 | 962 | gimple *cstmt = gimple_build_assign (tem, NOP_EXPR, val); |
401f3a81 JJ |
963 | gsi_insert_after (&gsi, cstmt, GSI_CONTINUE_LINKING); |
964 | val = tem; | |
b6dd5261 RG |
965 | } |
966 | ||
e79983f4 | 967 | fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); |
b6dd5261 | 968 | fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); |
d0582dc1 | 969 | gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); |
dea61d92 SP |
970 | |
971 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
b6dd5261 RG |
972 | { |
973 | fprintf (dump_file, "generated memset"); | |
401f3a81 | 974 | if (bytev == 0) |
b6dd5261 | 975 | fprintf (dump_file, " zero\n"); |
b6dd5261 RG |
976 | else |
977 | fprintf (dump_file, "\n"); | |
978 | } | |
dea61d92 SP |
979 | } |
980 | ||
d0582dc1 RG |
981 | /* Generate a call to memcpy for PARTITION in LOOP. */ |
982 | ||
983 | static void | |
526ceb68 | 984 | generate_memcpy_builtin (struct loop *loop, partition *partition) |
d0582dc1 RG |
985 | { |
986 | gimple_stmt_iterator gsi; | |
355fe088 | 987 | gimple *stmt, *fn_call; |
818625cf | 988 | tree dest, src, fn, nb_bytes; |
d0582dc1 RG |
989 | location_t loc; |
990 | enum built_in_function kind; | |
991 | ||
992 | stmt = DR_STMT (partition->main_dr); | |
993 | loc = gimple_location (stmt); | |
d0582dc1 RG |
994 | |
995 | /* The new statements will be placed before LOOP. */ | |
996 | gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | |
997 | ||
d995e887 RB |
998 | nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter, |
999 | partition->plus_one); | |
d0582dc1 RG |
1000 | nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, |
1001 | false, GSI_CONTINUE_LINKING); | |
1002 | dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes); | |
1003 | src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes); | |
510d73a0 RB |
1004 | if (partition->kind == PKIND_MEMCPY |
1005 | || ! ptr_derefs_may_alias_p (dest, src)) | |
d0582dc1 | 1006 | kind = BUILT_IN_MEMCPY; |
510d73a0 RB |
1007 | else |
1008 | kind = BUILT_IN_MEMMOVE; | |
d0582dc1 RG |
1009 | |
1010 | dest = force_gimple_operand_gsi (&gsi, dest, true, NULL_TREE, | |
1011 | false, GSI_CONTINUE_LINKING); | |
1012 | src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, | |
1013 | false, GSI_CONTINUE_LINKING); | |
1014 | fn = build_fold_addr_expr (builtin_decl_implicit (kind)); | |
1015 | fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); | |
1016 | gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); | |
1017 | ||
1018 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1019 | { | |
1020 | if (kind == BUILT_IN_MEMCPY) | |
1021 | fprintf (dump_file, "generated memcpy\n"); | |
1022 | else | |
1023 | fprintf (dump_file, "generated memmove\n"); | |
1024 | } | |
1025 | } | |
1026 | ||
30d55936 | 1027 | /* Remove and destroy the loop LOOP. */ |
dea61d92 | 1028 | |
30d55936 RG |
1029 | static void |
1030 | destroy_loop (struct loop *loop) | |
dea61d92 | 1031 | { |
30d55936 RG |
1032 | unsigned nbbs = loop->num_nodes; |
1033 | edge exit = single_exit (loop); | |
1034 | basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; | |
dea61d92 | 1035 | basic_block *bbs; |
30d55936 | 1036 | unsigned i; |
dea61d92 SP |
1037 | |
1038 | bbs = get_loop_body_in_dom_order (loop); | |
1039 | ||
30d55936 RG |
1040 | redirect_edge_pred (exit, src); |
1041 | exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); | |
1042 | exit->flags |= EDGE_FALLTHRU; | |
1043 | cancel_loop_tree (loop); | |
1044 | rescan_loop_exit (exit, false, true); | |
dea61d92 | 1045 | |
b9aba0a0 RB |
1046 | i = nbbs; |
1047 | do | |
c014f6f5 RG |
1048 | { |
1049 | /* We have made sure to not leave any dangling uses of SSA | |
1050 | names defined in the loop. With the exception of virtuals. | |
1051 | Make sure we replace all uses of virtual defs that will remain | |
1052 | outside of the loop with the bare symbol as delete_basic_block | |
1053 | will release them. */ | |
b9aba0a0 | 1054 | --i; |
538dd0b7 DM |
1055 | for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); |
1056 | gsi_next (&gsi)) | |
c014f6f5 | 1057 | { |
538dd0b7 | 1058 | gphi *phi = gsi.phi (); |
ea057359 | 1059 | if (virtual_operand_p (gimple_phi_result (phi))) |
c014f6f5 RG |
1060 | mark_virtual_phi_result_for_renaming (phi); |
1061 | } | |
538dd0b7 DM |
1062 | for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); |
1063 | gsi_next (&gsi)) | |
c014f6f5 | 1064 | { |
355fe088 | 1065 | gimple *stmt = gsi_stmt (gsi); |
c014f6f5 RG |
1066 | tree vdef = gimple_vdef (stmt); |
1067 | if (vdef && TREE_CODE (vdef) == SSA_NAME) | |
1068 | mark_virtual_operand_for_renaming (vdef); | |
1069 | } | |
1070 | delete_basic_block (bbs[i]); | |
1071 | } | |
b9aba0a0 RB |
1072 | while (i != 0); |
1073 | ||
dea61d92 | 1074 | free (bbs); |
30d55936 RG |
1075 | |
1076 | set_immediate_dominator (CDI_DOMINATORS, dest, | |
1077 | recompute_dominator (CDI_DOMINATORS, dest)); | |
dea61d92 SP |
1078 | } |
1079 | ||
b71b7a8e | 1080 | /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */ |
dea61d92 | 1081 | |
b71b7a8e | 1082 | static bool |
d0582dc1 | 1083 | generate_code_for_partition (struct loop *loop, |
526ceb68 | 1084 | partition *partition, bool copy_p) |
dea61d92 | 1085 | { |
30d55936 RG |
1086 | switch (partition->kind) |
1087 | { | |
826a536d RB |
1088 | case PKIND_NORMAL: |
1089 | /* Reductions all have to be in the last partition. */ | |
1090 | gcc_assert (!partition_reduction_p (partition) | |
1091 | || !copy_p); | |
1092 | generate_loops_for_partition (loop, partition, copy_p); | |
b71b7a8e | 1093 | return false; |
826a536d | 1094 | |
30d55936 | 1095 | case PKIND_MEMSET: |
d0582dc1 | 1096 | generate_memset_builtin (loop, partition); |
d0582dc1 RG |
1097 | break; |
1098 | ||
1099 | case PKIND_MEMCPY: | |
510d73a0 | 1100 | case PKIND_MEMMOVE: |
d0582dc1 | 1101 | generate_memcpy_builtin (loop, partition); |
30d55936 RG |
1102 | break; |
1103 | ||
1104 | default: | |
1105 | gcc_unreachable (); | |
1106 | } | |
dea61d92 | 1107 | |
826a536d RB |
1108 | /* Common tail for partitions we turn into a call. If this was the last |
1109 | partition for which we generate code, we have to destroy the loop. */ | |
1110 | if (!copy_p) | |
b71b7a8e RB |
1111 | return true; |
1112 | return false; | |
dea61d92 SP |
1113 | } |
1114 | ||
17c5cbdf BC |
1115 | /* Return data dependence relation for data references A and B. The two |
1116 | data references must be in lexicographic order wrto reduced dependence | |
1117 | graph RDG. We firstly try to find ddr from global ddr hash table. If | |
1118 | it doesn't exist, compute the ddr and cache it. */ | |
1119 | ||
1120 | static data_dependence_relation * | |
1121 | get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b) | |
1122 | { | |
1123 | struct data_dependence_relation ent, **slot; | |
1124 | struct data_dependence_relation *ddr; | |
1125 | ||
1126 | gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b)); | |
1127 | gcc_assert (rdg_vertex_for_stmt (rdg, DR_STMT (a)) | |
1128 | <= rdg_vertex_for_stmt (rdg, DR_STMT (b))); | |
1129 | ent.a = a; | |
1130 | ent.b = b; | |
1131 | slot = ddrs_table.find_slot (&ent, INSERT); | |
1132 | if (*slot == NULL) | |
1133 | { | |
1134 | ddr = initialize_data_dependence_relation (a, b, loop_nest); | |
1135 | compute_affine_dependence (ddr, loop_nest[0]); | |
1136 | *slot = ddr; | |
1137 | } | |
1138 | ||
1139 | return *slot; | |
1140 | } | |
dea61d92 | 1141 | |
f1eb4621 BC |
1142 | /* In reduced dependence graph RDG for loop distribution, return true if |
1143 | dependence between references DR1 and DR2 leads to a dependence cycle | |
1144 | and such dependence cycle can't be resolved by runtime alias check. */ | |
1145 | ||
1146 | static bool | |
1147 | data_dep_in_cycle_p (struct graph *rdg, | |
1148 | data_reference_p dr1, data_reference_p dr2) | |
1149 | { | |
1150 | struct data_dependence_relation *ddr; | |
1151 | ||
1152 | /* Re-shuffle data-refs to be in topological order. */ | |
1153 | if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1)) | |
1154 | > rdg_vertex_for_stmt (rdg, DR_STMT (dr2))) | |
1155 | std::swap (dr1, dr2); | |
1156 | ||
1157 | ddr = get_data_dependence (rdg, dr1, dr2); | |
1158 | ||
1159 | /* In case of no data dependence. */ | |
1160 | if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
1161 | return false; | |
1162 | /* For unknown data dependence or known data dependence which can't be | |
1163 | expressed in classic distance vector, we check if it can be resolved | |
1164 | by runtime alias check. If yes, we still consider data dependence | |
1165 | as won't introduce data dependence cycle. */ | |
1166 | else if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know | |
1167 | || DDR_NUM_DIST_VECTS (ddr) == 0) | |
1168 | return !runtime_alias_check_p (ddr, NULL, true); | |
1169 | else if (DDR_NUM_DIST_VECTS (ddr) > 1) | |
1170 | return true; | |
1171 | else if (DDR_REVERSED_P (ddr) | |
1172 | || lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1)) | |
1173 | return false; | |
1174 | ||
1175 | return true; | |
1176 | } | |
1177 | ||
1178 | /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update | |
1179 | PARTITION1's type after merging PARTITION2 into PARTITION1. */ | |
1180 | ||
1181 | static void | |
1182 | update_type_for_merge (struct graph *rdg, | |
1183 | partition *partition1, partition *partition2) | |
1184 | { | |
1185 | unsigned i, j; | |
1186 | bitmap_iterator bi, bj; | |
1187 | data_reference_p dr1, dr2; | |
1188 | ||
1189 | EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi) | |
1190 | { | |
1191 | unsigned start = (partition1 == partition2) ? i + 1 : 0; | |
1192 | ||
1193 | dr1 = datarefs_vec[i]; | |
1194 | EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, start, j, bj) | |
1195 | { | |
1196 | dr2 = datarefs_vec[j]; | |
1197 | if (DR_IS_READ (dr1) && DR_IS_READ (dr2)) | |
1198 | continue; | |
1199 | ||
1200 | /* Partition can only be executed sequentially if there is any | |
1201 | data dependence cycle. */ | |
1202 | if (data_dep_in_cycle_p (rdg, dr1, dr2)) | |
1203 | { | |
1204 | partition1->type = PTYPE_SEQUENTIAL; | |
1205 | return; | |
1206 | } | |
1207 | } | |
1208 | } | |
1209 | } | |
1210 | ||
24f161fd RB |
1211 | /* Returns a partition with all the statements needed for computing |
1212 | the vertex V of the RDG, also including the loop exit conditions. */ | |
dea61d92 | 1213 | |
526ceb68 | 1214 | static partition * |
24f161fd | 1215 | build_rdg_partition_for_vertex (struct graph *rdg, int v) |
dea61d92 | 1216 | { |
a7a44c07 | 1217 | partition *partition = partition_alloc (); |
00f96dc9 | 1218 | auto_vec<int, 3> nodes; |
a7a44c07 | 1219 | unsigned i, j; |
dea61d92 | 1220 | int x; |
a7a44c07 | 1221 | data_reference_p dr; |
dea61d92 | 1222 | |
174ec470 | 1223 | graphds_dfs (rdg, &v, 1, &nodes, false, NULL); |
dea61d92 | 1224 | |
9771b263 | 1225 | FOR_EACH_VEC_ELT (nodes, i, x) |
24f161fd RB |
1226 | { |
1227 | bitmap_set_bit (partition->stmts, x); | |
1228 | bitmap_set_bit (partition->loops, | |
1229 | loop_containing_stmt (RDG_STMT (rdg, x))->num); | |
a7a44c07 BC |
1230 | |
1231 | for (j = 0; RDG_DATAREFS (rdg, x).iterate (j, &dr); ++j) | |
1232 | { | |
1233 | unsigned idx = (unsigned) DR_INDEX (dr); | |
1234 | gcc_assert (idx < datarefs_vec.length ()); | |
1235 | ||
f1eb4621 BC |
1236 | /* Partition can only be executed sequentially if there is any |
1237 | unknown data reference. */ | |
1238 | if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) | |
1239 | || !DR_INIT (dr) || !DR_STEP (dr)) | |
1240 | partition->type = PTYPE_SEQUENTIAL; | |
1241 | ||
a7a44c07 BC |
1242 | bitmap_set_bit (partition->datarefs, idx); |
1243 | } | |
24f161fd | 1244 | } |
dea61d92 | 1245 | |
f1eb4621 BC |
1246 | if (partition->type == PTYPE_SEQUENTIAL) |
1247 | return partition; | |
1248 | ||
1249 | /* Further check if any data dependence prevents us from executing the | |
1250 | partition parallelly. */ | |
1251 | update_type_for_merge (rdg, partition, partition); | |
1252 | ||
dea61d92 SP |
1253 | return partition; |
1254 | } | |
1255 | ||
30d55936 | 1256 | /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. |
4a52eb19 BC |
1257 | For the moment we detect memset, memcpy and memmove patterns. Bitmap |
1258 | STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */ | |
cfee318d | 1259 | |
30d55936 | 1260 | static void |
4a52eb19 BC |
1261 | classify_partition (loop_p loop, struct graph *rdg, partition *partition, |
1262 | bitmap stmt_in_all_partitions) | |
cfee318d | 1263 | { |
cfee318d | 1264 | bitmap_iterator bi; |
30d55936 RG |
1265 | unsigned i; |
1266 | tree nb_iter; | |
d0582dc1 | 1267 | data_reference_p single_load, single_store; |
4a52eb19 | 1268 | bool volatiles_p = false, plus_one = false, has_reduction = false; |
30d55936 RG |
1269 | |
1270 | partition->kind = PKIND_NORMAL; | |
d0582dc1 RG |
1271 | partition->main_dr = NULL; |
1272 | partition->secondary_dr = NULL; | |
818625cf | 1273 | partition->niter = NULL_TREE; |
d995e887 | 1274 | partition->plus_one = false; |
30d55936 | 1275 | |
c61f8985 | 1276 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) |
30d55936 | 1277 | { |
355fe088 | 1278 | gimple *stmt = RDG_STMT (rdg, i); |
30d55936 RG |
1279 | |
1280 | if (gimple_has_volatile_ops (stmt)) | |
b9fc0497 | 1281 | volatiles_p = true; |
cfee318d | 1282 | |
4a52eb19 BC |
1283 | /* If the stmt is not included by all partitions and there is uses |
1284 | outside of the loop, then mark the partition as reduction. */ | |
9ca86fc3 | 1285 | if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) |
30d55936 | 1286 | { |
4a52eb19 BC |
1287 | /* Due to limitation in the transform phase we have to fuse all |
1288 | reduction partitions. As a result, this could cancel valid | |
1289 | loop distribution especially for loop that induction variable | |
1290 | is used outside of loop. To workaround this issue, we skip | |
1291 | marking partition as reudction if the reduction stmt belongs | |
1292 | to all partitions. In such case, reduction will be computed | |
1293 | correctly no matter how partitions are fused/distributed. */ | |
1294 | if (!bitmap_bit_p (stmt_in_all_partitions, i)) | |
1295 | { | |
1296 | partition->reduction_p = true; | |
1297 | return; | |
1298 | } | |
1299 | has_reduction = true; | |
30d55936 RG |
1300 | } |
1301 | } | |
1302 | ||
b9fc0497 RB |
1303 | /* Perform general partition disqualification for builtins. */ |
1304 | if (volatiles_p | |
4a52eb19 BC |
1305 | /* Simple workaround to prevent classifying the partition as builtin |
1306 | if it contains any use outside of loop. */ | |
1307 | || has_reduction | |
b9fc0497 RB |
1308 | || !flag_tree_loop_distribute_patterns) |
1309 | return; | |
1310 | ||
d0582dc1 RG |
1311 | /* Detect memset and memcpy. */ |
1312 | single_load = NULL; | |
1313 | single_store = NULL; | |
30d55936 RG |
1314 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi) |
1315 | { | |
355fe088 | 1316 | gimple *stmt = RDG_STMT (rdg, i); |
d0582dc1 RG |
1317 | data_reference_p dr; |
1318 | unsigned j; | |
30d55936 RG |
1319 | |
1320 | if (gimple_code (stmt) == GIMPLE_PHI) | |
1321 | continue; | |
1322 | ||
1323 | /* Any scalar stmts are ok. */ | |
1324 | if (!gimple_vuse (stmt)) | |
1325 | continue; | |
1326 | ||
d0582dc1 RG |
1327 | /* Otherwise just regular loads/stores. */ |
1328 | if (!gimple_assign_single_p (stmt)) | |
1329 | return; | |
1330 | ||
1331 | /* But exactly one store and/or load. */ | |
9771b263 | 1332 | for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j) |
30d55936 | 1333 | { |
d002d099 JJ |
1334 | tree type = TREE_TYPE (DR_REF (dr)); |
1335 | ||
1336 | /* The memset, memcpy and memmove library calls are only | |
1337 | able to deal with generic address space. */ | |
1338 | if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type))) | |
1339 | return; | |
1340 | ||
d0582dc1 RG |
1341 | if (DR_IS_READ (dr)) |
1342 | { | |
1343 | if (single_load != NULL) | |
1344 | return; | |
1345 | single_load = dr; | |
1346 | } | |
1347 | else | |
1348 | { | |
1349 | if (single_store != NULL) | |
1350 | return; | |
1351 | single_store = dr; | |
1352 | } | |
30d55936 | 1353 | } |
30d55936 RG |
1354 | } |
1355 | ||
818625cf RB |
1356 | if (!single_store) |
1357 | return; | |
1358 | ||
d995e887 | 1359 | nb_iter = number_of_latch_executions (loop); |
818625cf RB |
1360 | if (!nb_iter || nb_iter == chrec_dont_know) |
1361 | return; | |
d995e887 RB |
1362 | if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, |
1363 | gimple_bb (DR_STMT (single_store)))) | |
1364 | plus_one = true; | |
818625cf | 1365 | |
d0582dc1 RG |
1366 | if (single_store && !single_load) |
1367 | { | |
355fe088 | 1368 | gimple *stmt = DR_STMT (single_store); |
d0582dc1 | 1369 | tree rhs = gimple_assign_rhs1 (stmt); |
401f3a81 JJ |
1370 | if (const_with_all_bytes_same (rhs) == -1 |
1371 | && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs)) | |
1372 | || (TYPE_MODE (TREE_TYPE (rhs)) | |
1373 | != TYPE_MODE (unsigned_char_type_node)))) | |
d0582dc1 RG |
1374 | return; |
1375 | if (TREE_CODE (rhs) == SSA_NAME | |
1376 | && !SSA_NAME_IS_DEFAULT_DEF (rhs) | |
1377 | && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs)))) | |
1378 | return; | |
ca406576 RB |
1379 | if (!adjacent_dr_p (single_store) |
1380 | || !dominated_by_p (CDI_DOMINATORS, | |
1381 | loop->latch, gimple_bb (stmt))) | |
d0582dc1 RG |
1382 | return; |
1383 | partition->kind = PKIND_MEMSET; | |
1384 | partition->main_dr = single_store; | |
818625cf | 1385 | partition->niter = nb_iter; |
d995e887 | 1386 | partition->plus_one = plus_one; |
d0582dc1 RG |
1387 | } |
1388 | else if (single_store && single_load) | |
1389 | { | |
355fe088 TS |
1390 | gimple *store = DR_STMT (single_store); |
1391 | gimple *load = DR_STMT (single_load); | |
d0582dc1 RG |
1392 | /* Direct aggregate copy or via an SSA name temporary. */ |
1393 | if (load != store | |
1394 | && gimple_assign_lhs (load) != gimple_assign_rhs1 (store)) | |
1395 | return; | |
1396 | if (!adjacent_dr_p (single_store) | |
1397 | || !adjacent_dr_p (single_load) | |
1398 | || !operand_equal_p (DR_STEP (single_store), | |
ca406576 RB |
1399 | DR_STEP (single_load), 0) |
1400 | || !dominated_by_p (CDI_DOMINATORS, | |
1401 | loop->latch, gimple_bb (store))) | |
d0582dc1 | 1402 | return; |
f20132e7 RG |
1403 | /* Now check that if there is a dependence this dependence is |
1404 | of a suitable form for memmove. */ | |
17c5cbdf | 1405 | ddr_p ddr = get_data_dependence (rdg, single_load, single_store); |
f20132e7 | 1406 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) |
17c5cbdf BC |
1407 | return; |
1408 | ||
f20132e7 RG |
1409 | if (DDR_ARE_DEPENDENT (ddr) != chrec_known) |
1410 | { | |
1411 | if (DDR_NUM_DIST_VECTS (ddr) == 0) | |
17c5cbdf BC |
1412 | return; |
1413 | ||
f20132e7 | 1414 | lambda_vector dist_v; |
9771b263 | 1415 | FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v) |
f20132e7 RG |
1416 | { |
1417 | int dist = dist_v[index_in_loop_nest (loop->num, | |
1418 | DDR_LOOP_NEST (ddr))]; | |
1419 | if (dist > 0 && !DDR_REVERSED_P (ddr)) | |
17c5cbdf | 1420 | return; |
f20132e7 | 1421 | } |
510d73a0 | 1422 | partition->kind = PKIND_MEMMOVE; |
f20132e7 | 1423 | } |
510d73a0 RB |
1424 | else |
1425 | partition->kind = PKIND_MEMCPY; | |
d0582dc1 RG |
1426 | partition->main_dr = single_store; |
1427 | partition->secondary_dr = single_load; | |
818625cf | 1428 | partition->niter = nb_iter; |
d995e887 | 1429 | partition->plus_one = plus_one; |
d0582dc1 | 1430 | } |
cfee318d SP |
1431 | } |
1432 | ||
95f7d11b BC |
1433 | /* Returns true when PARTITION1 and PARTITION2 access the same memory |
1434 | object in RDG. */ | |
cfee318d SP |
1435 | |
1436 | static bool | |
95f7d11b BC |
1437 | share_memory_accesses (struct graph *rdg, |
1438 | partition *partition1, partition *partition2) | |
cfee318d | 1439 | { |
95f7d11b | 1440 | unsigned i, j; |
cfee318d | 1441 | bitmap_iterator bi, bj; |
95f7d11b | 1442 | data_reference_p dr1, dr2; |
1fa0c180 RG |
1443 | |
1444 | /* First check whether in the intersection of the two partitions are | |
1445 | any loads or stores. Common loads are the situation that happens | |
1446 | most often. */ | |
1447 | EXECUTE_IF_AND_IN_BITMAP (partition1->stmts, partition2->stmts, 0, i, bi) | |
1448 | if (RDG_MEM_WRITE_STMT (rdg, i) | |
1449 | || RDG_MEM_READS_STMT (rdg, i)) | |
1450 | return true; | |
cfee318d | 1451 | |
95f7d11b BC |
1452 | /* Then check whether the two partitions access the same memory object. */ |
1453 | EXECUTE_IF_SET_IN_BITMAP (partition1->datarefs, 0, i, bi) | |
1454 | { | |
1455 | dr1 = datarefs_vec[i]; | |
1456 | ||
1457 | if (!DR_BASE_ADDRESS (dr1) | |
1458 | || !DR_OFFSET (dr1) || !DR_INIT (dr1) || !DR_STEP (dr1)) | |
1459 | continue; | |
1460 | ||
1461 | EXECUTE_IF_SET_IN_BITMAP (partition2->datarefs, 0, j, bj) | |
1462 | { | |
1463 | dr2 = datarefs_vec[j]; | |
1464 | ||
1465 | if (!DR_BASE_ADDRESS (dr2) | |
1466 | || !DR_OFFSET (dr2) || !DR_INIT (dr2) || !DR_STEP (dr2)) | |
1467 | continue; | |
1468 | ||
1469 | if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0) | |
1470 | && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0) | |
1471 | && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0) | |
1472 | && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)) | |
1473 | return true; | |
1474 | } | |
1475 | } | |
cfee318d SP |
1476 | |
1477 | return false; | |
1478 | } | |
1479 | ||
4a52eb19 BC |
1480 | /* For each seed statement in STARTING_STMTS, this function builds |
1481 | partition for it by adding depended statements according to RDG. | |
1482 | All partitions are recorded in PARTITIONS. */ | |
dea61d92 SP |
1483 | |
1484 | static void | |
83a95546 | 1485 | rdg_build_partitions (struct graph *rdg, |
355fe088 | 1486 | vec<gimple *> starting_stmts, |
526ceb68 | 1487 | vec<partition *> *partitions) |
dea61d92 | 1488 | { |
0e3de1d4 | 1489 | auto_bitmap processed; |
2fd5894f | 1490 | int i; |
355fe088 | 1491 | gimple *stmt; |
dea61d92 | 1492 | |
2fd5894f | 1493 | FOR_EACH_VEC_ELT (starting_stmts, i, stmt) |
dea61d92 | 1494 | { |
2fd5894f RB |
1495 | int v = rdg_vertex_for_stmt (rdg, stmt); |
1496 | ||
1497 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1498 | fprintf (dump_file, | |
1499 | "ldist asked to generate code for vertex %d\n", v); | |
b8698a0f | 1500 | |
24f161fd RB |
1501 | /* If the vertex is already contained in another partition so |
1502 | is the partition rooted at it. */ | |
dea61d92 SP |
1503 | if (bitmap_bit_p (processed, v)) |
1504 | continue; | |
b8698a0f | 1505 | |
526ceb68 | 1506 | partition *partition = build_rdg_partition_for_vertex (rdg, v); |
24f161fd | 1507 | bitmap_ior_into (processed, partition->stmts); |
dea61d92 | 1508 | |
826a536d | 1509 | if (dump_file && (dump_flags & TDF_DETAILS)) |
dea61d92 | 1510 | { |
f1eb4621 BC |
1511 | fprintf (dump_file, "ldist creates useful %s partition:\n", |
1512 | partition->type == PTYPE_PARALLEL ? "parallel" : "sequent"); | |
1513 | bitmap_print (dump_file, partition->stmts, " ", "\n"); | |
dea61d92 | 1514 | } |
826a536d RB |
1515 | |
1516 | partitions->safe_push (partition); | |
dea61d92 SP |
1517 | } |
1518 | ||
83a95546 RB |
1519 | /* All vertices should have been assigned to at least one partition now, |
1520 | other than vertices belonging to dead code. */ | |
dea61d92 SP |
1521 | } |
1522 | ||
1523 | /* Dump to FILE the PARTITIONS. */ | |
1524 | ||
1525 | static void | |
526ceb68 | 1526 | dump_rdg_partitions (FILE *file, vec<partition *> partitions) |
dea61d92 SP |
1527 | { |
1528 | int i; | |
526ceb68 | 1529 | partition *partition; |
dea61d92 | 1530 | |
9771b263 | 1531 | FOR_EACH_VEC_ELT (partitions, i, partition) |
c61f8985 | 1532 | debug_bitmap_file (file, partition->stmts); |
dea61d92 SP |
1533 | } |
1534 | ||
1535 | /* Debug PARTITIONS. */ | |
526ceb68 | 1536 | extern void debug_rdg_partitions (vec<partition *> ); |
dea61d92 | 1537 | |
24e47c76 | 1538 | DEBUG_FUNCTION void |
526ceb68 | 1539 | debug_rdg_partitions (vec<partition *> partitions) |
dea61d92 SP |
1540 | { |
1541 | dump_rdg_partitions (stderr, partitions); | |
1542 | } | |
1543 | ||
2b8aee8e SP |
1544 | /* Returns the number of read and write operations in the RDG. */ |
1545 | ||
1546 | static int | |
1547 | number_of_rw_in_rdg (struct graph *rdg) | |
1548 | { | |
1549 | int i, res = 0; | |
1550 | ||
1551 | for (i = 0; i < rdg->n_vertices; i++) | |
1552 | { | |
1553 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1554 | ++res; | |
1555 | ||
1556 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1557 | ++res; | |
1558 | } | |
1559 | ||
1560 | return res; | |
1561 | } | |
1562 | ||
1563 | /* Returns the number of read and write operations in a PARTITION of | |
1564 | the RDG. */ | |
1565 | ||
1566 | static int | |
526ceb68 | 1567 | number_of_rw_in_partition (struct graph *rdg, partition *partition) |
2b8aee8e SP |
1568 | { |
1569 | int res = 0; | |
1570 | unsigned i; | |
1571 | bitmap_iterator ii; | |
1572 | ||
c61f8985 | 1573 | EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, ii) |
2b8aee8e SP |
1574 | { |
1575 | if (RDG_MEM_WRITE_STMT (rdg, i)) | |
1576 | ++res; | |
1577 | ||
1578 | if (RDG_MEM_READS_STMT (rdg, i)) | |
1579 | ++res; | |
1580 | } | |
1581 | ||
1582 | return res; | |
1583 | } | |
1584 | ||
1585 | /* Returns true when one of the PARTITIONS contains all the read or | |
1586 | write operations of RDG. */ | |
1587 | ||
1588 | static bool | |
9771b263 | 1589 | partition_contains_all_rw (struct graph *rdg, |
526ceb68 | 1590 | vec<partition *> partitions) |
2b8aee8e SP |
1591 | { |
1592 | int i; | |
526ceb68 | 1593 | partition *partition; |
2b8aee8e SP |
1594 | int nrw = number_of_rw_in_rdg (rdg); |
1595 | ||
9771b263 | 1596 | FOR_EACH_VEC_ELT (partitions, i, partition) |
2b8aee8e SP |
1597 | if (nrw == number_of_rw_in_partition (rdg, partition)) |
1598 | return true; | |
1599 | ||
1600 | return false; | |
1601 | } | |
1602 | ||
447f3223 RB |
1603 | /* Compute partition dependence created by the data references in DRS1 |
1604 | and DRS2 and modify and return DIR according to that. */ | |
1605 | ||
1606 | static int | |
4084ea5f | 1607 | pg_add_dependence_edges (struct graph *rdg, int dir, |
a7a44c07 | 1608 | bitmap drs1, bitmap drs2) |
447f3223 | 1609 | { |
a7a44c07 BC |
1610 | unsigned i, j; |
1611 | bitmap_iterator bi, bj; | |
1612 | data_reference_p dr1, dr2, saved_dr1; | |
447f3223 RB |
1613 | |
1614 | /* dependence direction - 0 is no dependence, -1 is back, | |
1615 | 1 is forth, 2 is both (we can stop then, merging will occur). */ | |
a7a44c07 BC |
1616 | EXECUTE_IF_SET_IN_BITMAP (drs1, 0, i, bi) |
1617 | { | |
1618 | dr1 = datarefs_vec[i]; | |
1619 | ||
1620 | EXECUTE_IF_SET_IN_BITMAP (drs2, 0, j, bj) | |
1621 | { | |
1622 | dr2 = datarefs_vec[j]; | |
1623 | ||
1624 | /* Skip all <read, read> data dependence. */ | |
1625 | if (DR_IS_READ (dr1) && DR_IS_READ (dr2)) | |
1626 | continue; | |
1627 | ||
1628 | saved_dr1 = dr1; | |
1629 | int this_dir = 1; | |
1630 | ddr_p ddr; | |
1631 | /* Re-shuffle data-refs to be in dominator order. */ | |
1632 | if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1)) | |
1633 | > rdg_vertex_for_stmt (rdg, DR_STMT (dr2))) | |
1634 | { | |
1635 | std::swap (dr1, dr2); | |
1636 | this_dir = -this_dir; | |
1637 | } | |
17c5cbdf | 1638 | ddr = get_data_dependence (rdg, dr1, dr2); |
a7a44c07 BC |
1639 | if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) |
1640 | this_dir = 2; | |
1641 | else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE) | |
1642 | { | |
1643 | if (DDR_REVERSED_P (ddr)) | |
1644 | { | |
1645 | std::swap (dr1, dr2); | |
1646 | this_dir = -this_dir; | |
1647 | } | |
1648 | /* Known dependences can still be unordered througout the | |
1649 | iteration space, see gcc.dg/tree-ssa/ldist-16.c. */ | |
1650 | if (DDR_NUM_DIST_VECTS (ddr) != 1) | |
1651 | this_dir = 2; | |
1652 | /* If the overlap is exact preserve stmt order. */ | |
1653 | else if (lambda_vector_zerop (DDR_DIST_VECT (ddr, 0), 1)) | |
1654 | ; | |
1655 | else | |
1656 | { | |
1657 | /* Else as the distance vector is lexicographic positive | |
1658 | swap the dependence direction. */ | |
1659 | this_dir = -this_dir; | |
1660 | } | |
1661 | } | |
1662 | else | |
1663 | this_dir = 0; | |
a7a44c07 BC |
1664 | if (this_dir == 2) |
1665 | return 2; | |
1666 | else if (dir == 0) | |
1667 | dir = this_dir; | |
1668 | else if (this_dir != 0 && dir != this_dir) | |
1669 | return 2; | |
1670 | /* Shuffle "back" dr1. */ | |
1671 | dr1 = saved_dr1; | |
1672 | } | |
1673 | } | |
447f3223 RB |
1674 | return dir; |
1675 | } | |
1676 | ||
1677 | /* Compare postorder number of the partition graph vertices V1 and V2. */ | |
1678 | ||
1679 | static int | |
1680 | pgcmp (const void *v1_, const void *v2_) | |
1681 | { | |
1682 | const vertex *v1 = (const vertex *)v1_; | |
1683 | const vertex *v2 = (const vertex *)v2_; | |
1684 | return v2->post - v1->post; | |
1685 | } | |
2fd5894f RB |
1686 | |
1687 | /* Distributes the code from LOOP in such a way that producer | |
1688 | statements are placed before consumer statements. Tries to separate | |
1689 | only the statements from STMTS into separate loops. | |
b71b7a8e RB |
1690 | Returns the number of distributed loops. Set *DESTROY_P to whether |
1691 | LOOP needs to be destroyed. */ | |
dea61d92 SP |
1692 | |
1693 | static int | |
355fe088 | 1694 | distribute_loop (struct loop *loop, vec<gimple *> stmts, |
b71b7a8e | 1695 | control_dependences *cd, int *nb_calls, bool *destroy_p) |
dea61d92 | 1696 | { |
2fd5894f | 1697 | struct graph *rdg; |
526ceb68 | 1698 | partition *partition; |
be6b029b | 1699 | bool any_builtin; |
2fd5894f | 1700 | int i, nbp; |
447f3223 RB |
1701 | graph *pg = NULL; |
1702 | int num_sccs = 1; | |
dea61d92 | 1703 | |
c9326aef | 1704 | *destroy_p = false; |
826a536d | 1705 | *nb_calls = 0; |
4084ea5f | 1706 | loop_nest.create (0); |
2fd5894f | 1707 | if (!find_loop_nest (loop, &loop_nest)) |
4084ea5f BC |
1708 | { |
1709 | loop_nest.release (); | |
1710 | return 0; | |
1711 | } | |
2fd5894f | 1712 | |
9fafb14a | 1713 | datarefs_vec.create (20); |
4084ea5f | 1714 | rdg = build_rdg (loop, cd); |
2fd5894f RB |
1715 | if (!rdg) |
1716 | { | |
1717 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1718 | fprintf (dump_file, | |
1719 | "Loop %d not distributed: failed to build the RDG.\n", | |
1720 | loop->num); | |
1721 | ||
4084ea5f | 1722 | loop_nest.release (); |
9fafb14a BC |
1723 | free_data_refs (datarefs_vec); |
1724 | return 0; | |
1725 | } | |
1726 | ||
1727 | if (datarefs_vec.length () > MAX_DATAREFS_NUM) | |
1728 | { | |
1729 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1730 | fprintf (dump_file, | |
1731 | "Loop %d not distributed: too many memory references.\n", | |
1732 | loop->num); | |
1733 | ||
1734 | free_rdg (rdg); | |
1735 | loop_nest.release (); | |
1736 | free_data_refs (datarefs_vec); | |
2fd5894f RB |
1737 | return 0; |
1738 | } | |
1739 | ||
9fafb14a BC |
1740 | data_reference_p dref; |
1741 | for (i = 0; datarefs_vec.iterate (i, &dref); ++i) | |
1742 | dref->aux = (void *) (uintptr_t) i; | |
1743 | ||
2fd5894f RB |
1744 | if (dump_file && (dump_flags & TDF_DETAILS)) |
1745 | dump_rdg (dump_file, rdg); | |
1746 | ||
526ceb68 | 1747 | auto_vec<struct partition *, 3> partitions; |
2fd5894f | 1748 | rdg_build_partitions (rdg, stmts, &partitions); |
dea61d92 | 1749 | |
4a52eb19 BC |
1750 | auto_bitmap stmt_in_all_partitions; |
1751 | bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts); | |
1752 | for (i = 1; partitions.iterate (i, &partition); ++i) | |
1753 | bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts); | |
1754 | ||
be6b029b | 1755 | any_builtin = false; |
9771b263 | 1756 | FOR_EACH_VEC_ELT (partitions, i, partition) |
be6b029b | 1757 | { |
4a52eb19 | 1758 | classify_partition (loop, rdg, partition, stmt_in_all_partitions); |
be6b029b RG |
1759 | any_builtin |= partition_builtin_p (partition); |
1760 | } | |
30d55936 | 1761 | |
447f3223 RB |
1762 | /* If we are only distributing patterns but did not detect any, |
1763 | simply bail out. */ | |
9fed7f3a RB |
1764 | if (!flag_tree_loop_distribution |
1765 | && !any_builtin) | |
1766 | { | |
1767 | nbp = 0; | |
1768 | goto ldist_done; | |
1769 | } | |
1770 | ||
447f3223 RB |
1771 | /* If we are only distributing patterns fuse all partitions that |
1772 | were not classified as builtins. This also avoids chopping | |
1773 | a loop into pieces, separated by builtin calls. That is, we | |
1774 | only want no or a single loop body remaining. */ | |
526ceb68 | 1775 | struct partition *into; |
447f3223 RB |
1776 | if (!flag_tree_loop_distribution) |
1777 | { | |
1778 | for (i = 0; partitions.iterate (i, &into); ++i) | |
1779 | if (!partition_builtin_p (into)) | |
1780 | break; | |
1781 | for (++i; partitions.iterate (i, &partition); ++i) | |
1782 | if (!partition_builtin_p (partition)) | |
1783 | { | |
f1eb4621 | 1784 | partition_merge_into (NULL, into, partition, FUSE_NON_BUILTIN); |
447f3223 RB |
1785 | partitions.unordered_remove (i); |
1786 | partition_free (partition); | |
1787 | i--; | |
1788 | } | |
1789 | } | |
1790 | ||
1791 | /* Due to limitations in the transform phase we have to fuse all | |
1792 | reduction partitions into the last partition so the existing | |
1793 | loop will contain all loop-closed PHI nodes. */ | |
1794 | for (i = 0; partitions.iterate (i, &into); ++i) | |
1795 | if (partition_reduction_p (into)) | |
1796 | break; | |
1797 | for (i = i + 1; partitions.iterate (i, &partition); ++i) | |
1798 | if (partition_reduction_p (partition)) | |
1799 | { | |
f1eb4621 | 1800 | partition_merge_into (rdg, into, partition, FUSE_REDUCTION); |
447f3223 RB |
1801 | partitions.unordered_remove (i); |
1802 | partition_free (partition); | |
1803 | i--; | |
1804 | } | |
1805 | ||
9fed7f3a RB |
1806 | /* Apply our simple cost model - fuse partitions with similar |
1807 | memory accesses. */ | |
9fed7f3a RB |
1808 | for (i = 0; partitions.iterate (i, &into); ++i) |
1809 | { | |
16eba420 | 1810 | bool changed = false; |
9fed7f3a RB |
1811 | if (partition_builtin_p (into)) |
1812 | continue; | |
1813 | for (int j = i + 1; | |
1814 | partitions.iterate (j, &partition); ++j) | |
1815 | { | |
95f7d11b | 1816 | if (share_memory_accesses (rdg, into, partition)) |
9fed7f3a | 1817 | { |
f1eb4621 | 1818 | partition_merge_into (rdg, into, partition, FUSE_SHARE_REF); |
447f3223 | 1819 | partitions.unordered_remove (j); |
9fed7f3a RB |
1820 | partition_free (partition); |
1821 | j--; | |
16eba420 | 1822 | changed = true; |
9fed7f3a RB |
1823 | } |
1824 | } | |
16eba420 RB |
1825 | /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar |
1826 | accesses when 1 and 2 have similar accesses but not 0 and 1 | |
1827 | then in the next iteration we will fail to consider merging | |
1828 | 1 into 0,2. So try again if we did any merging into 0. */ | |
1829 | if (changed) | |
1830 | i--; | |
9fed7f3a RB |
1831 | } |
1832 | ||
447f3223 RB |
1833 | /* Build the partition dependency graph. */ |
1834 | if (partitions.length () > 1) | |
c014f6f5 | 1835 | { |
447f3223 RB |
1836 | pg = new_graph (partitions.length ()); |
1837 | struct pgdata { | |
526ceb68 | 1838 | struct partition *partition; |
447f3223 RB |
1839 | }; |
1840 | #define PGDATA(i) ((pgdata *)(pg->vertices[i].data)) | |
1841 | for (i = 0; partitions.iterate (i, &partition); ++i) | |
1842 | { | |
1843 | vertex *v = &pg->vertices[i]; | |
1844 | pgdata *data = new pgdata; | |
447f3223 RB |
1845 | /* FIXME - leaks. */ |
1846 | v->data = data; | |
447f3223 | 1847 | data->partition = partition; |
447f3223 | 1848 | } |
526ceb68 | 1849 | struct partition *partition1, *partition2; |
447f3223 RB |
1850 | for (i = 0; partitions.iterate (i, &partition1); ++i) |
1851 | for (int j = i + 1; partitions.iterate (j, &partition2); ++j) | |
1852 | { | |
1853 | /* dependence direction - 0 is no dependence, -1 is back, | |
1854 | 1 is forth, 2 is both (we can stop then, merging will occur). */ | |
a7a44c07 BC |
1855 | int dir = pg_add_dependence_edges (rdg, 0, |
1856 | partition1->datarefs, | |
1857 | partition2->datarefs); | |
447f3223 RB |
1858 | if (dir == 1 || dir == 2) |
1859 | add_edge (pg, i, j); | |
1860 | if (dir == -1 || dir == 2) | |
1861 | add_edge (pg, j, i); | |
1862 | } | |
1863 | ||
1864 | /* Add edges to the reduction partition (if any) to force it last. */ | |
1865 | unsigned j; | |
1866 | for (j = 0; partitions.iterate (j, &partition); ++j) | |
1867 | if (partition_reduction_p (partition)) | |
1868 | break; | |
1869 | if (j < partitions.length ()) | |
38ad2d07 | 1870 | { |
447f3223 RB |
1871 | for (unsigned i = 0; partitions.iterate (i, &partition); ++i) |
1872 | if (i != j) | |
1873 | add_edge (pg, i, j); | |
1874 | } | |
1875 | ||
1876 | /* Compute partitions we cannot separate and fuse them. */ | |
1877 | num_sccs = graphds_scc (pg, NULL); | |
1878 | for (i = 0; i < num_sccs; ++i) | |
1879 | { | |
526ceb68 | 1880 | struct partition *first; |
447f3223 RB |
1881 | int j; |
1882 | for (j = 0; partitions.iterate (j, &first); ++j) | |
1883 | if (pg->vertices[j].component == i) | |
38ad2d07 | 1884 | break; |
447f3223 RB |
1885 | for (j = j + 1; partitions.iterate (j, &partition); ++j) |
1886 | if (pg->vertices[j].component == i) | |
38ad2d07 | 1887 | { |
f1eb4621 BC |
1888 | partition_merge_into (NULL, first, |
1889 | partition, FUSE_SAME_SCC); | |
1890 | first->type = PTYPE_SEQUENTIAL; | |
447f3223 | 1891 | partitions[j] = NULL; |
5eb010bc | 1892 | partition_free (partition); |
447f3223 | 1893 | PGDATA (j)->partition = NULL; |
38ad2d07 | 1894 | } |
38ad2d07 | 1895 | } |
30d55936 | 1896 | |
447f3223 RB |
1897 | /* Now order the remaining nodes in postorder. */ |
1898 | qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp); | |
1899 | partitions.truncate (0); | |
1900 | for (i = 0; i < pg->n_vertices; ++i) | |
b9fc0497 | 1901 | { |
447f3223 RB |
1902 | pgdata *data = PGDATA (i); |
1903 | if (data->partition) | |
1904 | partitions.safe_push (data->partition); | |
447f3223 | 1905 | delete data; |
b9fc0497 | 1906 | } |
447f3223 RB |
1907 | gcc_assert (partitions.length () == (unsigned)num_sccs); |
1908 | free_graph (pg); | |
b9fc0497 RB |
1909 | } |
1910 | ||
9771b263 | 1911 | nbp = partitions.length (); |
a4293fa6 | 1912 | if (nbp == 0 |
9771b263 DN |
1913 | || (nbp == 1 && !partition_builtin_p (partitions[0])) |
1914 | || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) | |
c014f6f5 RG |
1915 | { |
1916 | nbp = 0; | |
1917 | goto ldist_done; | |
1918 | } | |
dea61d92 SP |
1919 | |
1920 | if (dump_file && (dump_flags & TDF_DETAILS)) | |
1921 | dump_rdg_partitions (dump_file, partitions); | |
1922 | ||
9771b263 | 1923 | FOR_EACH_VEC_ELT (partitions, i, partition) |
826a536d RB |
1924 | { |
1925 | if (partition_builtin_p (partition)) | |
1926 | (*nb_calls)++; | |
b71b7a8e | 1927 | *destroy_p |= generate_code_for_partition (loop, partition, i < nbp - 1); |
826a536d | 1928 | } |
dea61d92 | 1929 | |
dea61d92 | 1930 | ldist_done: |
4084ea5f | 1931 | loop_nest.release (); |
9fafb14a | 1932 | free_data_refs (datarefs_vec); |
17c5cbdf BC |
1933 | for (hash_table<ddr_hasher>::iterator iter = ddrs_table.begin (); |
1934 | iter != ddrs_table.end (); ++iter) | |
1935 | { | |
1936 | free_dependence_relation (*iter); | |
1937 | *iter = NULL; | |
1938 | } | |
1939 | ddrs_table.empty (); | |
dea61d92 | 1940 | |
9771b263 | 1941 | FOR_EACH_VEC_ELT (partitions, i, partition) |
c61f8985 | 1942 | partition_free (partition); |
dea61d92 | 1943 | |
dea61d92 | 1944 | free_rdg (rdg); |
826a536d | 1945 | return nbp - *nb_calls; |
dea61d92 SP |
1946 | } |
1947 | ||
1948 | /* Distribute all loops in the current function. */ | |
1949 | ||
be55bfe6 TS |
1950 | namespace { |
1951 | ||
1952 | const pass_data pass_data_loop_distribution = | |
1953 | { | |
1954 | GIMPLE_PASS, /* type */ | |
1955 | "ldist", /* name */ | |
1956 | OPTGROUP_LOOP, /* optinfo_flags */ | |
be55bfe6 TS |
1957 | TV_TREE_LOOP_DISTRIBUTION, /* tv_id */ |
1958 | ( PROP_cfg | PROP_ssa ), /* properties_required */ | |
1959 | 0, /* properties_provided */ | |
1960 | 0, /* properties_destroyed */ | |
1961 | 0, /* todo_flags_start */ | |
3bea341f | 1962 | 0, /* todo_flags_finish */ |
be55bfe6 TS |
1963 | }; |
1964 | ||
1965 | class pass_loop_distribution : public gimple_opt_pass | |
1966 | { | |
1967 | public: | |
1968 | pass_loop_distribution (gcc::context *ctxt) | |
1969 | : gimple_opt_pass (pass_data_loop_distribution, ctxt) | |
1970 | {} | |
1971 | ||
1972 | /* opt_pass methods: */ | |
1973 | virtual bool gate (function *) | |
1974 | { | |
1975 | return flag_tree_loop_distribution | |
1976 | || flag_tree_loop_distribute_patterns; | |
1977 | } | |
1978 | ||
1979 | virtual unsigned int execute (function *); | |
1980 | ||
1981 | }; // class pass_loop_distribution | |
1982 | ||
1983 | unsigned int | |
1984 | pass_loop_distribution::execute (function *fun) | |
dea61d92 SP |
1985 | { |
1986 | struct loop *loop; | |
c014f6f5 | 1987 | bool changed = false; |
1fa0c180 | 1988 | basic_block bb; |
36875e8f | 1989 | control_dependences *cd = NULL; |
b71b7a8e | 1990 | auto_vec<loop_p> loops_to_be_destroyed; |
1fa0c180 | 1991 | |
773d9217 BC |
1992 | if (number_of_loops (fun) <= 1) |
1993 | return 0; | |
1994 | ||
3be57c56 BC |
1995 | /* Compute topological order for basic blocks. Topological order is |
1996 | needed because data dependence is computed for data references in | |
1997 | lexicographical order. */ | |
1998 | if (bb_top_order_index == NULL) | |
1999 | { | |
2000 | int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun)); | |
2001 | ||
2002 | bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun)); | |
2003 | bb_top_order_index_size | |
2004 | = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true); | |
2005 | for (int i = 0; i < bb_top_order_index_size; i++) | |
2006 | bb_top_order_index[rpo[i]] = i; | |
2007 | ||
2008 | free (rpo); | |
2009 | } | |
2010 | ||
be55bfe6 | 2011 | FOR_ALL_BB_FN (bb, fun) |
1fa0c180 RG |
2012 | { |
2013 | gimple_stmt_iterator gsi; | |
2014 | for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
2015 | gimple_set_uid (gsi_stmt (gsi), -1); | |
2016 | for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
2017 | gimple_set_uid (gsi_stmt (gsi), -1); | |
2018 | } | |
dea61d92 | 2019 | |
c014f6f5 RG |
2020 | /* We can at the moment only distribute non-nested loops, thus restrict |
2021 | walking to innermost loops. */ | |
f0bd40b1 | 2022 | FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) |
dea61d92 | 2023 | { |
355fe088 | 2024 | auto_vec<gimple *> work_list; |
be6b029b | 2025 | basic_block *bbs; |
0e20c89f | 2026 | int num = loop->num; |
be6b029b | 2027 | unsigned int i; |
a3357f7d RG |
2028 | |
2029 | /* If the loop doesn't have a single exit we will fail anyway, | |
2030 | so do that early. */ | |
2031 | if (!single_exit (loop)) | |
2032 | continue; | |
dea61d92 | 2033 | |
f56f2d33 JH |
2034 | /* Only optimize hot loops. */ |
2035 | if (!optimize_loop_for_speed_p (loop)) | |
2036 | continue; | |
2037 | ||
be6b029b RG |
2038 | /* Initialize the worklist with stmts we seed the partitions with. */ |
2039 | bbs = get_loop_body_in_dom_order (loop); | |
2040 | for (i = 0; i < loop->num_nodes; ++i) | |
2041 | { | |
538dd0b7 DM |
2042 | for (gphi_iterator gsi = gsi_start_phis (bbs[i]); |
2043 | !gsi_end_p (gsi); | |
2044 | gsi_next (&gsi)) | |
deb6c11a | 2045 | { |
538dd0b7 | 2046 | gphi *phi = gsi.phi (); |
deb6c11a RB |
2047 | if (virtual_operand_p (gimple_phi_result (phi))) |
2048 | continue; | |
2049 | /* Distribute stmts which have defs that are used outside of | |
be55bfe6 | 2050 | the loop. */ |
deb6c11a RB |
2051 | if (!stmt_has_scalar_dependences_outside_loop (loop, phi)) |
2052 | continue; | |
2053 | work_list.safe_push (phi); | |
2054 | } | |
538dd0b7 DM |
2055 | for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); |
2056 | !gsi_end_p (gsi); | |
2057 | gsi_next (&gsi)) | |
be6b029b | 2058 | { |
355fe088 | 2059 | gimple *stmt = gsi_stmt (gsi); |
83a95546 RB |
2060 | |
2061 | /* If there is a stmt with side-effects bail out - we | |
be55bfe6 | 2062 | cannot and should not distribute this loop. */ |
83a95546 RB |
2063 | if (gimple_has_side_effects (stmt)) |
2064 | { | |
2065 | work_list.truncate (0); | |
2066 | goto out; | |
2067 | } | |
2068 | ||
b9fc0497 | 2069 | /* Distribute stmts which have defs that are used outside of |
be55bfe6 | 2070 | the loop. */ |
b9fc0497 RB |
2071 | if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) |
2072 | ; | |
2073 | /* Otherwise only distribute stores for now. */ | |
e179190c | 2074 | else if (!gimple_vdef (stmt)) |
be6b029b RG |
2075 | continue; |
2076 | ||
9771b263 | 2077 | work_list.safe_push (stmt); |
be6b029b RG |
2078 | } |
2079 | } | |
83a95546 | 2080 | out: |
be6b029b | 2081 | free (bbs); |
c014f6f5 | 2082 | |
826a536d RB |
2083 | int nb_generated_loops = 0; |
2084 | int nb_generated_calls = 0; | |
2085 | location_t loc = find_loop_location (loop); | |
9771b263 | 2086 | if (work_list.length () > 0) |
36875e8f RB |
2087 | { |
2088 | if (!cd) | |
2089 | { | |
ca406576 | 2090 | calculate_dominance_info (CDI_DOMINATORS); |
36875e8f | 2091 | calculate_dominance_info (CDI_POST_DOMINATORS); |
30fd2977 | 2092 | cd = new control_dependences (); |
36875e8f RB |
2093 | free_dominance_info (CDI_POST_DOMINATORS); |
2094 | } | |
b71b7a8e | 2095 | bool destroy_p; |
826a536d | 2096 | nb_generated_loops = distribute_loop (loop, work_list, cd, |
b71b7a8e RB |
2097 | &nb_generated_calls, |
2098 | &destroy_p); | |
2099 | if (destroy_p) | |
2100 | loops_to_be_destroyed.safe_push (loop); | |
36875e8f | 2101 | } |
c014f6f5 | 2102 | |
826a536d | 2103 | if (nb_generated_loops + nb_generated_calls > 0) |
dea61d92 | 2104 | { |
826a536d RB |
2105 | changed = true; |
2106 | dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, | |
2107 | loc, "Loop %d distributed: split to %d loops " | |
2108 | "and %d library calls.\n", | |
2109 | num, nb_generated_loops, nb_generated_calls); | |
dea61d92 | 2110 | } |
826a536d RB |
2111 | else if (dump_file && (dump_flags & TDF_DETAILS)) |
2112 | fprintf (dump_file, "Loop %d is the same.\n", num); | |
dea61d92 SP |
2113 | } |
2114 | ||
36875e8f RB |
2115 | if (cd) |
2116 | delete cd; | |
2117 | ||
3be57c56 BC |
2118 | if (bb_top_order_index != NULL) |
2119 | { | |
2120 | free (bb_top_order_index); | |
2121 | bb_top_order_index = NULL; | |
2122 | bb_top_order_index_size = 0; | |
2123 | } | |
2124 | ||
c014f6f5 RG |
2125 | if (changed) |
2126 | { | |
30fd2977 RB |
2127 | /* Destroy loop bodies that could not be reused. Do this late as we |
2128 | otherwise can end up refering to stale data in control dependences. */ | |
2129 | unsigned i; | |
2130 | FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop) | |
3be57c56 | 2131 | destroy_loop (loop); |
30fd2977 | 2132 | |
d0ed943c RB |
2133 | /* Cached scalar evolutions now may refer to wrong or non-existing |
2134 | loops. */ | |
2135 | scev_reset_htab (); | |
be55bfe6 | 2136 | mark_virtual_operands_for_renaming (fun); |
c014f6f5 RG |
2137 | rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
2138 | } | |
2139 | ||
b2b29377 | 2140 | checking_verify_loop_structure (); |
c014f6f5 | 2141 | |
5006671f | 2142 | return 0; |
dea61d92 SP |
2143 | } |
2144 | ||
27a4cd48 DM |
2145 | } // anon namespace |
2146 | ||
2147 | gimple_opt_pass * | |
2148 | make_pass_loop_distribution (gcc::context *ctxt) | |
2149 | { | |
2150 | return new pass_loop_distribution (ctxt); | |
2151 | } |