/* Loop distribution.
   Copyright (C) 2006-2017 Free Software Foundation, Inc.
   Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
   and Sebastian Pop <sebastian.pop@amd.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass performs loop distribution: for example, the loop

   |DO I = 2, N
   |    A(I) = B(I) + C
   |    D(I) = A(I-1)*E
   |ENDDO

   is transformed to

   |DOALL I = 2, N
   |   A(I) = B(I) + C
   |ENDDO
   |
   |DOALL I = 2, N
   |   D(I) = A(I-1)*E
   |ENDDO

   Loop distribution is the dual of loop fusion.  It separates statements
   of a loop (or loop nest) into multiple loops (or loop nests) with the
   same loop header.  The major goal is to separate statements which may
   be vectorized from those that can't.  This pass implements distribution
   in the following steps:

     1) Seed partitions with specific type statements.  For now we support
	two types seed statements: statement defining variable used outside
	of loop; statement storing to memory.
     2) Build reduced dependence graph (RDG) for loop to be distributed.
	The vertices (RDG:V) model all statements in the loop and the edges
	(RDG:E) model flow and control dependencies between statements.
     3) Apart from RDG, compute data dependencies between memory references.
     4) Starting from seed statement, build up partition by adding depended
	statements according to RDG's dependence information.  Partition is
	classified as parallel type if it can be executed paralleled; or as
	sequential type if it can't.  Parallel type partition is further
	classified as different builtin kinds if it can be implemented as
	builtin function calls.
     5) Build partition dependence graph (PG) based on data dependencies.
	The vertices (PG:V) model all partitions and the edges (PG:E) model
	all data dependencies between every partitions pair.  In general,
	data dependence is either compilation time known or unknown.  In C
	family languages, there exists quite amount compilation time unknown
	dependencies because of possible alias relation of data references.
	We categorize PG's edge to two types: "true" edge that represents
	compilation time known data dependencies; "alias" edge for all other
	data dependencies.
     6) Traverse subgraph of PG as if all "alias" edges don't exist.  Merge
	partitions in each strong connected component (SCC) correspondingly.
	Build new PG for merged partitions.
     7) Traverse PG again and this time with both "true" and "alias" edges
	included.  We try to break SCCs by removing some edges.  Because
	SCCs by "true" edges are all fused in step 6), we can break SCCs
	by removing some "alias" edges.  It's NP-hard to choose optimal
	edge set, fortunately simple approximation is good enough for us
	given the small problem scale.
     8) Collect all data dependencies of the removed "alias" edges.  Create
	runtime alias checks for collected data dependencies.
     9) Version loop under the condition of runtime alias checks.  Given
	loop distribution generally introduces additional overhead, it is
	only useful if vectorization is achieved in distributed loop.  We
	version loop with internal function call IFN_LOOP_DIST_ALIAS.  If
	no distributed loop can be vectorized, we simply remove distributed
	loops and recover to the original one.

   TODO:
     1) We only distribute innermost loops now.  This pass should handle loop
	nests in the future.
     2) We only fuse partitions in SCC now.  A better fusion algorithm is
	desired to minimize loop overhead, maximize parallelism and maximize
	data reuse.  */
94 #include "coretypes.h"
99 #include "tree-pass.h"
101 #include "gimple-pretty-print.h"
102 #include "fold-const.h"
104 #include "gimple-iterator.h"
105 #include "gimplify-me.h"
106 #include "stor-layout.h"
107 #include "tree-cfg.h"
108 #include "tree-ssa-loop-manip.h"
109 #include "tree-ssa-loop.h"
110 #include "tree-into-ssa.h"
111 #include "tree-ssa.h"
113 #include "tree-scalar-evolution.h"
115 #include "tree-vectorizer.h"
/* Upper bound on the number of data references we analyze for data
   dependences, taken from the --param loop-max-datarefs-for-datadeps
   knob.  Loops with more refs than this are distributed conservatively.  */
#define MAX_DATAREFS_NUM \
	((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
121 /* Hashtable helpers. */
123 struct ddr_hasher
: nofree_ptr_hash
<struct data_dependence_relation
>
125 static inline hashval_t
hash (const data_dependence_relation
*);
126 static inline bool equal (const data_dependence_relation
*,
127 const data_dependence_relation
*);
130 /* Hash function for data dependence. */
133 ddr_hasher::hash (const data_dependence_relation
*ddr
)
136 h
.add_ptr (DDR_A (ddr
));
137 h
.add_ptr (DDR_B (ddr
));
141 /* Hash table equality function for data dependence. */
144 ddr_hasher::equal (const data_dependence_relation
*ddr1
,
145 const data_dependence_relation
*ddr2
)
147 return (DDR_A (ddr1
) == DDR_A (ddr2
) && DDR_B (ddr1
) == DDR_B (ddr2
));
150 /* The loop (nest) to be distributed. */
151 static vec
<loop_p
> loop_nest
;
153 /* Vector of data references in the loop to be distributed. */
154 static vec
<data_reference_p
> datarefs_vec
;
156 /* Store index of data reference in aux field. */
157 #define DR_INDEX(dr) ((uintptr_t) (dr)->aux)
159 /* Hash table for data dependence relation in the loop to be distributed. */
160 static hash_table
<ddr_hasher
> ddrs_table (389);
163 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
166 /* The statement represented by this vertex. */
169 /* Vector of data-references in this statement. */
170 vec
<data_reference_p
> datarefs
;
172 /* True when the statement contains a write to memory. */
175 /* True when the statement contains a read from memory. */
179 #define RDGV_STMT(V) ((struct rdg_vertex *) ((V)->data))->stmt
180 #define RDGV_DATAREFS(V) ((struct rdg_vertex *) ((V)->data))->datarefs
181 #define RDGV_HAS_MEM_WRITE(V) ((struct rdg_vertex *) ((V)->data))->has_mem_write
182 #define RDGV_HAS_MEM_READS(V) ((struct rdg_vertex *) ((V)->data))->has_mem_reads
183 #define RDG_STMT(RDG, I) RDGV_STMT (&(RDG->vertices[I]))
184 #define RDG_DATAREFS(RDG, I) RDGV_DATAREFS (&(RDG->vertices[I]))
185 #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
186 #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
/* Data dependence type.  */

enum rdg_dep_type
{
  /* Read After Write (RAW).  */
  flow_dd = 'f',

  /* Control dependence (execute conditional on).  */
  control_dd = 'c'
};

/* Dependence information attached to an edge of the RDG.  */

struct rdg_edge
{
  /* Type of the dependence.  */
  enum rdg_dep_type type;
};

#define RDGE_TYPE(E)        ((struct rdg_edge *) ((E)->data))->type
209 /* Dump vertex I in RDG to FILE. */
212 dump_rdg_vertex (FILE *file
, struct graph
*rdg
, int i
)
214 struct vertex
*v
= &(rdg
->vertices
[i
]);
215 struct graph_edge
*e
;
217 fprintf (file
, "(vertex %d: (%s%s) (in:", i
,
218 RDG_MEM_WRITE_STMT (rdg
, i
) ? "w" : "",
219 RDG_MEM_READS_STMT (rdg
, i
) ? "r" : "");
222 for (e
= v
->pred
; e
; e
= e
->pred_next
)
223 fprintf (file
, " %d", e
->src
);
225 fprintf (file
, ") (out:");
228 for (e
= v
->succ
; e
; e
= e
->succ_next
)
229 fprintf (file
, " %d", e
->dest
);
231 fprintf (file
, ")\n");
232 print_gimple_stmt (file
, RDGV_STMT (v
), 0, TDF_VOPS
|TDF_MEMSYMS
);
233 fprintf (file
, ")\n");
236 /* Call dump_rdg_vertex on stderr. */
239 debug_rdg_vertex (struct graph
*rdg
, int i
)
241 dump_rdg_vertex (stderr
, rdg
, i
);
244 /* Dump the reduced dependence graph RDG to FILE. */
247 dump_rdg (FILE *file
, struct graph
*rdg
)
249 fprintf (file
, "(rdg\n");
250 for (int i
= 0; i
< rdg
->n_vertices
; i
++)
251 dump_rdg_vertex (file
, rdg
, i
);
252 fprintf (file
, ")\n");
255 /* Call dump_rdg on stderr. */
258 debug_rdg (struct graph
*rdg
)
260 dump_rdg (stderr
, rdg
);
264 dot_rdg_1 (FILE *file
, struct graph
*rdg
)
267 pretty_printer buffer
;
268 pp_needs_newline (&buffer
) = false;
269 buffer
.buffer
->stream
= file
;
271 fprintf (file
, "digraph RDG {\n");
273 for (i
= 0; i
< rdg
->n_vertices
; i
++)
275 struct vertex
*v
= &(rdg
->vertices
[i
]);
276 struct graph_edge
*e
;
278 fprintf (file
, "%d [label=\"[%d] ", i
, i
);
279 pp_gimple_stmt_1 (&buffer
, RDGV_STMT (v
), 0, TDF_SLIM
);
281 fprintf (file
, "\"]\n");
283 /* Highlight reads from memory. */
284 if (RDG_MEM_READS_STMT (rdg
, i
))
285 fprintf (file
, "%d [style=filled, fillcolor=green]\n", i
);
287 /* Highlight stores to memory. */
288 if (RDG_MEM_WRITE_STMT (rdg
, i
))
289 fprintf (file
, "%d [style=filled, fillcolor=red]\n", i
);
292 for (e
= v
->succ
; e
; e
= e
->succ_next
)
293 switch (RDGE_TYPE (e
))
296 /* These are the most common dependences: don't print these. */
297 fprintf (file
, "%d -> %d \n", i
, e
->dest
);
301 fprintf (file
, "%d -> %d [label=control] \n", i
, e
->dest
);
309 fprintf (file
, "}\n\n");
312 /* Display the Reduced Dependence Graph using dotty. */
315 dot_rdg (struct graph
*rdg
)
317 /* When debugging, you may want to enable the following code. */
319 FILE *file
= popen ("dot -Tx11", "w");
322 dot_rdg_1 (file
, rdg
);
324 close (fileno (file
));
327 dot_rdg_1 (stderr
, rdg
);
331 /* Returns the index of STMT in RDG. */
334 rdg_vertex_for_stmt (struct graph
*rdg ATTRIBUTE_UNUSED
, gimple
*stmt
)
336 int index
= gimple_uid (stmt
);
337 gcc_checking_assert (index
== -1 || RDG_STMT (rdg
, index
) == stmt
);
341 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
342 the index of DEF in RDG. */
345 create_rdg_edges_for_scalar (struct graph
*rdg
, tree def
, int idef
)
347 use_operand_p imm_use_p
;
348 imm_use_iterator iterator
;
350 FOR_EACH_IMM_USE_FAST (imm_use_p
, iterator
, def
)
352 struct graph_edge
*e
;
353 int use
= rdg_vertex_for_stmt (rdg
, USE_STMT (imm_use_p
));
358 e
= add_edge (rdg
, idef
, use
);
359 e
->data
= XNEW (struct rdg_edge
);
360 RDGE_TYPE (e
) = flow_dd
;
364 /* Creates an edge for the control dependences of BB to the vertex V. */
367 create_edge_for_control_dependence (struct graph
*rdg
, basic_block bb
,
368 int v
, control_dependences
*cd
)
372 EXECUTE_IF_SET_IN_BITMAP (cd
->get_edges_dependent_on (bb
->index
),
375 basic_block cond_bb
= cd
->get_edge_src (edge_n
);
376 gimple
*stmt
= last_stmt (cond_bb
);
377 if (stmt
&& is_ctrl_stmt (stmt
))
379 struct graph_edge
*e
;
380 int c
= rdg_vertex_for_stmt (rdg
, stmt
);
384 e
= add_edge (rdg
, c
, v
);
385 e
->data
= XNEW (struct rdg_edge
);
386 RDGE_TYPE (e
) = control_dd
;
391 /* Creates the edges of the reduced dependence graph RDG. */
394 create_rdg_flow_edges (struct graph
*rdg
)
400 for (i
= 0; i
< rdg
->n_vertices
; i
++)
401 FOR_EACH_PHI_OR_STMT_DEF (def_p
, RDG_STMT (rdg
, i
),
403 create_rdg_edges_for_scalar (rdg
, DEF_FROM_PTR (def_p
), i
);
406 /* Creates the edges of the reduced dependence graph RDG. */
409 create_rdg_cd_edges (struct graph
*rdg
, control_dependences
*cd
, loop_p loop
)
413 for (i
= 0; i
< rdg
->n_vertices
; i
++)
415 gimple
*stmt
= RDG_STMT (rdg
, i
);
416 if (gimple_code (stmt
) == GIMPLE_PHI
)
420 FOR_EACH_EDGE (e
, ei
, gimple_bb (stmt
)->preds
)
421 if (flow_bb_inside_loop_p (loop
, e
->src
))
422 create_edge_for_control_dependence (rdg
, e
->src
, i
, cd
);
425 create_edge_for_control_dependence (rdg
, gimple_bb (stmt
), i
, cd
);
429 /* Build the vertices of the reduced dependence graph RDG. Return false
433 create_rdg_vertices (struct graph
*rdg
, vec
<gimple
*> stmts
, loop_p loop
)
438 FOR_EACH_VEC_ELT (stmts
, i
, stmt
)
440 struct vertex
*v
= &(rdg
->vertices
[i
]);
442 /* Record statement to vertex mapping. */
443 gimple_set_uid (stmt
, i
);
445 v
->data
= XNEW (struct rdg_vertex
);
446 RDGV_STMT (v
) = stmt
;
447 RDGV_DATAREFS (v
).create (0);
448 RDGV_HAS_MEM_WRITE (v
) = false;
449 RDGV_HAS_MEM_READS (v
) = false;
450 if (gimple_code (stmt
) == GIMPLE_PHI
)
453 unsigned drp
= datarefs_vec
.length ();
454 if (!find_data_references_in_stmt (loop
, stmt
, &datarefs_vec
))
456 for (unsigned j
= drp
; j
< datarefs_vec
.length (); ++j
)
458 data_reference_p dr
= datarefs_vec
[j
];
460 RDGV_HAS_MEM_READS (v
) = true;
462 RDGV_HAS_MEM_WRITE (v
) = true;
463 RDGV_DATAREFS (v
).safe_push (dr
);
/* Array mapping basic block's index to its topological order.  */
static int *bb_top_order_index;
/* And size of the array.  */
static int bb_top_order_index_size;
474 /* If X has a smaller topological sort number than Y, returns -1;
475 if greater, returns 1. */
478 bb_top_order_cmp (const void *x
, const void *y
)
480 basic_block bb1
= *(const basic_block
*) x
;
481 basic_block bb2
= *(const basic_block
*) y
;
483 gcc_assert (bb1
->index
< bb_top_order_index_size
484 && bb2
->index
< bb_top_order_index_size
);
485 gcc_assert (bb1
== bb2
486 || bb_top_order_index
[bb1
->index
]
487 != bb_top_order_index
[bb2
->index
]);
489 return (bb_top_order_index
[bb1
->index
] - bb_top_order_index
[bb2
->index
]);
492 /* Initialize STMTS with all the statements of LOOP. We use topological
493 order to discover all statements. The order is important because
494 generate_loops_for_partition is using the same traversal for identifying
495 statements in loop copies. */
498 stmts_from_loop (struct loop
*loop
, vec
<gimple
*> *stmts
)
501 basic_block
*bbs
= get_loop_body_in_custom_order (loop
, bb_top_order_cmp
);
503 for (i
= 0; i
< loop
->num_nodes
; i
++)
505 basic_block bb
= bbs
[i
];
507 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
509 if (!virtual_operand_p (gimple_phi_result (bsi
.phi ())))
510 stmts
->safe_push (bsi
.phi ());
512 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);
515 gimple
*stmt
= gsi_stmt (bsi
);
516 if (gimple_code (stmt
) != GIMPLE_LABEL
&& !is_gimple_debug (stmt
))
517 stmts
->safe_push (stmt
);
524 /* Free the reduced dependence graph RDG. */
527 free_rdg (struct graph
*rdg
)
531 for (i
= 0; i
< rdg
->n_vertices
; i
++)
533 struct vertex
*v
= &(rdg
->vertices
[i
]);
534 struct graph_edge
*e
;
536 for (e
= v
->succ
; e
; e
= e
->succ_next
)
541 gimple_set_uid (RDGV_STMT (v
), -1);
542 (RDGV_DATAREFS (v
)).release ();
550 /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
551 LOOP, and one edge per flow dependence or control dependence from control
552 dependence CD. During visiting each statement, data references are also
553 collected and recorded in global data DATAREFS_VEC. */
555 static struct graph
*
556 build_rdg (struct loop
*loop
, control_dependences
*cd
)
560 /* Create the RDG vertices from the stmts of the loop nest. */
561 auto_vec
<gimple
*, 10> stmts
;
562 stmts_from_loop (loop
, &stmts
);
563 rdg
= new_graph (stmts
.length ());
564 if (!create_rdg_vertices (rdg
, stmts
, loop
))
571 create_rdg_flow_edges (rdg
);
573 create_rdg_cd_edges (rdg
, cd
, loop
);
/* Kind of distributed loop.  */
enum partition_kind {
    PKIND_NORMAL, PKIND_MEMSET, PKIND_MEMCPY, PKIND_MEMMOVE
};
/* Type of distributed loop.  */
enum partition_type {
    /* The distributed loop can be executed parallelly.  */
    PTYPE_PARALLEL = 0,
    /* The distributed loop has to be executed sequentially.  */
    PTYPE_SEQUENTIAL
};
592 /* Partition for loop distribution. */
595 /* Statements of the partition. */
597 /* Loops of the partition. */
599 /* True if the partition defines variable which is used outside of loop. */
601 /* For builtin partition, true if it executes one iteration more than
602 number of loop (latch) iterations. */
604 enum partition_kind kind
;
605 enum partition_type type
;
606 /* data-references a kind != PKIND_NORMAL partition is about. */
607 data_reference_p main_dr
;
608 data_reference_p secondary_dr
;
609 /* Number of loop (latch) iterations. */
611 /* Data references in the partition. */
616 /* Allocate and initialize a partition from BITMAP. */
619 partition_alloc (void)
621 partition
*partition
= XCNEW (struct partition
);
622 partition
->stmts
= BITMAP_ALLOC (NULL
);
623 partition
->loops
= BITMAP_ALLOC (NULL
);
624 partition
->reduction_p
= false;
625 partition
->kind
= PKIND_NORMAL
;
626 partition
->datarefs
= BITMAP_ALLOC (NULL
);
630 /* Free PARTITION. */
633 partition_free (partition
*partition
)
635 BITMAP_FREE (partition
->stmts
);
636 BITMAP_FREE (partition
->loops
);
637 BITMAP_FREE (partition
->datarefs
);
641 /* Returns true if the partition can be generated as a builtin. */
644 partition_builtin_p (partition
*partition
)
646 return partition
->kind
!= PKIND_NORMAL
;
649 /* Returns true if the partition contains a reduction. */
652 partition_reduction_p (partition
*partition
)
654 return partition
->reduction_p
;
657 /* Partitions are fused because of different reasons. */
660 FUSE_NON_BUILTIN
= 0,
667 /* Description on different fusing reason. */
668 static const char *fuse_message
[] = {
669 "they are non-builtins",
670 "they have reductions",
671 "they have shared memory refs",
672 "they are in the same dependence scc",
673 "there is no point to distribute loop"};
676 update_type_for_merge (struct graph
*, partition
*, partition
*);
678 /* Merge PARTITION into the partition DEST. RDG is the reduced dependence
679 graph and we update type for result partition if it is non-NULL. */
682 partition_merge_into (struct graph
*rdg
, partition
*dest
,
683 partition
*partition
, enum fuse_type ft
)
685 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
687 fprintf (dump_file
, "Fuse partitions because %s:\n", fuse_message
[ft
]);
688 fprintf (dump_file
, " Part 1: ");
689 dump_bitmap (dump_file
, dest
->stmts
);
690 fprintf (dump_file
, " Part 2: ");
691 dump_bitmap (dump_file
, partition
->stmts
);
694 dest
->kind
= PKIND_NORMAL
;
695 if (dest
->type
== PTYPE_PARALLEL
)
696 dest
->type
= partition
->type
;
698 bitmap_ior_into (dest
->stmts
, partition
->stmts
);
699 if (partition_reduction_p (partition
))
700 dest
->reduction_p
= true;
702 /* Further check if any data dependence prevents us from executing the
703 new partition parallelly. */
704 if (dest
->type
== PTYPE_PARALLEL
&& rdg
!= NULL
)
705 update_type_for_merge (rdg
, dest
, partition
);
707 bitmap_ior_into (dest
->datarefs
, partition
->datarefs
);
711 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
715 ssa_name_has_uses_outside_loop_p (tree def
, loop_p loop
)
717 imm_use_iterator imm_iter
;
720 FOR_EACH_IMM_USE_FAST (use_p
, imm_iter
, def
)
722 gimple
*use_stmt
= USE_STMT (use_p
);
723 if (!is_gimple_debug (use_stmt
)
724 && loop
!= loop_containing_stmt (use_stmt
))
731 /* Returns true when STMT defines a scalar variable used after the
735 stmt_has_scalar_dependences_outside_loop (loop_p loop
, gimple
*stmt
)
740 if (gimple_code (stmt
) == GIMPLE_PHI
)
741 return ssa_name_has_uses_outside_loop_p (gimple_phi_result (stmt
), loop
);
743 FOR_EACH_SSA_DEF_OPERAND (def_p
, stmt
, op_iter
, SSA_OP_DEF
)
744 if (ssa_name_has_uses_outside_loop_p (DEF_FROM_PTR (def_p
), loop
))
750 /* Return a copy of LOOP placed before LOOP. */
753 copy_loop_before (struct loop
*loop
)
756 edge preheader
= loop_preheader_edge (loop
);
758 initialize_original_copy_tables ();
759 res
= slpeel_tree_duplicate_loop_to_edge_cfg (loop
, NULL
, preheader
);
760 gcc_assert (res
!= NULL
);
761 free_original_copy_tables ();
762 delete_update_ssa ();
767 /* Creates an empty basic block after LOOP. */
770 create_bb_after_loop (struct loop
*loop
)
772 edge exit
= single_exit (loop
);
780 /* Generate code for PARTITION from the code in LOOP. The loop is
781 copied when COPY_P is true. All the statements not flagged in the
782 PARTITION bitmap are removed from the loop or from its copy. The
783 statements are indexed in sequence inside a basic block, and the
784 basic blocks of a loop are taken in dom order. */
787 generate_loops_for_partition (struct loop
*loop
, partition
*partition
,
795 int orig_loop_num
= loop
->orig_loop_num
;
796 loop
= copy_loop_before (loop
);
797 gcc_assert (loop
!= NULL
);
798 loop
->orig_loop_num
= orig_loop_num
;
799 create_preheader (loop
, CP_SIMPLE_PREHEADERS
);
800 create_bb_after_loop (loop
);
804 /* Origin number is set to the new versioned loop's num. */
805 gcc_assert (loop
->orig_loop_num
!= loop
->num
);
808 /* Remove stmts not in the PARTITION bitmap. */
809 bbs
= get_loop_body_in_dom_order (loop
);
811 if (MAY_HAVE_DEBUG_STMTS
)
812 for (i
= 0; i
< loop
->num_nodes
; i
++)
814 basic_block bb
= bbs
[i
];
816 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);
819 gphi
*phi
= bsi
.phi ();
820 if (!virtual_operand_p (gimple_phi_result (phi
))
821 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
822 reset_debug_uses (phi
);
825 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
827 gimple
*stmt
= gsi_stmt (bsi
);
828 if (gimple_code (stmt
) != GIMPLE_LABEL
829 && !is_gimple_debug (stmt
)
830 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
831 reset_debug_uses (stmt
);
835 for (i
= 0; i
< loop
->num_nodes
; i
++)
837 basic_block bb
= bbs
[i
];
839 for (gphi_iterator bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
);)
841 gphi
*phi
= bsi
.phi ();
842 if (!virtual_operand_p (gimple_phi_result (phi
))
843 && !bitmap_bit_p (partition
->stmts
, gimple_uid (phi
)))
844 remove_phi_node (&bsi
, true);
849 for (gimple_stmt_iterator bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
);)
851 gimple
*stmt
= gsi_stmt (bsi
);
852 if (gimple_code (stmt
) != GIMPLE_LABEL
853 && !is_gimple_debug (stmt
)
854 && !bitmap_bit_p (partition
->stmts
, gimple_uid (stmt
)))
856 /* Choose an arbitrary path through the empty CFG part
857 that this unnecessary control stmt controls. */
858 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
860 gimple_cond_make_false (cond_stmt
);
863 else if (gimple_code (stmt
) == GIMPLE_SWITCH
)
865 gswitch
*switch_stmt
= as_a
<gswitch
*> (stmt
);
866 gimple_switch_set_index
867 (switch_stmt
, CASE_LOW (gimple_switch_label (switch_stmt
, 1)));
872 unlink_stmt_vdef (stmt
);
873 gsi_remove (&bsi
, true);
885 /* Build the size argument for a memory operation call. */
888 build_size_arg_loc (location_t loc
, data_reference_p dr
, tree nb_iter
,
891 tree size
= fold_convert_loc (loc
, sizetype
, nb_iter
);
893 size
= size_binop (PLUS_EXPR
, size
, size_one_node
);
894 size
= fold_build2_loc (loc
, MULT_EXPR
, sizetype
, size
,
895 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
896 size
= fold_convert_loc (loc
, size_type_node
, size
);
900 /* Build an address argument for a memory operation call. */
903 build_addr_arg_loc (location_t loc
, data_reference_p dr
, tree nb_bytes
)
907 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, DR_OFFSET (dr
), DR_INIT (dr
));
908 addr_base
= fold_convert_loc (loc
, sizetype
, addr_base
);
910 /* Test for a negative stride, iterating over every element. */
911 if (tree_int_cst_sgn (DR_STEP (dr
)) == -1)
913 addr_base
= size_binop_loc (loc
, MINUS_EXPR
, addr_base
,
914 fold_convert_loc (loc
, sizetype
, nb_bytes
));
915 addr_base
= size_binop_loc (loc
, PLUS_EXPR
, addr_base
,
916 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
))));
919 return fold_build_pointer_plus_loc (loc
, DR_BASE_ADDRESS (dr
), addr_base
);
922 /* If VAL memory representation contains the same value in all bytes,
923 return that value, otherwise return -1.
924 E.g. for 0x24242424 return 0x24, for IEEE double
925 747708026454360457216.0 return 0x44, etc. */
928 const_with_all_bytes_same (tree val
)
930 unsigned char buf
[64];
933 if (integer_zerop (val
)
934 || (TREE_CODE (val
) == CONSTRUCTOR
935 && !TREE_CLOBBER_P (val
)
936 && CONSTRUCTOR_NELTS (val
) == 0))
939 if (real_zerop (val
))
941 /* Only return 0 for +0.0, not for -0.0, which doesn't have
942 an all bytes same memory representation. Don't transform
943 -0.0 stores into +0.0 even for !HONOR_SIGNED_ZEROS. */
944 switch (TREE_CODE (val
))
947 if (!real_isneg (TREE_REAL_CST_PTR (val
)))
951 if (!const_with_all_bytes_same (TREE_REALPART (val
))
952 && !const_with_all_bytes_same (TREE_IMAGPART (val
)))
957 for (j
= 0; j
< VECTOR_CST_NELTS (val
); ++j
)
958 if (const_with_all_bytes_same (VECTOR_CST_ELT (val
, j
)))
960 if (j
== VECTOR_CST_NELTS (val
))
968 if (CHAR_BIT
!= 8 || BITS_PER_UNIT
!= 8)
971 len
= native_encode_expr (val
, buf
, sizeof (buf
));
974 for (i
= 1; i
< len
; i
++)
975 if (buf
[i
] != buf
[0])
980 /* Generate a call to memset for PARTITION in LOOP. */
983 generate_memset_builtin (struct loop
*loop
, partition
*partition
)
985 gimple_stmt_iterator gsi
;
986 gimple
*stmt
, *fn_call
;
987 tree mem
, fn
, nb_bytes
;
991 stmt
= DR_STMT (partition
->main_dr
);
992 loc
= gimple_location (stmt
);
994 /* The new statements will be placed before LOOP. */
995 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
997 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
998 partition
->plus_one
);
999 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
1000 false, GSI_CONTINUE_LINKING
);
1001 mem
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
1002 mem
= force_gimple_operand_gsi (&gsi
, mem
, true, NULL_TREE
,
1003 false, GSI_CONTINUE_LINKING
);
1005 /* This exactly matches the pattern recognition in classify_partition. */
1006 val
= gimple_assign_rhs1 (stmt
);
1007 /* Handle constants like 0x15151515 and similarly
1008 floating point constants etc. where all bytes are the same. */
1009 int bytev
= const_with_all_bytes_same (val
);
1011 val
= build_int_cst (integer_type_node
, bytev
);
1012 else if (TREE_CODE (val
) == INTEGER_CST
)
1013 val
= fold_convert (integer_type_node
, val
);
1014 else if (!useless_type_conversion_p (integer_type_node
, TREE_TYPE (val
)))
1016 tree tem
= make_ssa_name (integer_type_node
);
1017 gimple
*cstmt
= gimple_build_assign (tem
, NOP_EXPR
, val
);
1018 gsi_insert_after (&gsi
, cstmt
, GSI_CONTINUE_LINKING
);
1022 fn
= build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET
));
1023 fn_call
= gimple_build_call (fn
, 3, mem
, val
, nb_bytes
);
1024 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
1026 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1028 fprintf (dump_file
, "generated memset");
1030 fprintf (dump_file
, " zero\n");
1032 fprintf (dump_file
, "\n");
1036 /* Generate a call to memcpy for PARTITION in LOOP. */
1039 generate_memcpy_builtin (struct loop
*loop
, partition
*partition
)
1041 gimple_stmt_iterator gsi
;
1042 gimple
*stmt
, *fn_call
;
1043 tree dest
, src
, fn
, nb_bytes
;
1045 enum built_in_function kind
;
1047 stmt
= DR_STMT (partition
->main_dr
);
1048 loc
= gimple_location (stmt
);
1050 /* The new statements will be placed before LOOP. */
1051 gsi
= gsi_last_bb (loop_preheader_edge (loop
)->src
);
1053 nb_bytes
= build_size_arg_loc (loc
, partition
->main_dr
, partition
->niter
,
1054 partition
->plus_one
);
1055 nb_bytes
= force_gimple_operand_gsi (&gsi
, nb_bytes
, true, NULL_TREE
,
1056 false, GSI_CONTINUE_LINKING
);
1057 dest
= build_addr_arg_loc (loc
, partition
->main_dr
, nb_bytes
);
1058 src
= build_addr_arg_loc (loc
, partition
->secondary_dr
, nb_bytes
);
1059 if (partition
->kind
== PKIND_MEMCPY
1060 || ! ptr_derefs_may_alias_p (dest
, src
))
1061 kind
= BUILT_IN_MEMCPY
;
1063 kind
= BUILT_IN_MEMMOVE
;
1065 dest
= force_gimple_operand_gsi (&gsi
, dest
, true, NULL_TREE
,
1066 false, GSI_CONTINUE_LINKING
);
1067 src
= force_gimple_operand_gsi (&gsi
, src
, true, NULL_TREE
,
1068 false, GSI_CONTINUE_LINKING
);
1069 fn
= build_fold_addr_expr (builtin_decl_implicit (kind
));
1070 fn_call
= gimple_build_call (fn
, 3, dest
, src
, nb_bytes
);
1071 gsi_insert_after (&gsi
, fn_call
, GSI_CONTINUE_LINKING
);
1073 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1075 if (kind
== BUILT_IN_MEMCPY
)
1076 fprintf (dump_file
, "generated memcpy\n");
1078 fprintf (dump_file
, "generated memmove\n");
1082 /* Remove and destroy the loop LOOP. */
1085 destroy_loop (struct loop
*loop
)
1087 unsigned nbbs
= loop
->num_nodes
;
1088 edge exit
= single_exit (loop
);
1089 basic_block src
= loop_preheader_edge (loop
)->src
, dest
= exit
->dest
;
1093 bbs
= get_loop_body_in_dom_order (loop
);
1095 redirect_edge_pred (exit
, src
);
1096 exit
->flags
&= ~(EDGE_TRUE_VALUE
|EDGE_FALSE_VALUE
);
1097 exit
->flags
|= EDGE_FALLTHRU
;
1098 cancel_loop_tree (loop
);
1099 rescan_loop_exit (exit
, false, true);
1104 /* We have made sure to not leave any dangling uses of SSA
1105 names defined in the loop. With the exception of virtuals.
1106 Make sure we replace all uses of virtual defs that will remain
1107 outside of the loop with the bare symbol as delete_basic_block
1108 will release them. */
1110 for (gphi_iterator gsi
= gsi_start_phis (bbs
[i
]); !gsi_end_p (gsi
);
1113 gphi
*phi
= gsi
.phi ();
1114 if (virtual_operand_p (gimple_phi_result (phi
)))
1115 mark_virtual_phi_result_for_renaming (phi
);
1117 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
);
1120 gimple
*stmt
= gsi_stmt (gsi
);
1121 tree vdef
= gimple_vdef (stmt
);
1122 if (vdef
&& TREE_CODE (vdef
) == SSA_NAME
)
1123 mark_virtual_operand_for_renaming (vdef
);
1125 delete_basic_block (bbs
[i
]);
1131 set_immediate_dominator (CDI_DOMINATORS
, dest
,
1132 recompute_dominator (CDI_DOMINATORS
, dest
));
1135 /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
1138 generate_code_for_partition (struct loop
*loop
,
1139 partition
*partition
, bool copy_p
)
1141 switch (partition
->kind
)
1144 /* Reductions all have to be in the last partition. */
1145 gcc_assert (!partition_reduction_p (partition
)
1147 generate_loops_for_partition (loop
, partition
, copy_p
);
1151 generate_memset_builtin (loop
, partition
);
1156 generate_memcpy_builtin (loop
, partition
);
1163 /* Common tail for partitions we turn into a call. If this was the last
1164 partition for which we generate code, we have to destroy the loop. */
1170 /* Return data dependence relation for data references A and B. The two
1171 data references must be in lexicographic order wrto reduced dependence
1172 graph RDG. We firstly try to find ddr from global ddr hash table. If
1173 it doesn't exist, compute the ddr and cache it. */
1175 static data_dependence_relation
*
1176 get_data_dependence (struct graph
*rdg
, data_reference_p a
, data_reference_p b
)
1178 struct data_dependence_relation ent
, **slot
;
1179 struct data_dependence_relation
*ddr
;
1181 gcc_assert (DR_IS_WRITE (a
) || DR_IS_WRITE (b
));
1182 gcc_assert (rdg_vertex_for_stmt (rdg
, DR_STMT (a
))
1183 <= rdg_vertex_for_stmt (rdg
, DR_STMT (b
)));
1186 slot
= ddrs_table
.find_slot (&ent
, INSERT
);
1189 ddr
= initialize_data_dependence_relation (a
, b
, loop_nest
);
1190 compute_affine_dependence (ddr
, loop_nest
[0]);
1197 /* In reduced dependence graph RDG for loop distribution, return true if
1198 dependence between references DR1 and DR2 leads to a dependence cycle
1199 and such dependence cycle can't be resolved by runtime alias check. */
1202 data_dep_in_cycle_p (struct graph
*rdg
,
1203 data_reference_p dr1
, data_reference_p dr2
)
1205 struct data_dependence_relation
*ddr
;
1207 /* Re-shuffle data-refs to be in topological order. */
1208 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1209 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1210 std::swap (dr1
, dr2
);
1212 ddr
= get_data_dependence (rdg
, dr1
, dr2
);
1214 /* In case of no data dependence. */
1215 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1217 /* For unknown data dependence or known data dependence which can't be
1218 expressed in classic distance vector, we check if it can be resolved
1219 by runtime alias check. If yes, we still consider data dependence
1220 as won't introduce data dependence cycle. */
1221 else if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
1222 || DDR_NUM_DIST_VECTS (ddr
) == 0)
1223 return !runtime_alias_check_p (ddr
, NULL
, true);
1224 else if (DDR_NUM_DIST_VECTS (ddr
) > 1)
1226 else if (DDR_REVERSED_P (ddr
)
1227 || lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1233 /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
1234 PARTITION1's type after merging PARTITION2 into PARTITION1. */
1237 update_type_for_merge (struct graph
*rdg
,
1238 partition
*partition1
, partition
*partition2
)
1241 bitmap_iterator bi
, bj
;
1242 data_reference_p dr1
, dr2
;
1244 EXECUTE_IF_SET_IN_BITMAP (partition1
->datarefs
, 0, i
, bi
)
1246 unsigned start
= (partition1
== partition2
) ? i
+ 1 : 0;
1248 dr1
= datarefs_vec
[i
];
1249 EXECUTE_IF_SET_IN_BITMAP (partition2
->datarefs
, start
, j
, bj
)
1251 dr2
= datarefs_vec
[j
];
1252 if (DR_IS_READ (dr1
) && DR_IS_READ (dr2
))
1255 /* Partition can only be executed sequentially if there is any
1256 data dependence cycle. */
1257 if (data_dep_in_cycle_p (rdg
, dr1
, dr2
))
1259 partition1
->type
= PTYPE_SEQUENTIAL
;
1266 /* Returns a partition with all the statements needed for computing
1267 the vertex V of the RDG, also including the loop exit conditions. */
1270 build_rdg_partition_for_vertex (struct graph
*rdg
, int v
)
1272 partition
*partition
= partition_alloc ();
1273 auto_vec
<int, 3> nodes
;
1276 data_reference_p dr
;
1278 graphds_dfs (rdg
, &v
, 1, &nodes
, false, NULL
);
1280 FOR_EACH_VEC_ELT (nodes
, i
, x
)
1282 bitmap_set_bit (partition
->stmts
, x
);
1283 bitmap_set_bit (partition
->loops
,
1284 loop_containing_stmt (RDG_STMT (rdg
, x
))->num
);
1286 for (j
= 0; RDG_DATAREFS (rdg
, x
).iterate (j
, &dr
); ++j
)
1288 unsigned idx
= (unsigned) DR_INDEX (dr
);
1289 gcc_assert (idx
< datarefs_vec
.length ());
1291 /* Partition can only be executed sequentially if there is any
1292 unknown data reference. */
1293 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
)
1294 || !DR_INIT (dr
) || !DR_STEP (dr
))
1295 partition
->type
= PTYPE_SEQUENTIAL
;
1297 bitmap_set_bit (partition
->datarefs
, idx
);
1301 if (partition
->type
== PTYPE_SEQUENTIAL
)
1304 /* Further check if any data dependence prevents us from executing the
1305 partition parallelly. */
1306 update_type_for_merge (rdg
, partition
, partition
);
1311 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
1312 For the moment we detect memset, memcpy and memmove patterns. Bitmap
1313 STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
1316 classify_partition (loop_p loop
, struct graph
*rdg
, partition
*partition
,
1317 bitmap stmt_in_all_partitions
)
1322 data_reference_p single_load
, single_store
;
1323 bool volatiles_p
= false, plus_one
= false, has_reduction
= false;
1325 partition
->kind
= PKIND_NORMAL
;
1326 partition
->main_dr
= NULL
;
1327 partition
->secondary_dr
= NULL
;
1328 partition
->niter
= NULL_TREE
;
1329 partition
->plus_one
= false;
1331 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1333 gimple
*stmt
= RDG_STMT (rdg
, i
);
1335 if (gimple_has_volatile_ops (stmt
))
1338 /* If the stmt is not included by all partitions and there is uses
1339 outside of the loop, then mark the partition as reduction. */
1340 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
1342 /* Due to limitation in the transform phase we have to fuse all
1343 reduction partitions. As a result, this could cancel valid
1344 loop distribution especially for loop that induction variable
1345 is used outside of loop. To workaround this issue, we skip
1346 marking partition as reudction if the reduction stmt belongs
1347 to all partitions. In such case, reduction will be computed
1348 correctly no matter how partitions are fused/distributed. */
1349 if (!bitmap_bit_p (stmt_in_all_partitions
, i
))
1351 partition
->reduction_p
= true;
1354 has_reduction
= true;
1358 /* Perform general partition disqualification for builtins. */
1360 /* Simple workaround to prevent classifying the partition as builtin
1361 if it contains any use outside of loop. */
1363 || !flag_tree_loop_distribute_patterns
)
1366 /* Detect memset and memcpy. */
1368 single_store
= NULL
;
1369 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, bi
)
1371 gimple
*stmt
= RDG_STMT (rdg
, i
);
1372 data_reference_p dr
;
1375 if (gimple_code (stmt
) == GIMPLE_PHI
)
1378 /* Any scalar stmts are ok. */
1379 if (!gimple_vuse (stmt
))
1382 /* Otherwise just regular loads/stores. */
1383 if (!gimple_assign_single_p (stmt
))
1386 /* But exactly one store and/or load. */
1387 for (j
= 0; RDG_DATAREFS (rdg
, i
).iterate (j
, &dr
); ++j
)
1389 tree type
= TREE_TYPE (DR_REF (dr
));
1391 /* The memset, memcpy and memmove library calls are only
1392 able to deal with generic address space. */
1393 if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type
)))
1396 if (DR_IS_READ (dr
))
1398 if (single_load
!= NULL
)
1404 if (single_store
!= NULL
)
1414 nb_iter
= number_of_latch_executions (loop
);
1415 if (!nb_iter
|| nb_iter
== chrec_dont_know
)
1417 if (dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
,
1418 gimple_bb (DR_STMT (single_store
))))
1421 if (single_store
&& !single_load
)
1423 gimple
*stmt
= DR_STMT (single_store
);
1424 tree rhs
= gimple_assign_rhs1 (stmt
);
1425 if (const_with_all_bytes_same (rhs
) == -1
1426 && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs
))
1427 || (TYPE_MODE (TREE_TYPE (rhs
))
1428 != TYPE_MODE (unsigned_char_type_node
))))
1430 if (TREE_CODE (rhs
) == SSA_NAME
1431 && !SSA_NAME_IS_DEFAULT_DEF (rhs
)
1432 && flow_bb_inside_loop_p (loop
, gimple_bb (SSA_NAME_DEF_STMT (rhs
))))
1434 if (!adjacent_dr_p (single_store
)
1435 || !dominated_by_p (CDI_DOMINATORS
,
1436 loop
->latch
, gimple_bb (stmt
)))
1438 partition
->kind
= PKIND_MEMSET
;
1439 partition
->main_dr
= single_store
;
1440 partition
->niter
= nb_iter
;
1441 partition
->plus_one
= plus_one
;
1443 else if (single_store
&& single_load
)
1445 gimple
*store
= DR_STMT (single_store
);
1446 gimple
*load
= DR_STMT (single_load
);
1447 /* Direct aggregate copy or via an SSA name temporary. */
1449 && gimple_assign_lhs (load
) != gimple_assign_rhs1 (store
))
1451 if (!adjacent_dr_p (single_store
)
1452 || !adjacent_dr_p (single_load
)
1453 || !operand_equal_p (DR_STEP (single_store
),
1454 DR_STEP (single_load
), 0)
1455 || !dominated_by_p (CDI_DOMINATORS
,
1456 loop
->latch
, gimple_bb (store
)))
1458 /* Now check that if there is a dependence this dependence is
1459 of a suitable form for memmove. */
1460 ddr_p ddr
= get_data_dependence (rdg
, single_load
, single_store
);
1461 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1464 if (DDR_ARE_DEPENDENT (ddr
) != chrec_known
)
1466 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1469 lambda_vector dist_v
;
1470 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1472 int dist
= dist_v
[index_in_loop_nest (loop
->num
,
1473 DDR_LOOP_NEST (ddr
))];
1474 if (dist
> 0 && !DDR_REVERSED_P (ddr
))
1477 partition
->kind
= PKIND_MEMMOVE
;
1480 partition
->kind
= PKIND_MEMCPY
;
1481 partition
->main_dr
= single_store
;
1482 partition
->secondary_dr
= single_load
;
1483 partition
->niter
= nb_iter
;
1484 partition
->plus_one
= plus_one
;
1488 /* Returns true when PARTITION1 and PARTITION2 access the same memory
1492 share_memory_accesses (struct graph
*rdg
,
1493 partition
*partition1
, partition
*partition2
)
1496 bitmap_iterator bi
, bj
;
1497 data_reference_p dr1
, dr2
;
1499 /* First check whether in the intersection of the two partitions are
1500 any loads or stores. Common loads are the situation that happens
1502 EXECUTE_IF_AND_IN_BITMAP (partition1
->stmts
, partition2
->stmts
, 0, i
, bi
)
1503 if (RDG_MEM_WRITE_STMT (rdg
, i
)
1504 || RDG_MEM_READS_STMT (rdg
, i
))
1507 /* Then check whether the two partitions access the same memory object. */
1508 EXECUTE_IF_SET_IN_BITMAP (partition1
->datarefs
, 0, i
, bi
)
1510 dr1
= datarefs_vec
[i
];
1512 if (!DR_BASE_ADDRESS (dr1
)
1513 || !DR_OFFSET (dr1
) || !DR_INIT (dr1
) || !DR_STEP (dr1
))
1516 EXECUTE_IF_SET_IN_BITMAP (partition2
->datarefs
, 0, j
, bj
)
1518 dr2
= datarefs_vec
[j
];
1520 if (!DR_BASE_ADDRESS (dr2
)
1521 || !DR_OFFSET (dr2
) || !DR_INIT (dr2
) || !DR_STEP (dr2
))
1524 if (operand_equal_p (DR_BASE_ADDRESS (dr1
), DR_BASE_ADDRESS (dr2
), 0)
1525 && operand_equal_p (DR_OFFSET (dr1
), DR_OFFSET (dr2
), 0)
1526 && operand_equal_p (DR_INIT (dr1
), DR_INIT (dr2
), 0)
1527 && operand_equal_p (DR_STEP (dr1
), DR_STEP (dr2
), 0))
1535 /* For each seed statement in STARTING_STMTS, this function builds
1536 partition for it by adding depended statements according to RDG.
1537 All partitions are recorded in PARTITIONS. */
1540 rdg_build_partitions (struct graph
*rdg
,
1541 vec
<gimple
*> starting_stmts
,
1542 vec
<partition
*> *partitions
)
1544 auto_bitmap processed
;
1548 FOR_EACH_VEC_ELT (starting_stmts
, i
, stmt
)
1550 int v
= rdg_vertex_for_stmt (rdg
, stmt
);
1552 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1554 "ldist asked to generate code for vertex %d\n", v
);
1556 /* If the vertex is already contained in another partition so
1557 is the partition rooted at it. */
1558 if (bitmap_bit_p (processed
, v
))
1561 partition
*partition
= build_rdg_partition_for_vertex (rdg
, v
);
1562 bitmap_ior_into (processed
, partition
->stmts
);
1564 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1566 fprintf (dump_file
, "ldist creates useful %s partition:\n",
1567 partition
->type
== PTYPE_PARALLEL
? "parallel" : "sequent");
1568 bitmap_print (dump_file
, partition
->stmts
, " ", "\n");
1571 partitions
->safe_push (partition
);
1574 /* All vertices should have been assigned to at least one partition now,
1575 other than vertices belonging to dead code. */
1578 /* Dump to FILE the PARTITIONS. */
1581 dump_rdg_partitions (FILE *file
, vec
<partition
*> partitions
)
1584 partition
*partition
;
1586 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1587 debug_bitmap_file (file
, partition
->stmts
);
1590 /* Debug PARTITIONS. */
1591 extern void debug_rdg_partitions (vec
<partition
*> );
1594 debug_rdg_partitions (vec
<partition
*> partitions
)
1596 dump_rdg_partitions (stderr
, partitions
);
1599 /* Returns the number of read and write operations in the RDG. */
1602 number_of_rw_in_rdg (struct graph
*rdg
)
1606 for (i
= 0; i
< rdg
->n_vertices
; i
++)
1608 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1611 if (RDG_MEM_READS_STMT (rdg
, i
))
1618 /* Returns the number of read and write operations in a PARTITION of
1622 number_of_rw_in_partition (struct graph
*rdg
, partition
*partition
)
1628 EXECUTE_IF_SET_IN_BITMAP (partition
->stmts
, 0, i
, ii
)
1630 if (RDG_MEM_WRITE_STMT (rdg
, i
))
1633 if (RDG_MEM_READS_STMT (rdg
, i
))
1640 /* Returns true when one of the PARTITIONS contains all the read or
1641 write operations of RDG. */
1644 partition_contains_all_rw (struct graph
*rdg
,
1645 vec
<partition
*> partitions
)
1648 partition
*partition
;
1649 int nrw
= number_of_rw_in_rdg (rdg
);
1651 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
1652 if (nrw
== number_of_rw_in_partition (rdg
, partition
))
1658 /* Compute partition dependence created by the data references in DRS1
1659 and DRS2, modify and return DIR according to that. IF ALIAS_DDR is
1660 not NULL, we record dependence introduced by possible alias between
1661 two data references in ALIAS_DDRS; otherwise, we simply ignore such
1662 dependence as if it doesn't exist at all. */
1665 pg_add_dependence_edges (struct graph
*rdg
, int dir
,
1666 bitmap drs1
, bitmap drs2
, vec
<ddr_p
> *alias_ddrs
)
1669 bitmap_iterator bi
, bj
;
1670 data_reference_p dr1
, dr2
, saved_dr1
;
1672 /* dependence direction - 0 is no dependence, -1 is back,
1673 1 is forth, 2 is both (we can stop then, merging will occur). */
1674 EXECUTE_IF_SET_IN_BITMAP (drs1
, 0, i
, bi
)
1676 dr1
= datarefs_vec
[i
];
1678 EXECUTE_IF_SET_IN_BITMAP (drs2
, 0, j
, bj
)
1680 int res
, this_dir
= 1;
1683 dr2
= datarefs_vec
[j
];
1685 /* Skip all <read, read> data dependence. */
1686 if (DR_IS_READ (dr1
) && DR_IS_READ (dr2
))
1690 /* Re-shuffle data-refs to be in topological order. */
1691 if (rdg_vertex_for_stmt (rdg
, DR_STMT (dr1
))
1692 > rdg_vertex_for_stmt (rdg
, DR_STMT (dr2
)))
1694 std::swap (dr1
, dr2
);
1695 this_dir
= -this_dir
;
1697 ddr
= get_data_dependence (rdg
, dr1
, dr2
);
1698 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1701 res
= data_ref_compare_tree (DR_BASE_ADDRESS (dr1
),
1702 DR_BASE_ADDRESS (dr2
));
1703 /* Be conservative. If data references are not well analyzed,
1704 or the two data references have the same base address and
1705 offset, add dependence and consider it alias to each other.
1706 In other words, the dependence can not be resolved by
1707 runtime alias check. */
1708 if (!DR_BASE_ADDRESS (dr1
) || !DR_BASE_ADDRESS (dr2
)
1709 || !DR_OFFSET (dr1
) || !DR_OFFSET (dr2
)
1710 || !DR_INIT (dr1
) || !DR_INIT (dr2
)
1711 || !DR_STEP (dr1
) || !tree_fits_uhwi_p (DR_STEP (dr1
))
1712 || !DR_STEP (dr2
) || !tree_fits_uhwi_p (DR_STEP (dr2
))
1715 /* Data dependence could be resolved by runtime alias check,
1716 record it in ALIAS_DDRS. */
1717 else if (alias_ddrs
!= NULL
)
1718 alias_ddrs
->safe_push (ddr
);
1719 /* Or simply ignore it. */
1721 else if (DDR_ARE_DEPENDENT (ddr
) == NULL_TREE
)
1723 if (DDR_REVERSED_P (ddr
))
1724 this_dir
= -this_dir
;
1726 /* Known dependences can still be unordered througout the
1727 iteration space, see gcc.dg/tree-ssa/ldist-16.c. */
1728 if (DDR_NUM_DIST_VECTS (ddr
) != 1)
1730 /* If the overlap is exact preserve stmt order. */
1731 else if (lambda_vector_zerop (DDR_DIST_VECT (ddr
, 0), 1))
1733 /* Else as the distance vector is lexicographic positive swap
1734 the dependence direction. */
1736 this_dir
= -this_dir
;
1744 else if (this_dir
!= 0 && dir
!= this_dir
)
1746 /* Shuffle "back" dr1. */
1753 /* Compare postorder number of the partition graph vertices V1 and V2. */
1756 pgcmp (const void *v1_
, const void *v2_
)
1758 const vertex
*v1
= (const vertex
*)v1_
;
1759 const vertex
*v2
= (const vertex
*)v2_
;
1760 return v2
->post
- v1
->post
;
/* Data attached to vertices of partition dependence graph.  */
struct pg_vdata
{
  /* ID of the corresponding partition.  */
  int id;
  /* The partition.  */
  struct partition *partition;
};
1772 /* Data attached to edges of partition dependence graph. */
1775 /* If the dependence edge can be resolved by runtime alias check,
1776 this vector contains data dependence relations for runtime alias
1777 check. On the other hand, if the dependence edge is introduced
1778 because of compilation time known data dependence, this vector
1779 contains nothing. */
1780 vec
<ddr_p
> alias_ddrs
;
1783 /* Callback data for traversing edges in graph. */
1784 struct pg_edge_callback_data
1786 /* Bitmap contains strong connected components should be merged. */
1787 bitmap sccs_to_merge
;
1788 /* Array constains component information for all vertices. */
1789 int *vertices_component
;
1790 /* Vector to record all data dependence relations which are needed
1791 to break strong connected components by runtime alias checks. */
1792 vec
<ddr_p
> *alias_ddrs
;
1795 /* Initialize vertice's data for partition dependence graph PG with
1799 init_partition_graph_vertices (struct graph
*pg
,
1800 vec
<struct partition
*> *partitions
)
1803 partition
*partition
;
1804 struct pg_vdata
*data
;
1806 for (i
= 0; partitions
->iterate (i
, &partition
); ++i
)
1808 data
= new pg_vdata
;
1809 pg
->vertices
[i
].data
= data
;
1811 data
->partition
= partition
;
1815 /* Add edge <I, J> to partition dependence graph PG. Attach vector of data
1816 dependence relations to the EDGE if DDRS isn't NULL. */
1819 add_partition_graph_edge (struct graph
*pg
, int i
, int j
, vec
<ddr_p
> *ddrs
)
1821 struct graph_edge
*e
= add_edge (pg
, i
, j
);
1823 /* If the edge is attached with data dependence relations, it means this
1824 dependence edge can be resolved by runtime alias checks. */
1827 struct pg_edata
*data
= new pg_edata
;
1829 gcc_assert (ddrs
->length () > 0);
1831 data
->alias_ddrs
= vNULL
;
1832 data
->alias_ddrs
.safe_splice (*ddrs
);
1836 /* Callback function for graph travesal algorithm. It returns true
1837 if edge E should skipped when traversing the graph. */
1840 pg_skip_alias_edge (struct graph_edge
*e
)
1842 struct pg_edata
*data
= (struct pg_edata
*)e
->data
;
1843 return (data
!= NULL
&& data
->alias_ddrs
.length () > 0);
1846 /* Callback function freeing data attached to edge E of graph. */
1849 free_partition_graph_edata_cb (struct graph
*, struct graph_edge
*e
, void *)
1851 if (e
->data
!= NULL
)
1853 struct pg_edata
*data
= (struct pg_edata
*)e
->data
;
1854 data
->alias_ddrs
.release ();
1859 /* Free data attached to vertice of partition dependence graph PG. */
1862 free_partition_graph_vdata (struct graph
*pg
)
1865 struct pg_vdata
*data
;
1867 for (i
= 0; i
< pg
->n_vertices
; ++i
)
1869 data
= (struct pg_vdata
*)pg
->vertices
[i
].data
;
1874 /* Build and return partition dependence graph for PARTITIONS. RDG is
1875 reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
1876 is true, data dependence caused by possible alias between references
1877 is ignored, as if it doesn't exist at all; otherwise all depdendences
1880 static struct graph
*
1881 build_partition_graph (struct graph
*rdg
,
1882 vec
<struct partition
*> *partitions
,
1883 bool ignore_alias_p
)
1886 struct partition
*partition1
, *partition2
;
1887 graph
*pg
= new_graph (partitions
->length ());
1888 auto_vec
<ddr_p
> alias_ddrs
, *alias_ddrs_p
;
1890 alias_ddrs_p
= ignore_alias_p
? NULL
: &alias_ddrs
;
1892 init_partition_graph_vertices (pg
, partitions
);
1894 for (i
= 0; partitions
->iterate (i
, &partition1
); ++i
)
1896 for (j
= i
+ 1; partitions
->iterate (j
, &partition2
); ++j
)
1898 /* dependence direction - 0 is no dependence, -1 is back,
1899 1 is forth, 2 is both (we can stop then, merging will occur). */
1902 /* If the first partition has reduction, add back edge; if the
1903 second partition has reduction, add forth edge. This makes
1904 sure that reduction partition will be sorted as the last one. */
1905 if (partition_reduction_p (partition1
))
1907 else if (partition_reduction_p (partition2
))
1910 /* Cleanup the temporary vector. */
1911 alias_ddrs
.truncate (0);
1913 dir
= pg_add_dependence_edges (rdg
, dir
, partition1
->datarefs
,
1914 partition2
->datarefs
, alias_ddrs_p
);
1916 /* Add edge to partition graph if there exists dependence. There
1917 are two types of edges. One type edge is caused by compilation
1918 time known dependence, this type can not be resolved by runtime
1919 alias check. The other type can be resolved by runtime alias
1921 if (dir
== 1 || dir
== 2
1922 || alias_ddrs
.length () > 0)
1924 /* Attach data dependence relations to edge that can be resolved
1925 by runtime alias check. */
1926 bool alias_edge_p
= (dir
!= 1 && dir
!= 2);
1927 add_partition_graph_edge (pg
, i
, j
,
1928 (alias_edge_p
) ? &alias_ddrs
: NULL
);
1930 if (dir
== -1 || dir
== 2
1931 || alias_ddrs
.length () > 0)
1933 /* Attach data dependence relations to edge that can be resolved
1934 by runtime alias check. */
1935 bool alias_edge_p
= (dir
!= -1 && dir
!= 2);
1936 add_partition_graph_edge (pg
, j
, i
,
1937 (alias_edge_p
) ? &alias_ddrs
: NULL
);
1944 /* Sort partitions in PG by post order and store them in PARTITIONS. */
1947 sort_partitions_by_post_order (struct graph
*pg
,
1948 vec
<struct partition
*> *partitions
)
1951 struct pg_vdata
*data
;
1953 /* Now order the remaining nodes in postorder. */
1954 qsort (pg
->vertices
, pg
->n_vertices
, sizeof (vertex
), pgcmp
);
1955 partitions
->truncate (0);
1956 for (i
= 0; i
< pg
->n_vertices
; ++i
)
1958 data
= (struct pg_vdata
*)pg
->vertices
[i
].data
;
1959 if (data
->partition
)
1960 partitions
->safe_push (data
->partition
);
1964 /* Given reduced dependence graph RDG merge strong connected components
1965 of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
1966 possible alias between references is ignored, as if it doesn't exist
1967 at all; otherwise all depdendences are considered. */
1970 merge_dep_scc_partitions (struct graph
*rdg
,
1971 vec
<struct partition
*> *partitions
,
1972 bool ignore_alias_p
)
1974 struct partition
*partition1
, *partition2
;
1975 struct pg_vdata
*data
;
1976 graph
*pg
= build_partition_graph (rdg
, partitions
, ignore_alias_p
);
1977 int i
, j
, num_sccs
= graphds_scc (pg
, NULL
);
1979 /* Strong connected compoenent means dependence cycle, we cannot distribute
1980 them. So fuse them together. */
1981 if ((unsigned) num_sccs
< partitions
->length ())
1983 for (i
= 0; i
< num_sccs
; ++i
)
1985 for (j
= 0; partitions
->iterate (j
, &partition1
); ++j
)
1986 if (pg
->vertices
[j
].component
== i
)
1988 for (j
= j
+ 1; partitions
->iterate (j
, &partition2
); ++j
)
1989 if (pg
->vertices
[j
].component
== i
)
1991 partition_merge_into (NULL
, partition1
,
1992 partition2
, FUSE_SAME_SCC
);
1993 partition1
->type
= PTYPE_SEQUENTIAL
;
1994 (*partitions
)[j
] = NULL
;
1995 partition_free (partition2
);
1996 data
= (struct pg_vdata
*)pg
->vertices
[j
].data
;
1997 data
->partition
= NULL
;
2000 sort_partitions_by_post_order (pg
, partitions
);
2002 gcc_assert (partitions
->length () == (unsigned)num_sccs
);
2003 free_partition_graph_vdata (pg
);
2007 /* Callback function for traversing edge E in graph G. DATA is private
2011 pg_collect_alias_ddrs (struct graph
*g
, struct graph_edge
*e
, void *data
)
2013 int i
, j
, component
;
2014 struct pg_edge_callback_data
*cbdata
;
2015 struct pg_edata
*edata
= (struct pg_edata
*) e
->data
;
2017 /* If the edge doesn't have attached data dependence, it represents
2018 compilation time known dependences. This type dependence cannot
2019 be resolved by runtime alias check. */
2020 if (edata
== NULL
|| edata
->alias_ddrs
.length () == 0)
2023 cbdata
= (struct pg_edge_callback_data
*) data
;
2026 component
= cbdata
->vertices_component
[i
];
2027 /* Vertices are topologically sorted according to compilation time
2028 known dependences, so we can break strong connected components
2029 by removing edges of the opposite direction, i.e, edges pointing
2030 from vertice with smaller post number to vertice with bigger post
2032 if (g
->vertices
[i
].post
< g
->vertices
[j
].post
2033 /* We only need to remove edges connecting vertices in the same
2034 strong connected component to break it. */
2035 && component
== cbdata
->vertices_component
[j
]
2036 /* Check if we want to break the strong connected component or not. */
2037 && !bitmap_bit_p (cbdata
->sccs_to_merge
, component
))
2038 cbdata
->alias_ddrs
->safe_splice (edata
->alias_ddrs
);
2041 /* This is the main function breaking strong conected components in
2042 PARTITIONS giving reduced depdendence graph RDG. Store data dependence
2043 relations for runtime alias check in ALIAS_DDRS. */
2046 break_alias_scc_partitions (struct graph
*rdg
,
2047 vec
<struct partition
*> *partitions
,
2048 vec
<ddr_p
> *alias_ddrs
)
2050 int i
, j
, num_sccs
, num_sccs_no_alias
;
2051 /* Build partition dependence graph. */
2052 graph
*pg
= build_partition_graph (rdg
, partitions
, false);
2054 alias_ddrs
->truncate (0);
2055 /* Find strong connected components in the graph, with all dependence edges
2057 num_sccs
= graphds_scc (pg
, NULL
);
2058 /* All SCCs now can be broken by runtime alias checks because SCCs caused by
2059 compilation time known dependences are merged before this function. */
2060 if ((unsigned) num_sccs
< partitions
->length ())
2062 struct pg_edge_callback_data cbdata
;
2063 auto_bitmap sccs_to_merge
;
2064 auto_vec
<enum partition_type
> scc_types
;
2065 struct partition
*partition
, *first
;
2067 /* If all paritions in a SCC has the same type, we can simply merge the
2068 SCC. This loop finds out such SCCS and record them in bitmap. */
2069 bitmap_set_range (sccs_to_merge
, 0, (unsigned) num_sccs
);
2070 for (i
= 0; i
< num_sccs
; ++i
)
2072 for (j
= 0; partitions
->iterate (j
, &first
); ++j
)
2073 if (pg
->vertices
[j
].component
== i
)
2075 for (++j
; partitions
->iterate (j
, &partition
); ++j
)
2077 if (pg
->vertices
[j
].component
!= i
)
2080 if (first
->type
!= partition
->type
)
2082 bitmap_clear_bit (sccs_to_merge
, i
);
2088 /* Initialize callback data for traversing. */
2089 cbdata
.sccs_to_merge
= sccs_to_merge
;
2090 cbdata
.alias_ddrs
= alias_ddrs
;
2091 cbdata
.vertices_component
= XNEWVEC (int, pg
->n_vertices
);
2092 /* Record the component information which will be corrupted by next
2093 graph scc finding call. */
2094 for (i
= 0; i
< pg
->n_vertices
; ++i
)
2095 cbdata
.vertices_component
[i
] = pg
->vertices
[i
].component
;
2097 /* Collect data dependences for runtime alias checks to break SCCs. */
2098 if (bitmap_count_bits (sccs_to_merge
) != (unsigned) num_sccs
)
2100 /* Run SCC finding algorithm again, with alias dependence edges
2101 skipped. This is to topologically sort paritions according to
2102 compilation time known dependence. Note the topological order
2103 is stored in the form of pg's post order number. */
2104 num_sccs_no_alias
= graphds_scc (pg
, NULL
, pg_skip_alias_edge
);
2105 gcc_assert (partitions
->length () == (unsigned) num_sccs_no_alias
);
2106 /* With topological order, we can construct two subgraphs L and R.
2107 L contains edge <x, y> where x < y in terms of post order, while
2108 R contains edge <x, y> where x > y. Edges for compilation time
2109 known dependence all fall in R, so we break SCCs by removing all
2110 (alias) edges of in subgraph L. */
2111 for_each_edge (pg
, pg_collect_alias_ddrs
, &cbdata
);
2114 /* For SCC that doesn't need to be broken, merge it. */
2115 for (i
= 0; i
< num_sccs
; ++i
)
2117 if (!bitmap_bit_p (sccs_to_merge
, i
))
2120 for (j
= 0; partitions
->iterate (j
, &first
); ++j
)
2121 if (cbdata
.vertices_component
[j
] == i
)
2123 for (++j
; partitions
->iterate (j
, &partition
); ++j
)
2125 struct pg_vdata
*data
;
2127 if (cbdata
.vertices_component
[j
] != i
)
2130 partition_merge_into (NULL
, first
, partition
, FUSE_SAME_SCC
);
2131 (*partitions
)[j
] = NULL
;
2132 partition_free (partition
);
2133 data
= (struct pg_vdata
*)pg
->vertices
[j
].data
;
2134 gcc_assert (data
->id
== j
);
2135 data
->partition
= NULL
;
2140 sort_partitions_by_post_order (pg
, partitions
);
2141 free_partition_graph_vdata (pg
);
2142 for_each_edge (pg
, free_partition_graph_edata_cb
, NULL
);
2145 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2147 fprintf (dump_file
, "Possible alias data dependence to break:\n");
2148 dump_data_dependence_relations (dump_file
, *alias_ddrs
);
2152 /* Compute and return an expression whose value is the segment length which
2153 will be accessed by DR in NITERS iterations. */
2156 data_ref_segment_size (struct data_reference
*dr
, tree niters
)
2158 tree segment_length
;
2160 if (integer_zerop (DR_STEP (dr
)))
2161 segment_length
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
2163 segment_length
= size_binop (MULT_EXPR
,
2164 fold_convert (sizetype
, DR_STEP (dr
)),
2165 fold_convert (sizetype
, niters
));
2167 return segment_length
;
2170 /* Return true if LOOP's latch is dominated by statement for data reference
2174 latch_dominated_by_data_ref (struct loop
*loop
, data_reference
*dr
)
2176 return dominated_by_p (CDI_DOMINATORS
, single_exit (loop
)->src
,
2177 gimple_bb (DR_STMT (dr
)));
2180 /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
2181 data dependence relations ALIAS_DDRS. */
2184 compute_alias_check_pairs (struct loop
*loop
, vec
<ddr_p
> *alias_ddrs
,
2185 vec
<dr_with_seg_len_pair_t
> *comp_alias_pairs
)
2188 unsigned HOST_WIDE_INT factor
= 1;
2189 tree niters_plus_one
, niters
= number_of_latch_executions (loop
);
2191 gcc_assert (niters
!= NULL_TREE
&& niters
!= chrec_dont_know
);
2192 niters
= fold_convert (sizetype
, niters
);
2193 niters_plus_one
= size_binop (PLUS_EXPR
, niters
, size_one_node
);
2195 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2196 fprintf (dump_file
, "Creating alias check pairs:\n");
2198 /* Iterate all data dependence relations and compute alias check pairs. */
2199 for (i
= 0; i
< alias_ddrs
->length (); i
++)
2201 ddr_p ddr
= (*alias_ddrs
)[i
];
2202 struct data_reference
*dr_a
= DDR_A (ddr
);
2203 struct data_reference
*dr_b
= DDR_B (ddr
);
2204 tree seg_length_a
, seg_length_b
;
2205 int comp_res
= data_ref_compare_tree (DR_BASE_ADDRESS (dr_a
),
2206 DR_BASE_ADDRESS (dr_b
));
2209 comp_res
= data_ref_compare_tree (DR_OFFSET (dr_a
), DR_OFFSET (dr_b
));
2210 gcc_assert (comp_res
!= 0);
2212 if (latch_dominated_by_data_ref (loop
, dr_a
))
2213 seg_length_a
= data_ref_segment_size (dr_a
, niters_plus_one
);
2215 seg_length_a
= data_ref_segment_size (dr_a
, niters
);
2217 if (latch_dominated_by_data_ref (loop
, dr_b
))
2218 seg_length_b
= data_ref_segment_size (dr_b
, niters_plus_one
);
2220 seg_length_b
= data_ref_segment_size (dr_b
, niters
);
2222 dr_with_seg_len_pair_t dr_with_seg_len_pair
2223 (dr_with_seg_len (dr_a
, seg_length_a
),
2224 dr_with_seg_len (dr_b
, seg_length_b
));
2226 /* Canonicalize pairs by sorting the two DR members. */
2228 std::swap (dr_with_seg_len_pair
.first
, dr_with_seg_len_pair
.second
);
2230 comp_alias_pairs
->safe_push (dr_with_seg_len_pair
);
2233 if (tree_fits_uhwi_p (niters
))
2234 factor
= tree_to_uhwi (niters
);
2236 /* Prune alias check pairs. */
2237 prune_runtime_alias_test_list (comp_alias_pairs
, factor
);
2238 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2240 "Improved number of alias checks from %d to %d\n",
2241 alias_ddrs
->length (), comp_alias_pairs
->length ());
2244 /* Given data dependence relations in ALIAS_DDRS, generate runtime alias
2245 checks and version LOOP under condition of these runtime alias checks. */
2248 version_loop_by_alias_check (struct loop
*loop
, vec
<ddr_p
> *alias_ddrs
)
2250 profile_probability prob
;
2251 basic_block cond_bb
;
2253 tree lhs
, arg0
, cond_expr
= NULL_TREE
;
2254 gimple_seq cond_stmts
= NULL
;
2255 gimple
*call_stmt
= NULL
;
2256 auto_vec
<dr_with_seg_len_pair_t
> comp_alias_pairs
;
2258 /* Generate code for runtime alias checks if necessary. */
2259 gcc_assert (alias_ddrs
->length () > 0);
2261 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2263 "Version loop <%d> with runtime alias check\n", loop
->num
);
2265 compute_alias_check_pairs (loop
, alias_ddrs
, &comp_alias_pairs
);
2266 create_runtime_alias_checks (loop
, &comp_alias_pairs
, &cond_expr
);
2267 cond_expr
= force_gimple_operand_1 (cond_expr
, &cond_stmts
,
2268 is_gimple_condexpr
, NULL_TREE
);
2270 /* Depend on vectorizer to fold IFN_LOOP_DIST_ALIAS. */
2271 if (flag_tree_loop_vectorize
)
2273 /* Generate internal function call for loop distribution alias check. */
2274 call_stmt
= gimple_build_call_internal (IFN_LOOP_DIST_ALIAS
,
2275 2, NULL_TREE
, cond_expr
);
2276 lhs
= make_ssa_name (boolean_type_node
);
2277 gimple_call_set_lhs (call_stmt
, lhs
);
2282 prob
= profile_probability::guessed_always ().apply_scale (9, 10);
2283 initialize_original_copy_tables ();
2284 nloop
= loop_version (loop
, lhs
, &cond_bb
, prob
, prob
.invert (),
2285 prob
, prob
.invert (), true);
2286 free_original_copy_tables ();
2287 /* Record the original loop number in newly generated loops. In case of
2288 distribution, the original loop will be distributed and the new loop
2290 loop
->orig_loop_num
= nloop
->num
;
2291 nloop
->orig_loop_num
= nloop
->num
;
2292 nloop
->dont_vectorize
= true;
2293 nloop
->force_vectorize
= false;
2297 /* Record new loop's num in IFN_LOOP_DIST_ALIAS because the original
2298 loop could be destroyed. */
2299 arg0
= build_int_cst (integer_type_node
, loop
->orig_loop_num
);
2300 gimple_call_set_arg (call_stmt
, 0, arg0
);
2301 gimple_seq_add_stmt_without_update (&cond_stmts
, call_stmt
);
2306 gimple_stmt_iterator cond_gsi
= gsi_last_bb (cond_bb
);
2307 gsi_insert_seq_before (&cond_gsi
, cond_stmts
, GSI_SAME_STMT
);
2309 update_ssa (TODO_update_ssa
);
2312 /* Return true if loop versioning is needed to distrubute PARTITIONS.
2313 ALIAS_DDRS are data dependence relations for runtime alias check. */
2316 version_for_distribution_p (vec
<struct partition
*> *partitions
,
2317 vec
<ddr_p
> *alias_ddrs
)
2319 /* No need to version loop if we have only one partition. */
2320 if (partitions
->length () == 1)
2323 /* Need to version loop if runtime alias check is necessary. */
2324 return (alias_ddrs
->length () > 0);
2327 /* Fuse all partitions if necessary before finalizing distribution. */
2330 finalize_partitions (vec
<struct partition
*> *partitions
,
2331 vec
<ddr_p
> *alias_ddrs
)
2334 struct partition
*a
, *partition
;
2336 if (partitions
->length () == 1
2337 || alias_ddrs
->length () > 0)
2340 a
= (*partitions
)[0];
2341 if (a
->kind
!= PKIND_NORMAL
)
2344 for (i
= 1; partitions
->iterate (i
, &partition
); ++i
)
2346 /* Don't fuse if partition has different type or it is a builtin. */
2347 if (partition
->type
!= a
->type
2348 || partition
->kind
!= PKIND_NORMAL
)
2352 /* Fuse all partitions. */
2353 for (i
= 1; partitions
->iterate (i
, &partition
); ++i
)
2355 partition_merge_into (NULL
, a
, partition
, FUSE_FINALIZE
);
2356 partition_free (partition
);
2358 partitions
->truncate (1);
2361 /* Distributes the code from LOOP in such a way that producer statements
2362 are placed before consumer statements. Tries to separate only the
2363 statements from STMTS into separate loops. Returns the number of
2364 distributed loops. Set NB_CALLS to number of generated builtin calls.
2365 Set *DESTROY_P to whether LOOP needs to be destroyed. */
2368 distribute_loop (struct loop
*loop
, vec
<gimple
*> stmts
,
2369 control_dependences
*cd
, int *nb_calls
, bool *destroy_p
)
2372 partition
*partition
;
2378 loop_nest
.create (0);
2379 if (!find_loop_nest (loop
, &loop_nest
))
2381 loop_nest
.release ();
2385 datarefs_vec
.create (20);
2386 rdg
= build_rdg (loop
, cd
);
2389 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2391 "Loop %d not distributed: failed to build the RDG.\n",
2394 loop_nest
.release ();
2395 free_data_refs (datarefs_vec
);
2399 if (datarefs_vec
.length () > MAX_DATAREFS_NUM
)
2401 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2403 "Loop %d not distributed: too many memory references.\n",
2407 loop_nest
.release ();
2408 free_data_refs (datarefs_vec
);
2412 data_reference_p dref
;
2413 for (i
= 0; datarefs_vec
.iterate (i
, &dref
); ++i
)
2414 dref
->aux
= (void *) (uintptr_t) i
;
2416 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2417 dump_rdg (dump_file
, rdg
);
2419 auto_vec
<struct partition
*, 3> partitions
;
2420 rdg_build_partitions (rdg
, stmts
, &partitions
);
2422 /* Can't do runtime alias check if loop niter is unknown. */
2423 tree niters
= number_of_latch_executions (loop
);
2424 bool rt_alias_check_p
= (niters
!= NULL_TREE
&& niters
!= chrec_dont_know
);
2425 auto_vec
<ddr_p
> alias_ddrs
;
2427 auto_bitmap stmt_in_all_partitions
;
2428 bitmap_copy (stmt_in_all_partitions
, partitions
[0]->stmts
);
2429 for (i
= 1; partitions
.iterate (i
, &partition
); ++i
)
2430 bitmap_and_into (stmt_in_all_partitions
, partitions
[i
]->stmts
);
2432 any_builtin
= false;
2433 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
2435 classify_partition (loop
, rdg
, partition
, stmt_in_all_partitions
);
2436 any_builtin
|= partition_builtin_p (partition
);
2439 /* If we are only distributing patterns but did not detect any,
2441 if (!flag_tree_loop_distribution
2448 /* If we are only distributing patterns fuse all partitions that
2449 were not classified as builtins. This also avoids chopping
2450 a loop into pieces, separated by builtin calls. That is, we
2451 only want no or a single loop body remaining. */
2452 struct partition
*into
;
2453 if (!flag_tree_loop_distribution
)
2455 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
2456 if (!partition_builtin_p (into
))
2458 for (++i
; partitions
.iterate (i
, &partition
); ++i
)
2459 if (!partition_builtin_p (partition
))
2461 partition_merge_into (NULL
, into
, partition
, FUSE_NON_BUILTIN
);
2462 partitions
.unordered_remove (i
);
2463 partition_free (partition
);
2468 /* Due to limitations in the transform phase we have to fuse all
2469 reduction partitions into the last partition so the existing
2470 loop will contain all loop-closed PHI nodes. */
2471 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
2472 if (partition_reduction_p (into
))
2474 for (i
= i
+ 1; partitions
.iterate (i
, &partition
); ++i
)
2475 if (partition_reduction_p (partition
))
2477 partition_merge_into (rdg
, into
, partition
, FUSE_REDUCTION
);
2478 partitions
.unordered_remove (i
);
2479 partition_free (partition
);
2483 /* Apply our simple cost model - fuse partitions with similar
2485 for (i
= 0; partitions
.iterate (i
, &into
); ++i
)
2487 bool changed
= false;
2488 if (partition_builtin_p (into
))
2491 partitions
.iterate (j
, &partition
); ++j
)
2493 if (share_memory_accesses (rdg
, into
, partition
))
2495 partition_merge_into (rdg
, into
, partition
, FUSE_SHARE_REF
);
2496 partitions
.unordered_remove (j
);
2497 partition_free (partition
);
2502 /* If we fused 0 1 2 in step 1 to 0,2 1 as 0 and 2 have similar
2503 accesses when 1 and 2 have similar accesses but not 0 and 1
2504 then in the next iteration we will fail to consider merging
2505 1 into 0,2. So try again if we did any merging into 0. */
2510 /* Build the partition dependency graph. */
2511 if (partitions
.length () > 1)
2513 merge_dep_scc_partitions (rdg
, &partitions
, rt_alias_check_p
);
2514 alias_ddrs
.truncate (0);
2515 if (rt_alias_check_p
&& partitions
.length () > 1)
2516 break_alias_scc_partitions (rdg
, &partitions
, &alias_ddrs
);
2519 finalize_partitions (&partitions
, &alias_ddrs
);
2521 nbp
= partitions
.length ();
2523 || (nbp
== 1 && !partition_builtin_p (partitions
[0]))
2524 || (nbp
> 1 && partition_contains_all_rw (rdg
, partitions
)))
2530 if (version_for_distribution_p (&partitions
, &alias_ddrs
))
2531 version_loop_by_alias_check (loop
, &alias_ddrs
);
2533 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2536 "distribute loop <%d> into partitions:\n", loop
->num
);
2537 dump_rdg_partitions (dump_file
, partitions
);
2540 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
2542 if (partition_builtin_p (partition
))
2544 *destroy_p
|= generate_code_for_partition (loop
, partition
, i
< nbp
- 1);
2548 loop_nest
.release ();
2549 free_data_refs (datarefs_vec
);
2550 for (hash_table
<ddr_hasher
>::iterator iter
= ddrs_table
.begin ();
2551 iter
!= ddrs_table
.end (); ++iter
)
2553 free_dependence_relation (*iter
);
2556 ddrs_table
.empty ();
2558 FOR_EACH_VEC_ELT (partitions
, i
, partition
)
2559 partition_free (partition
);
2562 return nbp
- *nb_calls
;
2565 /* Distribute all loops in the current function. */
2569 const pass_data pass_data_loop_distribution
=
2571 GIMPLE_PASS
, /* type */
2573 OPTGROUP_LOOP
, /* optinfo_flags */
2574 TV_TREE_LOOP_DISTRIBUTION
, /* tv_id */
2575 ( PROP_cfg
| PROP_ssa
), /* properties_required */
2576 0, /* properties_provided */
2577 0, /* properties_destroyed */
2578 0, /* todo_flags_start */
2579 0, /* todo_flags_finish */
2582 class pass_loop_distribution
: public gimple_opt_pass
2585 pass_loop_distribution (gcc::context
*ctxt
)
2586 : gimple_opt_pass (pass_data_loop_distribution
, ctxt
)
2589 /* opt_pass methods: */
2590 virtual bool gate (function
*)
2592 return flag_tree_loop_distribution
2593 || flag_tree_loop_distribute_patterns
;
2596 virtual unsigned int execute (function
*);
2598 }; // class pass_loop_distribution
2601 pass_loop_distribution::execute (function
*fun
)
2604 bool changed
= false;
2606 control_dependences
*cd
= NULL
;
2607 auto_vec
<loop_p
> loops_to_be_destroyed
;
2609 if (number_of_loops (fun
) <= 1)
2612 /* Compute topological order for basic blocks. Topological order is
2613 needed because data dependence is computed for data references in
2614 lexicographical order. */
2615 if (bb_top_order_index
== NULL
)
2618 int *rpo
= XNEWVEC (int, last_basic_block_for_fn (cfun
));
2620 bb_top_order_index
= XNEWVEC (int, last_basic_block_for_fn (cfun
));
2621 bb_top_order_index_size
= last_basic_block_for_fn (cfun
);
2622 rpo_num
= pre_and_rev_post_order_compute_fn (cfun
, NULL
, rpo
, true);
2623 for (int i
= 0; i
< rpo_num
; i
++)
2624 bb_top_order_index
[rpo
[i
]] = i
;
2629 FOR_ALL_BB_FN (bb
, fun
)
2631 gimple_stmt_iterator gsi
;
2632 for (gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2633 gimple_set_uid (gsi_stmt (gsi
), -1);
2634 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
2635 gimple_set_uid (gsi_stmt (gsi
), -1);
2638 /* We can at the moment only distribute non-nested loops, thus restrict
2639 walking to innermost loops. */
2640 FOR_EACH_LOOP (loop
, LI_ONLY_INNERMOST
)
2642 auto_vec
<gimple
*> work_list
;
2644 int num
= loop
->num
;
2647 /* If the loop doesn't have a single exit we will fail anyway,
2648 so do that early. */
2649 if (!single_exit (loop
))
2652 /* Only optimize hot loops. */
2653 if (!optimize_loop_for_speed_p (loop
))
2656 /* Initialize the worklist with stmts we seed the partitions with. */
2657 bbs
= get_loop_body_in_dom_order (loop
);
2658 for (i
= 0; i
< loop
->num_nodes
; ++i
)
2660 for (gphi_iterator gsi
= gsi_start_phis (bbs
[i
]);
2664 gphi
*phi
= gsi
.phi ();
2665 if (virtual_operand_p (gimple_phi_result (phi
)))
2667 /* Distribute stmts which have defs that are used outside of
2669 if (!stmt_has_scalar_dependences_outside_loop (loop
, phi
))
2671 work_list
.safe_push (phi
);
2673 for (gimple_stmt_iterator gsi
= gsi_start_bb (bbs
[i
]);
2677 gimple
*stmt
= gsi_stmt (gsi
);
2679 /* If there is a stmt with side-effects bail out - we
2680 cannot and should not distribute this loop. */
2681 if (gimple_has_side_effects (stmt
))
2683 work_list
.truncate (0);
2687 /* Distribute stmts which have defs that are used outside of
2689 if (stmt_has_scalar_dependences_outside_loop (loop
, stmt
))
2691 /* Otherwise only distribute stores for now. */
2692 else if (!gimple_vdef (stmt
))
2695 work_list
.safe_push (stmt
);
2701 int nb_generated_loops
= 0;
2702 int nb_generated_calls
= 0;
2703 location_t loc
= find_loop_location (loop
);
2704 if (work_list
.length () > 0)
2708 calculate_dominance_info (CDI_DOMINATORS
);
2709 calculate_dominance_info (CDI_POST_DOMINATORS
);
2710 cd
= new control_dependences ();
2711 free_dominance_info (CDI_POST_DOMINATORS
);
2714 nb_generated_loops
= distribute_loop (loop
, work_list
, cd
,
2715 &nb_generated_calls
,
2718 loops_to_be_destroyed
.safe_push (loop
);
2721 if (nb_generated_loops
+ nb_generated_calls
> 0)
2724 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS
,
2725 loc
, "Loop %d distributed: split to %d loops "
2726 "and %d library calls.\n",
2727 num
, nb_generated_loops
, nb_generated_calls
);
2729 else if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2730 fprintf (dump_file
, "Loop %d is the same.\n", num
);
2736 if (bb_top_order_index
!= NULL
)
2738 free (bb_top_order_index
);
2739 bb_top_order_index
= NULL
;
2740 bb_top_order_index_size
= 0;
2745 /* Destroy loop bodies that could not be reused. Do this late as we
2746 otherwise can end up refering to stale data in control dependences. */
2748 FOR_EACH_VEC_ELT (loops_to_be_destroyed
, i
, loop
)
2749 destroy_loop (loop
);
2751 /* Cached scalar evolutions now may refer to wrong or non-existing
2754 mark_virtual_operands_for_renaming (fun
);
2755 rewrite_into_loop_closed_ssa (NULL
, TODO_update_ssa
);
2758 checking_verify_loop_structure ();
2766 make_pass_loop_distribution (gcc::context
*ctxt
)
2768 return new pass_loop_distribution (ctxt
);