/* Loop autoparallelization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-low.h"

/* This pass tries to distribute the iterations of loops into several
   threads.  The implementation is straightforward -- for each loop we test
   whether its iterations are independent, and if that is the case (and some
   additional conditions regarding profitability and correctness are
   satisfied), we add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let
   the omp expansion machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.


parloop
{
  int sum=1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum+=x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1()  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

...

}

*/

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
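
/* For example, with MIN_PER_THREAD == 100 and four threads requested,
   gen_parallel_loop below only dispatches the parallel version of an
   innermost loop when the runtime iteration count is at least
   100 * 4 = 400; for loops that contain an inner loop, a lower bound of
   2 iterations per thread is used instead.  */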

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;            /* reduction statement.  */
  gimple reduc_phi;             /* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;       /* SSA_NAME_VERSION of original reduc_phi
                                   result.  */
  gimple keep_res;              /* The PHI_RESULT of this phi is the resulting
                                   value of the reduction variable when exiting
                                   the loop.  */
  tree initial_value;           /* The initial value of the reduction var before
                                   entering the loop.  */
  tree field;                   /* the name of the field in the parloop data
                                   structure intended for reduction.  */
  tree init;                    /* reduction initialization value.  */
  gimple new_phi;               /* (helper field) Newly created phi node whose
                                   result will be passed to the atomic
                                   operation.  Represents the local result each
                                   thread computed for the reduction
                                   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : typed_free_remove <reduction_info>
{
  typedef reduction_info value_type;
  typedef reduction_info compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const value_type *a, const compare_type *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const value_type *a)
{
  return a->reduc_version;
}

typedef hash_table <reduction_hasher> reduction_info_table_type;

/* Returns the reduction information for PHI, or NULL if PHI is not a
   reduction registered in REDUCTION_LIST.  */

static struct reduction_info *
reduction_phi (reduction_info_table_type reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list.elements () == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list.find (&tmpred);

  return red;
}
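
/* For illustration (a hypothetical caller, not code from this pass): the
   lookup hashes on reduc_version, which is read back from gimple_uid (PHI),
   so the uid of a reduction phi must have been set to its SSA version when
   the reduction was registered:

     gimple phi = SSA_NAME_DEF_STMT (val);
     struct reduction_info *red = reduction_phi (reduction_list, phi);
     if (red)
       ... use red->reduc_stmt, red->reduction_code, ...

   Equality then compares the phi pointers themselves, so a stale uid can
   only cause a missed lookup, never a false match.  */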

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;     /* The version of the name to copy.  */
  tree new_name;        /* The new name used in the copy.  */
  tree field;           /* The field of the structure used to pass the
                           value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
{
  typedef name_to_copy_elt value_type;
  typedef name_to_copy_elt compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const value_type *a)
{
  return (hashval_t) a->version;
}

typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
                         struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
                           lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
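
/* For example, applying the 2x2 loop-interchange matrix
   [[0, 1], [1, 0]] to the distance vector (1, -1) yields (-1, 1): the
   first nonzero entry is negative, so the transformed vector is not
   lexicographically positive, and lambda_transform_legal_p below would
   reject the interchange.  */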

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
                          int nb_loops,
                          vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
              && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
         dependence, nor about read-read (a.k.a. input dependences):
         these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
          || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
        continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        return false;

      /* If the dependence could not be captured by a distance vector,
         conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
        return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
        {
          lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
                                     DDR_DIST_VECT (ddr, j), distres);

          if (!lambda_vector_lexico_pos (distres, nb_loops))
            return false;
        }
    }
  return true;
}
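
/* For illustration: loop_parallel_p below calls this with the 1x1
   reversal matrix [-1].  A dependence with distance vector (1), such as
   the one of a[i+1] = a[i], maps to (-1), which is not lexicographically
   positive, so reversal -- and hence parallelization -- is rejected.
   When every remaining relation is known independent, the test passes
   trivially.  */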

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<loop_p> loop_nest;
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
        fprintf (dump_file, "loop is innermost\n");
      else
        fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  datarefs.create (10);
  dependence_relations.create (10 * 10);
  loop_nest.create (3);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
                                           &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "  FAILED: data dependencies exist across iterations\n");

 end:
  loop_nest.release ();
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
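
/* For illustration: the iterations of

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   carry no cross-iteration dependence, so loop_parallel_p accepts the
   loop, whereas

     for (i = 0; i < n - 1; i++)
       a[i + 1] = a[i] + 1;

   has a flow dependence of distance 1 and is rejected.  */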

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
                 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  int_tree_map **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  ielt.uid = uid;
  dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      if (gsi == NULL)
        return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
        = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
        name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
        name = make_ssa_name (TREE_TYPE (addr), NULL);
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = (*dslot)->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
                               &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
                                   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
                        OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
           (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
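
/* For illustration, continuing the sum example from the top of the file:
   the header phi

     # sum_29 = PHI <sum_11(latch), 1(preheader)>

   becomes

     # sum_29 = PHI <sum_11(latch), 0(preheader)>

   since 0 is the neutral element of PLUS_EXPR, while the user's initial
   value 1 is remembered in reduc->initial_value and folded back in after
   the threads join.  */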

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
        return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
                              dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
        {
          dta->reset = true;
          return NULL_TREE;
        }

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
         -- as a gimple operand, when the address taken is a function invariant
         -- as gimple rhs, when the resulting address is not a function
            invariant
         We do not need to do anything special in the latter case (the base of
         the memory reference whose address is taken may be replaced in the
         DECL_P case).  The former case is more complicated, as we need to
         ensure that the new address is still a gimple operand.  Thus, it
         is not sufficient to replace just the base of the memory reference --
         we need to move the whole computation of the address out of the
         loop.  */
      if (!is_gimple_val (t))
        return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
        return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
                              dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
        {
          dta->reset = true;
          return NULL_TREE;
        }
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
                                int_tree_htab_type decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
                 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
        {
          gimple_debug_bind_reset_value (stmt);
          dta.changed = true;
        }
    }
  else if (gimple_clobber_p (stmt))
    {
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  vec<basic_block> body;
  body.create (3);
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address;
  decl_address.create (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        if (is_gimple_debug (gsi_stmt (gsi)))
          {
            if (gimple_debug_bind_p (gsi_stmt (gsi)))
              has_debug_stmt = true;
          }
        else
          eliminate_local_variables_stmt (entry, &gsi, decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (gimple_debug_bind_p (gsi_stmt (gsi)))
            eliminate_local_variables_stmt (entry, &gsi, decl_address);

  decl_address.dispose ();
  body.release ();
}
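
/* For illustration: a use of a local addressable variable x inside the
   region,

     ... = x;

   is rewritten as

     x.addr_1 = &x;       <-- inserted on the ENTRY edge
     ... = *x.addr_1;     <-- inside the region

   ("x.addr_1" is a made-up name here; the pass creates a temporary SSA
   name based on the decl's name), so that the outlined function only
   refers to x through a pointer that can be passed to it.  */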

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
          && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
        return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
                               int_tree_htab_type decl_copies, bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies.find_slot_with_hash (&elt, idx,
                                          copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
                               name_to_copy_table_type name_copies,
                               int_tree_htab_type decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
                                            false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
        continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
                                            copy_name_p);
      SET_USE (use, copy);
    }
}

/* Replaces the ssa names used in the debug statement STMT with their
   duplicates from NAME_COPIES, and the variable it binds with its
   duplicate from DECL_COPIES.  Returns true if STMT cannot be rewritten
   this way and should instead be removed from the region.  */

static bool
separate_decls_in_region_debug (gimple stmt,
                                name_to_copy_table_type name_copies,
                                int_tree_htab_type decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
        continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
        {
          gimple_debug_bind_reset_value (stmt);
          update_stmt (stmt);
          break;
        }

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
                           SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
                           FIELD_DECL, SSA_NAME_IDENTIFIER (name),
                           TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the reduction's
   data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
               FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
                   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
                   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
                                   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
                           reduction_info_table_type reduction_list,
                           struct clsn_data *ld_st_data)
{
  reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
                        NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
        remove_phi_node (&gsi, false);
        return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type reduction_list,
                                  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;

  reduction_list
    .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}

/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
                                  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  SSA_NAME_DEF_STMT (elt->new_name) = stmt;
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split into
   a separate function later, and `new' will be initialized from its
   arguments).  LD_ST_DATA holds information about the shared data structure
   used to pass information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
                          reduction_info_table_type reduction_list,
                          tree *arg_struct, tree *new_arg_struct,
                          struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies;
  name_copies.create (10);
  int_tree_htab_type decl_copies;
  decl_copies.create (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  vec<basic_block> body;
  body.create (3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
                                           name_copies, decl_copies);

          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple stmt = gsi_stmt (gsi);

              if (is_gimple_debug (stmt))
                has_debug_stmt = true;
              else
                separate_decls_in_region_stmt (entry, exit, stmt,
                                               name_copies, decl_copies);
            }
        }
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
            {
              gimple stmt = gsi_stmt (gsi);

              if (is_gimple_debug (stmt))
                {
                  if (separate_decls_in_region_debug (stmt, name_copies,
                                                      decl_copies))
                    {
                      gsi_remove (&gsi, true);
                      continue;
                    }
                }

              gsi_next (&gsi);
            }
        }

  body.release ();

  if (name_copies.elements () == 0 && reduction_list.elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop-carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
                              TYPE_DECL, create_tmp_var_name (".paral_data"),
                              type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list.is_created () && reduction_list.elements () > 0)
        {
          /* Create the fields for reductions.  */
          reduction_list.traverse <tree, add_field_for_reduction> (type);
        }
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
        .traverse <struct clsn_data *, create_loads_and_stores_for_name>
        (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list.is_created () && reduction_list.elements () > 0)
        {
          reduction_list
            .traverse <struct clsn_data *, create_stores_for_reduction>
            (ld_st_data);
          clsn_data.load = make_ssa_name (nvar, NULL);
          clsn_data.load_bb = exit->dest;
          clsn_data.store = ld_st_data->store;
          create_final_loads_for_reduction (reduction_list, &clsn_data);
        }
    }

  decl_copies.dispose ();
  name_copies.dispose ();
}

/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;

/* Returns true if FN was created by create_loop_fn.  */

bool
parallelized_function_p (tree fn)
{
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
    return false;

  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
                  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
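
/* For illustration: when parallelizing a loop in a function foo, this
   builds a decl roughly equivalent to

     static void foo.$loopfn.0 (void *.paral_data_param);

   (the exact assembler name comes from ASM_FORMAT_PRIVATE_NAME and the
   per-translation-unit counter); the body is filled in later, when the
   omp expander outlines the loop into it.  */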

/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
                              reduction_info_table_type reduction_list,
                              tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
        {
          gimple_cond_set_lhs (cond_stmt, t);
          update_stmt (cond_stmt);
          control = t;
        }
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
                                   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
        {
          gsi_next (&gsi);
          continue;
        }

      /* Check if it is a part of reduction.  If it is,
         keep the phi at the reduction's keep_res field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  */

      if (reduction_list.elements () > 0)
        {
          struct reduction_info *red;

          tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
          red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
          if (red)
            {
              red->keep_res = phi;
              gsi_next (&gsi);
              continue;
            }
        }
      gcc_assert (control_name == NULL_TREE
                  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
                                    fold_convert (TREE_TYPE (control_name),
                                                  nit_1),
                                    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
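
/* For illustration (simplified): a canonicalized loop of the shape

     header: body; i' = i + 1; if (i' != nit) goto header; else goto exit;

   is rewritten so that the test comes first,

     header:  if (i != nit) goto body_bb; else goto ex_bb;
     body_bb: body; i' = i + 1; goto header;
     ex_bb:   copy of the last iteration's body, with the control
              variable initialized to NIT;

   which is the shape the GIMPLE_OMP_FOR expander expects.  */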
1596
1597 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1598 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1599 NEW_DATA is the variable that should be initialized from the argument
1600 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1601 basic block containing GIMPLE_OMP_PARALLEL tree. */
1602
1603 static basic_block
1604 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1605 tree new_data, unsigned n_threads, location_t loc)
1606 {
1607 gimple_stmt_iterator gsi;
1608 basic_block bb, paral_bb, for_bb, ex_bb;
1609 tree t, param;
1610 gimple stmt, for_stmt, phi, cond_stmt;
1611 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1612 edge exit, nexit, guard, end, e;
1613
1614 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1615 bb = loop_preheader_edge (loop)->src;
1616 paral_bb = single_pred (bb);
1617 gsi = gsi_last_bb (paral_bb);
1618
1619 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1620 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1621 = build_int_cst (integer_type_node, n_threads);
1622 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1623 gimple_set_location (stmt, loc);
1624
1625 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1626
1627 /* Initialize NEW_DATA. */
1628 if (data)
1629 {
1630 gsi = gsi_after_labels (bb);
1631
1632 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1633 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1634 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1635 SSA_NAME_DEF_STMT (param) = stmt;
1636
1637 stmt = gimple_build_assign (new_data,
1638 fold_convert (TREE_TYPE (new_data), param));
1639 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1640 SSA_NAME_DEF_STMT (new_data) = stmt;
1641 }
1642
1643 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1644 bb = split_loop_exit_edge (single_dom_exit (loop));
1645 gsi = gsi_last_bb (bb);
1646 stmt = gimple_build_omp_return (false);
1647 gimple_set_location (stmt, loc);
1648 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1649
1650 /* Extract data for GIMPLE_OMP_FOR. */
1651 gcc_assert (loop->header == single_dom_exit (loop)->src);
1652 cond_stmt = last_stmt (loop->header);
1653
1654 cvar = gimple_cond_lhs (cond_stmt);
1655 cvar_base = SSA_NAME_VAR (cvar);
1656 phi = SSA_NAME_DEF_STMT (cvar);
1657 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1658 initvar = copy_ssa_name (cvar, NULL);
1659 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1660 initvar);
1661 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1662
1663 gsi = gsi_last_nondebug_bb (loop->latch);
1664 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1665 gsi_remove (&gsi, true);
1666
1667 /* Prepare cfg. */
1668 for_bb = split_edge (loop_preheader_edge (loop));
1669 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1670 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1671 gcc_assert (exit == single_dom_exit (loop));
1672
1673 guard = make_edge (for_bb, ex_bb, 0);
1674 single_succ_edge (loop->latch)->flags = 0;
1675 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1676 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1677 {
1678 source_location locus;
1679 tree def;
1680 phi = gsi_stmt (gsi);
1681 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1682
1683 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1684 locus = gimple_phi_arg_location_from_edge (stmt,
1685 loop_preheader_edge (loop));
1686 add_phi_arg (phi, def, guard, locus);
1687
1688 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1689 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1690 add_phi_arg (phi, def, end, locus);
1691 }
1692 e = redirect_edge_and_branch (exit, nexit->dest);
1693 PENDING_STMT (e) = NULL;
1694
1695 /* Emit GIMPLE_OMP_FOR. */
1696 gimple_cond_set_lhs (cond_stmt, cvar_base);
1697 type = TREE_TYPE (cvar);
1698 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1699 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1700
1701 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
1702 gimple_set_location (for_stmt, loc);
1703 gimple_omp_for_set_index (for_stmt, 0, initvar);
1704 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1705 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1706 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1707 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1708 cvar_base,
1709 build_int_cst (type, 1)));
1710
1711 gsi = gsi_last_bb (for_bb);
1712 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1713 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1714
1715 /* Emit GIMPLE_OMP_CONTINUE. */
1716 gsi = gsi_last_bb (loop->latch);
1717 stmt = gimple_build_omp_continue (cvar_next, cvar);
1718 gimple_set_location (stmt, loc);
1719 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1720 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1721
1722 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1723 gsi = gsi_last_bb (ex_bb);
1724 stmt = gimple_build_omp_return (true);
1725 gimple_set_location (stmt, loc);
1726 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1727
1728 /* After the above dom info is hosed. Re-compute it. */
1729 free_dominance_info (CDI_DOMINATORS);
1730 calculate_dominance_info (CDI_DOMINATORS);
1731
1732 return paral_bb;
1733 }
1734
1735 /* Generates code to execute the iterations of LOOP in N_THREADS
1736 threads in parallel.
1737
1738 NITER describes number of iterations of LOOP.
1739 REDUCTION_LIST describes the reductions existent in the LOOP. */
1740
1741 static void
1742 gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
1743 unsigned n_threads, struct tree_niter_desc *niter)
1744 {
1745 loop_iterator li;
1746 tree many_iterations_cond, type, nit;
1747 tree arg_struct, new_arg_struct;
1748 gimple_seq stmts;
1749 basic_block parallel_head;
1750 edge entry, exit;
1751 struct clsn_data clsn_data;
1752 unsigned prob;
1753 location_t loc;
1754 gimple cond_stmt;
1755 unsigned int m_p_thread=2;
1756
1757 /* From
1758
1759 ---------------------------------------------------------------------
1760 loop
1761 {
1762 IV = phi (INIT, IV + STEP)
1763 BODY1;
1764 if (COND)
1765 break;
1766 BODY2;
1767 }
1768 ---------------------------------------------------------------------
1769
1770 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1771 we generate the following code:
1772
1773 ---------------------------------------------------------------------
1774
1775 if (MAY_BE_ZERO
1776 || NITER < MIN_PER_THREAD * N_THREADS)
1777 goto original;
1778
1779 BODY1;
1780 store all local loop-invariant variables used in body of the loop to DATA.
1781 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1782 load the variables from DATA.
1783 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1784 BODY2;
1785 BODY1;
1786 GIMPLE_OMP_CONTINUE;
1787 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1788 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1789 goto end;
1790
1791 original:
1792 loop
1793 {
1794 IV = phi (INIT, IV + STEP)
1795 BODY1;
1796 if (COND)
1797 break;
1798 BODY2;
1799 }
1800
1801 end:
1802
1803 */
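
/* Purely as an illustration (a sketch, not literal output of the pass),
   at the source level the result is close to

       if (!MAY_BE_ZERO && NITER >= MIN_PER_THREAD * N_THREADS)
         {
           #pragma omp parallel for num_threads (N_THREADS) schedule (static)
           for (IV = INIT; COND; IV += STEP)
             <loop body>;
         }
       else
         <original loop>;

   with the loop body outlined into LOOPFN and the values live on entry
   passed through the DATA structure.  */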
1804
1805 /* Create two versions of the loop: in the old one, we know that the
1806 number of iterations is large enough, so we transform it into the
1807 loop that will be split off into loop_fn; the new one will be used
1808 for the remaining iterations. */
1809
1810 /* We should compute a better number-of-iterations value for outer loops.
1811 That is, if we have
1812
1813 for (i = 0; i < n; ++i)
1814 for (j = 0; j < m; ++j)
1815 ...
1816
1817 we should compute nit = n * m, not nit = n.
1818 Also may_be_zero handling would need to be adjusted. */
1819
1820 type = TREE_TYPE (niter->niter);
1821 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1822 NULL_TREE);
1823 if (stmts)
1824 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1825
1826 if (loop->inner)
1827 m_p_thread = 2;
1828 else
1829 m_p_thread = MIN_PER_THREAD;
1830
1831 many_iterations_cond =
1832 fold_build2 (GE_EXPR, boolean_type_node,
1833 nit, build_int_cst (type, m_p_thread * n_threads));
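
/* In concrete numbers (illustrative; MIN_PER_THREAD is defined earlier in
   this file): with -ftree-parallelize-loops=4 and MIN_PER_THREAD of 100,
   an innermost loop takes the parallel path only for nit >= 400, while a
   loop that contains an inner loop needs only nit >= 8.  */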
1834
1835 many_iterations_cond
1836 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1837 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1838 many_iterations_cond);
1839 many_iterations_cond
1840 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1841 if (stmts)
1842 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1843 if (!is_gimple_condexpr (many_iterations_cond))
1844 {
1845 many_iterations_cond
1846 = force_gimple_operand (many_iterations_cond, &stmts,
1847 true, NULL_TREE);
1848 if (stmts)
1849 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1850 }
1851
1852 initialize_original_copy_tables ();
1853
1854 /* We assume that the loop usually iterates a lot. */
1855 prob = 4 * REG_BR_PROB_BASE / 5;
1856 loop_version (loop, many_iterations_cond, NULL,
1857 prob, prob, REG_BR_PROB_BASE - prob, true);
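
/* loop_version above placed the version to be parallelized on the branch
   where the condition holds; prob is 4/5 of REG_BR_PROB_BASE, i.e. that
   branch is predicted taken 80% of the time, matching the assumption
   stated above.  */
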
1858 update_ssa (TODO_update_ssa);
1859 free_original_copy_tables ();
1860
1861 /* Base all the induction variables in LOOP on a single control one. */
1862 canonicalize_loop_ivs (loop, &nit, true);
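
/* For example (an illustrative sketch): after canonicalization, a loop that
   used both a counter and a pointer induction variable,

       for (i = 0, p = a; i < n; i++, p += 4)
         ...

   drives everything from one canonical counter, conceptually i = ivtmp and
   p = a + 4 * ivtmp, so the GIMPLE_OMP_FOR emitted later only has to handle
   a single induction variable.  */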
1863
1864 /* Ensure that the exit condition is the first statement in the loop. */
1865 transform_to_exit_first_loop (loop, reduction_list, nit);
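
/* Illustrative sketch of the exit-first shape: a loop testing its exit
   condition at the bottom,

       do { <body>; } while (i < n);

   is transformed so that the test comes first,

       while (1) { if (!(i < n)) break; <body>; }

   which is the form the GIMPLE_OMP_FOR expansion expects.  */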
1866
1867 /* Generate initializations for reductions. */
1868 if (reduction_list.elements () > 0)
1869 reduction_list.traverse <struct loop *, initialize_reductions> (loop);
1870
1871 /* Eliminate the references to local variables from the loop. */
1872 gcc_assert (single_exit (loop));
1873 entry = loop_preheader_edge (loop);
1874 exit = single_dom_exit (loop);
1875
1876 eliminate_local_variables (entry, exit);
1877 /* In the old loop, move all variables non-local to the loop into a structure
1878 and back, and create separate decls for the variables used in the loop. */
1879 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1880 &new_arg_struct, &clsn_data);
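
/* Sketch of the effect (names are illustrative): for a loop reading the
   locals "sum" and "a", roughly

       struct data { int sum; int *a; } d;
       d.sum = sum; d.a = a;   -- stored before entering the parallel region
       loop_fn (&d);           -- the outlined body loads them back

   ARG_STRUCT and NEW_ARG_STRUCT refer to this structure in the old and the
   new function, respectively.  */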
1881
1882 /* Create the parallel constructs. */
1883 loc = UNKNOWN_LOCATION;
1884 cond_stmt = last_stmt (loop->header);
1885 if (cond_stmt)
1886 loc = gimple_location (cond_stmt);
1887 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1888 new_arg_struct, n_threads, loc);
1889 if (reduction_list.elements () > 0)
1890 create_call_for_reduction (loop, reduction_list, &clsn_data);
1891
1892 scev_reset ();
1893
1894 /* Cancel the loop (it is simpler to do it here rather than to teach the
1895 expander to do it). */
1896 cancel_loop_tree (loop);
1897
1898 /* Free loop bound estimations that could contain references to
1899 removed statements. */
1900 FOR_EACH_LOOP (li, loop, 0)
1901 free_numbers_of_iterations_estimates_loop (loop);
1902
1903 /* Expand the parallel constructs. We do it directly here instead of running
1904 a separate expand_omp pass, since it is more efficient, and less likely to
1905 cause trouble with further analyses that cannot deal with the
1906 OMP trees. */
1907
1908 omp_expand_local (parallel_head);
1909 }
1910
1911 /* Returns true when LOOP contains vector phi nodes. */
1912
1913 static bool
1914 loop_has_vector_phi_nodes (struct loop *loop)
1915 {
1916 unsigned i;
1917 basic_block *bbs = get_loop_body_in_dom_order (loop);
1918 gimple_stmt_iterator gsi;
1919 bool res = true;
1920
1921 for (i = 0; i < loop->num_nodes; i++)
1922 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1923 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1924 goto end;
1925
1926 res = false;
1927 end:
1928 free (bbs);
1929 return res;
1930 }
1931
1932 /* Create a reduction_info struct, initialize it with REDUC_STMT
1933 and PHI, and insert it into REDUCTION_LIST. */
1934
1935 static void
1936 build_new_reduction (reduction_info_table_type reduction_list,
1937 gimple reduc_stmt, gimple phi)
1938 {
1939 reduction_info **slot;
1940 struct reduction_info *new_reduction;
1941
1942 gcc_assert (reduc_stmt);
1943
1944 if (dump_file && (dump_flags & TDF_DETAILS))
1945 {
1946 fprintf (dump_file,
1947 "Detected reduction. Reduction stmt is:\n");
1948 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1949 fprintf (dump_file, "\n");
1950 }
1951
1952 new_reduction = XCNEW (struct reduction_info);
1953
1954 new_reduction->reduc_stmt = reduc_stmt;
1955 new_reduction->reduc_phi = phi;
1956 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1957 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1958 slot = reduction_list.find_slot (new_reduction, INSERT);
1959 *slot = new_reduction;
1960 }
1961
1962 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1963
1964 int
1965 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
1966 {
1967 struct reduction_info *const red = *slot;
1968 gimple_set_uid (red->reduc_phi, red->reduc_version);
1969 return 1;
1970 }
1971
1972 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
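
/* For instance (illustrative GIMPLE), given

       sum_1 = PHI <1 (preheader), sum_2 (latch)>
       sum_2 = sum_1 + x_5;

   sum_1 is not a simple affine IV, so vect_force_simple_reduction is asked
   whether the phi forms a reduction; plain IV phis are filtered out by the
   simple_iv check first.  */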
1973
1974 static void
1975 gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
1976 {
1977 gimple_stmt_iterator gsi;
1978 loop_vec_info simple_loop_info;
1979
1980 simple_loop_info = vect_analyze_loop_form (loop);
1981
1982 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1983 {
1984 gimple phi = gsi_stmt (gsi);
1985 affine_iv iv;
1986 tree res = PHI_RESULT (phi);
1987 bool double_reduc;
1988
1989 if (virtual_operand_p (res))
1990 continue;
1991
1992 if (!simple_iv (loop, loop, res, &iv, true)
1993 && simple_loop_info)
1994 {
1995 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1996 phi, true,
1997 &double_reduc);
1998 if (reduc_stmt && !double_reduc)
1999 build_new_reduction (reduction_list, reduc_stmt, phi);
2000 }
2001 }
2002 destroy_loop_vec_info (simple_loop_info, true);
2003
2004 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2005 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2006 only now. */
2007 reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
2008 }
2009
2010 /* Try to initialize NITER for the code generation part. */
2011
2012 static bool
2013 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2014 {
2015 edge exit = single_dom_exit (loop);
2016
2017 gcc_assert (exit);
2018
2019 /* We need to know the number of iterations, and there should be no uses
2020 of values defined inside the loop outside of it, unless the values are
2021 invariants of the loop. */
2022 if (!number_of_iterations_exit (loop, exit, niter, false))
2023 {
2024 if (dump_file && (dump_flags & TDF_DETAILS))
2025 fprintf (dump_file, " FAILED: number of iterations not known\n");
2026 return false;
2027 }
2028
2029 return true;
2030 }
2031
2032 /* Try to initialize REDUCTION_LIST for the code generation part.
2033 REDUCTION_LIST describes the reductions. */
2034
2035 static bool
2036 try_create_reduction_list (loop_p loop,
2037 reduction_info_table_type reduction_list)
2038 {
2039 edge exit = single_dom_exit (loop);
2040 gimple_stmt_iterator gsi;
2041
2042 gcc_assert (exit);
2043
2044 gather_scalar_reductions (loop, reduction_list);
2045
2046
2047 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2048 {
2049 gimple phi = gsi_stmt (gsi);
2050 struct reduction_info *red;
2051 imm_use_iterator imm_iter;
2052 use_operand_p use_p;
2053 gimple reduc_phi;
2054 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2055
2056 if (!virtual_operand_p (val))
2057 {
2058 if (dump_file && (dump_flags & TDF_DETAILS))
2059 {
2060 fprintf (dump_file, "phi is ");
2061 print_gimple_stmt (dump_file, phi, 0, 0);
2062 fprintf (dump_file, "arg of phi to exit: value ");
2063 print_generic_expr (dump_file, val, 0);
2064 fprintf (dump_file, " used outside loop\n");
2065 fprintf (dump_file,
2066 " checking if it a part of reduction pattern: \n");
2067 }
2068 if (reduction_list.elements () == 0)
2069 {
2070 if (dump_file && (dump_flags & TDF_DETAILS))
2071 fprintf (dump_file,
2072 " FAILED: it is not a part of reduction.\n");
2073 return false;
2074 }
2075 reduc_phi = NULL;
2076 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2077 {
2078 if (!gimple_debug_bind_p (USE_STMT (use_p))
2079 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2080 {
2081 reduc_phi = USE_STMT (use_p);
2082 break;
2083 }
2084 }
2085 red = reduction_phi (reduction_list, reduc_phi);
2086 if (red == NULL)
2087 {
2088 if (dump_file && (dump_flags & TDF_DETAILS))
2089 fprintf (dump_file,
2090 " FAILED: it is not a part of reduction.\n");
2091 return false;
2092 }
2093 if (dump_file && (dump_flags & TDF_DETAILS))
2094 {
2095 fprintf (dump_file, "reduction phi is ");
2096 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2097 fprintf (dump_file, "reduction stmt is ");
2098 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2099 }
2100 }
2101 }
2102
2103 /* The iterations of the loop may communicate only through basic induction
2104 variables (bivs) whose iteration space can be distributed efficiently. */
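/* E.g. (illustrative) a recurrence such as

       x_1 = PHI <x_0 (preheader), x_2 (latch)>
       x_2 = a[i] + 2 * x_1;

   is neither an affine IV nor a reduction we recognize, so the loop is
   rejected below as having a scalar dependency between iterations.  */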
2105 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2106 {
2107 gimple phi = gsi_stmt (gsi);
2108 tree def = PHI_RESULT (phi);
2109 affine_iv iv;
2110
2111 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2112 {
2113 struct reduction_info *red;
2114
2115 red = reduction_phi (reduction_list, phi);
2116 if (red == NULL)
2117 {
2118 if (dump_file && (dump_flags & TDF_DETAILS))
2119 fprintf (dump_file,
2120 " FAILED: scalar dependency between iterations\n");
2121 return false;
2122 }
2123 }
2124 }
2125
2126
2127 return true;
2128 }
2129
2130 /* Detect parallel loops and generate parallel code using libgomp
2131 primitives. Returns true if some loop was parallelized, false
2132 otherwise. */
2133
2134 bool
2135 parallelize_loops (void)
2136 {
2137 unsigned n_threads = flag_tree_parallelize_loops;
2138 bool changed = false;
2139 struct loop *loop;
2140 struct tree_niter_desc niter_desc;
2141 loop_iterator li;
2142 reduction_info_table_type reduction_list;
2143 struct obstack parloop_obstack;
2144 HOST_WIDE_INT estimated;
2145 LOC loop_loc;
2146
2147 /* Do not parallelize loops in the functions created by parallelization. */
2148 if (parallelized_function_p (cfun->decl))
2149 return false;
2150 if (cfun->has_nonlocal_label)
2151 return false;
2152
2153 gcc_obstack_init (&parloop_obstack);
2154 reduction_list.create (10);
2155 init_stmt_vec_info_vec ();
2156
2157 FOR_EACH_LOOP (li, loop, 0)
2158 {
2159 reduction_list.empty ();
2160 if (dump_file && (dump_flags & TDF_DETAILS))
2161 {
2162 fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
2163 if (loop->inner)
2164 fprintf (dump_file, "loop %d is not innermost\n", loop->num);
2165 else
2166 fprintf (dump_file, "loop %d is innermost\n", loop->num);
2167 }
2168
2169 /* If we use autopar in the graphite pass, we use its marked dependency
2170 checking results. */
2171 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2172 {
2173 if (dump_file && (dump_flags & TDF_DETAILS))
2174 fprintf (dump_file, "loop is not parallel according to graphite\n");
2175 continue;
2176 }
2177
2178 if (!single_dom_exit (loop))
2179 {
2180
2181 if (dump_file && (dump_flags & TDF_DETAILS))
2182 fprintf (dump_file, "loop is !single_dom_exit\n");
2183
2184 continue;
2185 }
2186
2187 if (/* And of course, the loop must be parallelizable. */
2188 !can_duplicate_loop_p (loop)
2189 || loop_has_blocks_with_irreducible_flag (loop)
2190 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2191 /* FIXME: the check for vector phi nodes could be removed. */
2192 || loop_has_vector_phi_nodes (loop))
2193 continue;
2194
2195 estimated = estimated_stmt_executions_int (loop);
2196 if (estimated == -1)
2197 estimated = max_stmt_executions_int (loop);
2198 /* FIXME: Bypass this check as graphite doesn't update the
2199 count and frequency correctly now. */
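/* Illustrative numbers: with -ftree-parallelize-loops=4 and MIN_PER_THREAD
   of 100 (see its definition earlier in this file), loops whose estimated
   statement executions do not exceed 400 are rejected as not worth the
   threading overhead.  */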
2200 if (!flag_loop_parallelize_all
2201 && ((estimated != -1
2202 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2203 /* Do not bother with loops in cold areas. */
2204 || optimize_loop_nest_for_size_p (loop)))
2205 continue;
2206
2207 if (!try_get_loop_niter (loop, &niter_desc))
2208 continue;
2209
2210 if (!try_create_reduction_list (loop, reduction_list))
2211 continue;
2212
2213 if (!flag_loop_parallelize_all
2214 && !loop_parallel_p (loop, &parloop_obstack))
2215 continue;
2216
2217 changed = true;
2218 if (dump_file && (dump_flags & TDF_DETAILS))
2219 {
2220 if (loop->inner)
2221 fprintf (dump_file, "parallelizing outer loop %d\n", loop->header->index);
2222 else
2223 fprintf (dump_file, "parallelizing inner loop %d\n", loop->header->index);
2224 loop_loc = find_loop_location (loop);
2225 if (loop_loc != UNKNOWN_LOC)
2226 fprintf (dump_file, "\nloop at %s:%d: ",
2227 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2228 }
2229 gen_parallel_loop (loop, reduction_list,
2230 n_threads, &niter_desc);
2231 }
2232
2233 free_stmt_vec_info_vec ();
2234 reduction_list.dispose ();
2235 obstack_free (&parloop_obstack, NULL);
2236
2237 /* Parallelization will cause new function calls to be inserted through
2238 which local variables will escape. Reset the points-to solution
2239 for ESCAPED. */
2240 if (changed)
2241 pt_solution_reset (&cfun->gimple_df->escaped);
2242
2243 return changed;
2244 }
2245
2246 /* Parallelization. */
2247
2248 static bool
2249 gate_tree_parallelize_loops (void)
2250 {
2251 return flag_tree_parallelize_loops > 1;
2252 }
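
/* Note that the gate requires flag_tree_parallelize_loops > 1: the pass only
   runs for -ftree-parallelize-loops=N with N >= 2, since a single thread
   would add outlining and synchronization overhead without any parallel
   speedup.  */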
2253
2254 static unsigned
2255 tree_parallelize_loops (void)
2256 {
2257 if (number_of_loops (cfun) <= 1)
2258 return 0;
2259
2260 if (parallelize_loops ())
2261 return TODO_cleanup_cfg | TODO_rebuild_alias;
2262 return 0;
2263 }
2264
2265 namespace {
2266
2267 const pass_data pass_data_parallelize_loops =
2268 {
2269 GIMPLE_PASS, /* type */
2270 "parloops", /* name */
2271 OPTGROUP_LOOP, /* optinfo_flags */
2272 true, /* has_gate */
2273 true, /* has_execute */
2274 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2275 ( PROP_cfg | PROP_ssa ), /* properties_required */
2276 0, /* properties_provided */
2277 0, /* properties_destroyed */
2278 0, /* todo_flags_start */
2279 TODO_verify_flow, /* todo_flags_finish */
2280 };
2281
2282 class pass_parallelize_loops : public gimple_opt_pass
2283 {
2284 public:
2285 pass_parallelize_loops (gcc::context *ctxt)
2286 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2287 {}
2288
2289 /* opt_pass methods: */
2290 bool gate () { return gate_tree_parallelize_loops (); }
2291 unsigned int execute () { return tree_parallelize_loops (); }
2292
2293 }; // class pass_parallelize_loops
2294
2295 } // anon namespace
2296
2297 gimple_opt_pass *
2298 make_pass_parallelize_loops (gcc::context *ctxt)
2299 {
2300 return new pass_parallelize_loops (ctxt);
2301 }
2302
2303
2304 #include "gt-tree-parloops.h"