/* Loop autoparallelization.
   Copyright (C) 2006-2015 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "alias.h"
#include "symtab.h"
#include "options.h"
#include "tree.h"
#include "fold-const.h"
#include "predict.h"
#include "tm.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-low.h"
#include "tree-nested.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-ssa.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if that is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let the omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.


  parloop
  {
    int sum = 1;

    for (i = 0; i < N; i++)
      {
	x[i] = i + 3;
	sum += x[i];
      }
  }

  gimple-like code:
  header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


  exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


  after reduction transformation (only relevant parts):

  parloop
  {

  ....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1() #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

  ...

  }

*/

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
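
/* For illustration (an editorial note, not part of the original sources):
   the profitability check elsewhere in the pass scales this constant by the
   thread count, so with the default of four threads a loop needs roughly
   4 * MIN_PER_THREAD = 400 iterations before parallelization is considered
   worthwhile.  */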

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : typed_free_remove <reduction_info>
{
  typedef reduction_info *value_type;
  typedef reduction_info *compare_type;
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;


static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
{
  typedef name_to_copy_elt *value_type;
  typedef name_to_copy_elt *compare_type;
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (a.k.a. input dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
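
/* A worked example (an editorial sketch, not part of the original sources):
   for the loop

     for (i = 0; i < n; i++)
       a[i + 1] = a[i];

   the dependence distance vector is (1).  With the 1x1 reversal matrix
   T = [-1] that loop_parallel_p below checks, T.d = (-1), which is not
   lexicographically positive, so the transform -- and with it the
   parallelization -- is rejected.  */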

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
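
/* For illustration (editorial sketch): the analysis above accepts

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;		-- iterations independent

   and rejects

     for (i = 1; i < n; i++)
       a[i] = a[i - 1] + 1;	-- cross-iteration dependence  */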

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
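
/* For example (editorial sketch): for OBJ == x.f with base decl x, a single
   statement  x_addr_N = &x;  is emitted on ENTRY, the access is rewritten
   as MEM[x_addr_N].f, and later calls for accesses based on x reuse
   x_addr_N through DECL_ADDRESS.  */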

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computation is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
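
/* In terms of the running example in the file header (editorial note):
   for the sum reduction with user initial value 1, the preheader phi
   argument becomes 0 (the PLUS_EXPR neutral element), reduc->init is 0,
   and the original 1 is remembered in reduc->initial_value for use after
   the threads join.  */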

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
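
/* For illustration (editorial sketch): inside the region,

     x = y;	becomes   MEM[x_addr] = MEM[y_addr];
     p = &x;	becomes   p = x_addr;

   where x_addr and y_addr are the canonical SSA names initialized on the
   ENTRY edge by take_address_of.  */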

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, &decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
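
/* For example (editorial sketch): for a use of n_5 (base decl n) defined
   outside the region, the first call with COPY_NAME_P true creates a
   duplicate n_27 based on a fresh decl; later calls for n_5 return n_27
   from NAME_COPIES, and other names based on n are rebased onto the same
   new decl through DECL_COPIES.  */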

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}

/* Replaces the ssa names used in the debug stmt STMT with their duplicates
   from NAME_COPIES, and the variable the stmt binds with its duplicate
   from DECL_COPIES.  If a used name has no duplicate, the bind value is
   reset instead.  Returns true if STMT should be removed (its variable
   was not duplicated).  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the reduction's
   data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi should be stored.  It is the
     destination of the loop exit.  (Find the fallthru edge from
     GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop (the
     reduction's result is computed in the loop), and another coming from
     a block preceding the loop, when no iterations are executed (the
     initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
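
/* The emitted sequence corresponds to the atomic update shown in the
   file-header example (editorial note):

     #pragma omp atomic_load
     tmp = *&.paral_data_load->field;
     new = tmp <reduction_code> local_result;
     #pragma omp atomic_store (new);  */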

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);

}

/* Callback for htab_traverse.  Store the initial value given by the user
   for the particular reduction into the reduction field of the shared
   structure.  The reduction is specified in SLOT.  The store information
   is passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
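
/* E.g. (editorial sketch) for ssa name n_5 with duplicate n_27 and
   structure field F, this emits

     store_bb:	.paral_data_store.F = n_5;
     load_bb:	n_27 = .paral_data_load->F;  */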

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split into
   a separate function later, and `new' will be initialized from its
   arguments).  LD_ST_DATA holds information about the shared data structure
   used to pass information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}

/* Returns true if FNDECL was created to run in parallel.  */

bool
parallelized_function_p (tree fndecl)
{
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}

/* Replace uses of NAME by VAL in block BB.  */

static void
replace_uses_in_bb_by (tree name, tree val, basic_block bb)
{
  gimple use_stmt;
  imm_use_iterator imm_iter;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
    {
      if (gimple_bb (use_stmt) != bb)
	continue;

      use_operand_p use_p;
      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	SET_USE (use_p, val);
    }
}

/* Replace uses of NAME by VAL in blocks BBS.  */

static void
replace_uses_in_bbs_by (tree name, tree val, bitmap bbs)
{
  gimple use_stmt;
  imm_use_iterator imm_iter;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
    {
      if (!bitmap_bit_p (bbs, gimple_bb (use_stmt)->index))
	continue;

      use_operand_p use_p;
      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	SET_USE (use_p, val);
    }
}

/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1])>

     [1] Where <bb cond> is single_pred (bb latch); in the simplest case,
	 that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb exit>:
     sum_z = PHI <sum_c (newheader)>


   In unified diff format:

     <bb preheader>:
     ...
   - goto <bb header>
   + goto <bb newheader>

     <bb header>:
   - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
   - sum_a = PHI <sum_init (preheader), sum_b (latch)>
   + ivtmp_a = PHI <ivtmp_c (latch)>
   + sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
   - if (ivtmp_a < n)
   -   goto <bb latch>;
   +   goto <bb latch>;
   +
   + <bb newheader>:
   + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
   + sum_c = PHI <sum_init (preheader), sum_b (latch)>
   + if (ivtmp_c < n + 1)
   +   goto <bb header>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
   - goto <bb header>
   + goto <bb newheader>

     <bb exit>:
   - sum_z = PHI <sum_b (cond[1])>
   + sum_z = PHI <sum_c (newheader)>

   Note: the example does not show any virtual phis, but these are handled more
   or less as reductions.


   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */

static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Gather the bbs dominated by the exit block.  */
  bitmap exit_dominated = BITMAP_ALLOC (NULL);
  bitmap_set_bit (exit_dominated, exit_block->index);
  vec<basic_block> exit_dominated_vec
    = get_dominated_by (CDI_DOMINATORS, exit_block);

  int i;
  basic_block dom_bb;
  FOR_EACH_VEC_ELT (exit_dominated_vec, i, dom_bb)
    bitmap_set_bit (exit_dominated, dom_bb->index);

  exit_dominated_vec.release ();

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge split_edge = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect split_edge to latch.  */
  e = redirect_edge_and_branch (split_edge, latch);
  gcc_assert (e == split_edge);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  Loop-closed ssa does not hold
	 for virtuals, so we cannot get away with exit_block only.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bbs_by (res_b, res_c, exit_dominated);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
  BITMAP_FREE (exit_dominated);

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  /* Register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree res_z = PHI_RESULT (phi);
      if (virtual_operand_p (res_z))
	continue;

      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = phi;
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}
1755
1756 /* Tries to moves the exit condition of LOOP to the beginning of its header
1757 without duplication of the loop body. NIT is the number of iterations of the
1758 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1759 transformation is successful. */
1760
1761 static bool
1762 try_transform_to_exit_first_loop_alt (struct loop *loop,
1763 reduction_info_table_type *reduction_list,
1764 tree nit)
1765 {
1766 /* Check whether the latch contains a single statement. */
1767 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1768 return false;
1769
1770 /* Check whether the latch contains the loop iv increment. */
1771 edge back = single_succ_edge (loop->latch);
1772 edge exit = single_dom_exit (loop);
1773 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1774 tree control = gimple_cond_lhs (cond_stmt);
1775 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1776 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1777 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1778 return false;
1779
1780 /* Check whether there's no code between the loop condition and the latch. */
1781 if (!single_pred_p (loop->latch)
1782 || single_pred (loop->latch) != exit->src)
1783 return false;
1784
1785 tree alt_bound = NULL_TREE;
1786 tree nit_type = TREE_TYPE (nit);
1787
1788 /* Figure out whether nit + 1 overflows. */
1789 if (TREE_CODE (nit) == INTEGER_CST)
1790 {
1791 if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
1792 {
1793 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1794 nit, build_one_cst (nit_type));
1795
1796 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
1797 }
1798 else
1799 {
1800 /* Todo: Figure out if we can trigger this, if it's worth to handle
1801 optimally, and if we can handle it optimally. */
1802 }
1803 }
1804 else
1805 {
1806 gcc_assert (TREE_CODE (nit) == SSA_NAME);
1807
1808 gimple def = SSA_NAME_DEF_STMT (nit);
1809
1810 if (def
1811 && is_gimple_assign (def)
1812 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1813 {
1814 tree op1 = gimple_assign_rhs1 (def);
1815 tree op2 = gimple_assign_rhs2 (def);
1816 if (integer_minus_onep (op1))
1817 alt_bound = op2;
1818 else if (integer_minus_onep (op2))
1819 alt_bound = op1;
1820 }
1821
1822 /* There are a number of test cases for which we don't get an alt_bound
1823 here; they're listed below, with the lhs of the last stmt as the nit:
1824
1825 libgomp.graphite/force-parallel-1.c:
1826 _21 = (signed long) N_6(D);
1827 _19 = _21 + -1;
1828 _7 = (unsigned long) _19;
1829
1830 libgomp.graphite/force-parallel-2.c:
1831 _33 = (signed long) N_9(D);
1832 _16 = _33 + -1;
1833 _37 = (unsigned long) _16;
1834
1835 libgomp.graphite/force-parallel-5.c:
1836 <bb 6>:
1837 # graphite_IV.5_46 = PHI <0(5), graphite_IV.5_47(11)>
1838 <bb 7>:
1839 _33 = (unsigned long) graphite_IV.5_46;
1840
1841 g++.dg/tree-ssa/pr34355.C:
1842 _2 = (unsigned int) i_9;
1843 _3 = 4 - _2;
1844
1845 gcc.dg/pr53849.c:
1846 _5 = d.0_11 + -2;
1847 _18 = (unsigned int) _5;
1848
1849 We will be able to handle some of these cases if we can determine when
1850 it's safe to look past casts. */
1851 }
1852
1853 if (alt_bound == NULL_TREE)
1854 return false;
1855
1856 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1857 return true;
1858 }
1859
1860 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1861 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1862 LOOP. */
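
/* A sketch of the effect (following the diagram in gen_parallel_loop below,
not actual generated output): the loop

loop { IV = phi (INIT, IV + STEP); BODY1; if (COND) break; BODY2; }

becomes, roughly,

BODY1;
loop { IV = phi (...); if (COND) break; BODY2; BODY1; }

i.e. the exit test moves to the top of the header, and the duplicated BODY1
makes up for the shifted iteration -- this is the code duplication that
try_transform_to_exit_first_loop_alt avoids. */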
1863
1864 static void
1865 transform_to_exit_first_loop (struct loop *loop,
1866 reduction_info_table_type *reduction_list,
1867 tree nit)
1868 {
1869 basic_block *bbs, *nbbs, ex_bb, orig_header;
1870 unsigned n;
1871 bool ok;
1872 edge exit = single_dom_exit (loop), hpred;
1873 tree control, control_name, res, t;
1874 gphi *phi, *nphi;
1875 gassign *stmt;
1876 gcond *cond_stmt, *cond_nit;
1877 tree nit_1;
1878
1879 split_block_after_labels (loop->header);
1880 orig_header = single_succ (loop->header);
1881 hpred = single_succ_edge (loop->header);
1882
1883 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1884 control = gimple_cond_lhs (cond_stmt);
1885 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1886
1887 /* Make sure that we have phi nodes on exit for all loop header phis
1888 (create_parallel_loop requires that). */
1889 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1890 !gsi_end_p (gsi);
1891 gsi_next (&gsi))
1892 {
1893 phi = gsi.phi ();
1894 res = PHI_RESULT (phi);
1895 t = copy_ssa_name (res, phi);
1896 SET_PHI_RESULT (phi, t);
1897 nphi = create_phi_node (res, orig_header);
1898 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1899
1900 if (res == control)
1901 {
1902 gimple_cond_set_lhs (cond_stmt, t);
1903 update_stmt (cond_stmt);
1904 control = t;
1905 }
1906 }
1907
1908 bbs = get_loop_body_in_dom_order (loop);
1909
1910 for (n = 0; bbs[n] != exit->src; n++)
1911 continue;
1912 nbbs = XNEWVEC (basic_block, n);
1913 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1914 bbs + 1, n, nbbs);
1915 gcc_assert (ok);
1916 free (bbs);
1917 ex_bb = nbbs[0];
1918 free (nbbs);
1919
1920 /* Other than reductions, the only gimple reg that should be copied
1921 out of the loop is the control variable. */
1922 exit = single_dom_exit (loop);
1923 control_name = NULL_TREE;
1924 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1925 !gsi_end_p (gsi); )
1926 {
1927 phi = gsi.phi ();
1928 res = PHI_RESULT (phi);
1929 if (virtual_operand_p (res))
1930 {
1931 gsi_next (&gsi);
1932 continue;
1933 }
1934
1935 /* Check if it is part of a reduction. If it is,
1936 record the phi in the reduction's keep_res field. The
1937 PHI_RESULT of this phi is the resulting value of the reduction
1938 variable when exiting the loop. */
1939
1940 if (reduction_list->elements () > 0)
1941 {
1942 struct reduction_info *red;
1943
1944 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1945 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1946 if (red)
1947 {
1948 red->keep_res = phi;
1949 gsi_next (&gsi);
1950 continue;
1951 }
1952 }
1953 gcc_assert (control_name == NULL_TREE
1954 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1955 control_name = res;
1956 remove_phi_node (&gsi, false);
1957 }
1958 gcc_assert (control_name != NULL_TREE);
1959
1960 /* Initialize the control variable to the number of iterations
1961 according to the rhs of the exit condition. */
1962 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1963 cond_nit = as_a <gcond *> (last_stmt (exit->src));
1964 nit_1 = gimple_cond_rhs (cond_nit);
1965 nit_1 = force_gimple_operand_gsi (&gsi,
1966 fold_convert (TREE_TYPE (control_name), nit_1),
1967 false, NULL_TREE, false, GSI_SAME_STMT);
1968 stmt = gimple_build_assign (control_name, nit_1);
1969 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1970 }
1971
1972 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1973 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1974 NEW_DATA is the variable that should be initialized from the argument
1975 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1976 basic block containing the GIMPLE_OMP_PARALLEL statement. */
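
/* Schematically, the constructs emitted below are (cf. the diagram in
gen_parallel_loop):

GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOP_FN, DATA)
NEW_DATA = (cast) PARAM; <-- only if DATA is non-NULL
GIMPLE_OMP_FOR (IV = INIT; COND; IV += 1) (OMP_CLAUSE_SCHEDULE (static))
loop body
GIMPLE_OMP_CONTINUE
GIMPLE_OMP_RETURN <-- for the GIMPLE_OMP_FOR
GIMPLE_OMP_RETURN <-- for the GIMPLE_OMP_PARALLEL */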
1977
1978 static basic_block
1979 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1980 tree new_data, unsigned n_threads, location_t loc)
1981 {
1982 gimple_stmt_iterator gsi;
1983 basic_block bb, paral_bb, for_bb, ex_bb;
1984 tree t, param;
1985 gomp_parallel *omp_par_stmt;
1986 gimple omp_return_stmt1, omp_return_stmt2;
1987 gimple phi;
1988 gcond *cond_stmt;
1989 gomp_for *for_stmt;
1990 gomp_continue *omp_cont_stmt;
1991 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1992 edge exit, nexit, guard, end, e;
1993
1994 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1995 bb = loop_preheader_edge (loop)->src;
1996 paral_bb = single_pred (bb);
1997 gsi = gsi_last_bb (paral_bb);
1998
1999 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2000 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2001 = build_int_cst (integer_type_node, n_threads);
2002 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2003 gimple_set_location (omp_par_stmt, loc);
2004
2005 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2006
2007 /* Initialize NEW_DATA. */
2008 if (data)
2009 {
2010 gassign *assign_stmt;
2011
2012 gsi = gsi_after_labels (bb);
2013
2014 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2015 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2016 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2017
2018 assign_stmt = gimple_build_assign (new_data,
2019 fold_convert (TREE_TYPE (new_data), param));
2020 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2021 }
2022
2023 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2024 bb = split_loop_exit_edge (single_dom_exit (loop));
2025 gsi = gsi_last_bb (bb);
2026 omp_return_stmt1 = gimple_build_omp_return (false);
2027 gimple_set_location (omp_return_stmt1, loc);
2028 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2029
2030 /* Extract data for GIMPLE_OMP_FOR. */
2031 gcc_assert (loop->header == single_dom_exit (loop)->src);
2032 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2033
2034 cvar = gimple_cond_lhs (cond_stmt);
2035 cvar_base = SSA_NAME_VAR (cvar);
2036 phi = SSA_NAME_DEF_STMT (cvar);
2037 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2038 initvar = copy_ssa_name (cvar);
2039 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2040 initvar);
2041 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2042
2043 gsi = gsi_last_nondebug_bb (loop->latch);
2044 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2045 gsi_remove (&gsi, true);
2046
2047 /* Prepare cfg. */
2048 for_bb = split_edge (loop_preheader_edge (loop));
2049 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2050 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2051 gcc_assert (exit == single_dom_exit (loop));
2052
2053 guard = make_edge (for_bb, ex_bb, 0);
2054 single_succ_edge (loop->latch)->flags = 0;
2055 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
2056 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2057 !gsi_end_p (gpi); gsi_next (&gpi))
2058 {
2059 source_location locus;
2060 tree def;
2061 gphi *phi = gpi.phi ();
2062 gphi *stmt;
2063
2064 stmt = as_a <gphi *> (
2065 SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));
2066
2067 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2068 locus = gimple_phi_arg_location_from_edge (stmt,
2069 loop_preheader_edge (loop));
2070 add_phi_arg (phi, def, guard, locus);
2071
2072 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2073 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2074 add_phi_arg (phi, def, end, locus);
2075 }
2076 e = redirect_edge_and_branch (exit, nexit->dest);
2077 PENDING_STMT (e) = NULL;
2078
2079 /* Emit GIMPLE_OMP_FOR. */
2080 gimple_cond_set_lhs (cond_stmt, cvar_base);
2081 type = TREE_TYPE (cvar);
2082 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2083 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2084
2085 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
2086 gimple_set_location (for_stmt, loc);
2087 gimple_omp_for_set_index (for_stmt, 0, initvar);
2088 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2089 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2090 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2091 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2092 cvar_base,
2093 build_int_cst (type, 1)));
2094
2095 gsi = gsi_last_bb (for_bb);
2096 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2097 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2098
2099 /* Emit GIMPLE_OMP_CONTINUE. */
2100 gsi = gsi_last_bb (loop->latch);
2101 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2102 gimple_set_location (omp_cont_stmt, loc);
2103 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2104 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2105
2106 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2107 gsi = gsi_last_bb (ex_bb);
2108 omp_return_stmt2 = gimple_build_omp_return (true);
2109 gimple_set_location (omp_return_stmt2, loc);
2110 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2111
2112 /* After the above, dominance info is hosed. Recompute it. */
2113 free_dominance_info (CDI_DOMINATORS);
2114 calculate_dominance_info (CDI_DOMINATORS);
2115
2116 return paral_bb;
2117 }
2118
2119 /* Generates code to execute the iterations of LOOP in N_THREADS
2120 threads in parallel.
2121
2122 NITER describes the number of iterations of LOOP.
2123 REDUCTION_LIST describes the reductions present in LOOP. */
2124
2125 static void
2126 gen_parallel_loop (struct loop *loop,
2127 reduction_info_table_type *reduction_list,
2128 unsigned n_threads, struct tree_niter_desc *niter)
2129 {
2130 tree many_iterations_cond, type, nit;
2131 tree arg_struct, new_arg_struct;
2132 gimple_seq stmts;
2133 edge entry, exit;
2134 struct clsn_data clsn_data;
2135 unsigned prob;
2136 location_t loc;
2137 gimple cond_stmt;
2138 unsigned int m_p_thread = 2;
2139
2140 /* From
2141
2142 ---------------------------------------------------------------------
2143 loop
2144 {
2145 IV = phi (INIT, IV + STEP)
2146 BODY1;
2147 if (COND)
2148 break;
2149 BODY2;
2150 }
2151 ---------------------------------------------------------------------
2152
2153 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2154 we generate the following code:
2155
2156 ---------------------------------------------------------------------
2157
2158 if (MAY_BE_ZERO
2159 || NITER < MIN_PER_THREAD * N_THREADS)
2160 goto original;
2161
2162 BODY1;
2163 store all local loop-invariant variables used in body of the loop to DATA.
2164 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2165 load the variables from DATA.
2166 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2167 BODY2;
2168 BODY1;
2169 GIMPLE_OMP_CONTINUE;
2170 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2171 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2172 goto end;
2173
2174 original:
2175 loop
2176 {
2177 IV = phi (INIT, IV + STEP)
2178 BODY1;
2179 if (COND)
2180 break;
2181 BODY2;
2182 }
2183
2184 end:
2185
2186 */
2187
2188 /* Create two versions of the loop -- in the old one, we know that the
2189 number of iterations is large enough, and we will transform it into the
2190 loop that will be split to loop_fn; the new one will be used for the
2191 remaining iterations. */
2192
2193 /* We should compute a better number-of-iterations value for outer loops.
2194 That is, if we have
2195
2196 for (i = 0; i < n; ++i)
2197 for (j = 0; j < m; ++j)
2198 ...
2199
2200 we should compute nit = n * m, not nit = n.
2201 Also may_be_zero handling would need to be adjusted. */
2202
2203 type = TREE_TYPE (niter->niter);
2204 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2205 NULL_TREE);
2206 if (stmts)
2207 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2208
2209 if (loop->inner)
2210 m_p_thread = 2;
2211 else
2212 m_p_thread = MIN_PER_THREAD;
2213
2214 many_iterations_cond
2215 = fold_build2 (GE_EXPR, boolean_type_node,
2216 nit, build_int_cst (type, m_p_thread * n_threads));
2217
2218 many_iterations_cond
2219 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2220 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2221 many_iterations_cond);
2222 many_iterations_cond
2223 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2224 if (stmts)
2225 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2226 if (!is_gimple_condexpr (many_iterations_cond))
2227 {
2228 many_iterations_cond
2229 = force_gimple_operand (many_iterations_cond, &stmts,
2230 true, NULL_TREE);
2231 if (stmts)
2232 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2233 }
2234
2235 initialize_original_copy_tables ();
2236
2237 /* We assume that the loop usually iterates a lot, hence the 4/5 (80%) probability. */
2238 prob = 4 * REG_BR_PROB_BASE / 5;
2239 loop_version (loop, many_iterations_cond, NULL,
2240 prob, prob, REG_BR_PROB_BASE - prob, true);
2241 update_ssa (TODO_update_ssa);
2242 free_original_copy_tables ();
2243
2244 /* Base all the induction variables in LOOP on a single control one. */
2245 canonicalize_loop_ivs (loop, &nit, true);
2246
2247 /* Ensure that the exit condition is the first statement in the loop.
2248 The common case is that the latch of the loop is empty (apart from the
2249 increment) and immediately follows the loop exit test. Attempt to move the
2250 entry of the loop directly before the exit check and increase the number of
2251 iterations of the loop by one. */
2252 if (!try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2253 {
2254 /* Fall back on the method that handles more cases, but duplicates the
2255 loop body: move the exit condition of LOOP to the beginning of its
2256 header, and duplicate the part of the last iteration that gets disabled
2257 to the exit of the loop. */
2258 transform_to_exit_first_loop (loop, reduction_list, nit);
2259 }
2260
2261 /* Generate initializations for reductions. */
2262 if (reduction_list->elements () > 0)
2263 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2264
2265 /* Eliminate the references to local variables from the loop. */
2266 gcc_assert (single_exit (loop));
2267 entry = loop_preheader_edge (loop);
2268 exit = single_dom_exit (loop);
2269
2270 eliminate_local_variables (entry, exit);
2271 /* In the old loop, move all variables non-local to the loop to a structure
2272 and back, and create separate decls for the variables used in loop. */
2273 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2274 &new_arg_struct, &clsn_data);
2275
2276 /* Create the parallel constructs. */
2277 loc = UNKNOWN_LOCATION;
2278 cond_stmt = last_stmt (loop->header);
2279 if (cond_stmt)
2280 loc = gimple_location (cond_stmt);
2281 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2282 new_arg_struct, n_threads, loc);
2283 if (reduction_list->elements () > 0)
2284 create_call_for_reduction (loop, reduction_list, &clsn_data);
2285
2286 scev_reset ();
2287
2288 /* Cancel the loop (it is simpler to do it here rather than to teach the
2289 expander to do it). */
2290 cancel_loop_tree (loop);
2291
2292 /* Free loop bound estimates that could contain references to
2293 removed statements. */
2294 FOR_EACH_LOOP (loop, 0)
2295 free_numbers_of_iterations_estimates_loop (loop);
2296 }
2297
2298 /* Returns true when LOOP contains vector phi nodes. */
2299
2300 static bool
2301 loop_has_vector_phi_nodes (struct loop *loop)
2302 {
2303 unsigned i;
2304 basic_block *bbs = get_loop_body_in_dom_order (loop);
2305 gphi_iterator gsi;
2306 bool res = true;
2307
2308 for (i = 0; i < loop->num_nodes; i++)
2309 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2310 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2311 goto end;
2312
2313 res = false;
2314 end:
2315 free (bbs);
2316 return res;
2317 }
2318
2319 /* Create a reduction_info struct, initialize it with REDUC_STMT
2320 and PHI, and insert it into REDUCTION_LIST. */
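
/* For example (an illustrative reduction, not taken from the sources), given

# sum_1 = PHI <sum_init_3 (preheader), sum_4 (latch)>
...
sum_4 = sum_1 + x_2;

PHI is the phi node of the sum, REDUC_STMT is the addition, and the recorded
reduction_code is PLUS_EXPR. */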
2321
2322 static void
2323 build_new_reduction (reduction_info_table_type *reduction_list,
2324 gimple reduc_stmt, gphi *phi)
2325 {
2326 reduction_info **slot;
2327 struct reduction_info *new_reduction;
2328
2329 gcc_assert (reduc_stmt);
2330
2331 if (dump_file && (dump_flags & TDF_DETAILS))
2332 {
2333 fprintf (dump_file,
2334 "Detected reduction. reduction stmt is: \n");
2335 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2336 fprintf (dump_file, "\n");
2337 }
2338
2339 new_reduction = XCNEW (struct reduction_info);
2340
2341 new_reduction->reduc_stmt = reduc_stmt;
2342 new_reduction->reduc_phi = phi;
2343 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2344 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
2345 slot = reduction_list->find_slot (new_reduction, INSERT);
2346 *slot = new_reduction;
2347 }
2348
2349 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2350
2351 int
2352 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2353 {
2354 struct reduction_info *const red = *slot;
2355 gimple_set_uid (red->reduc_phi, red->reduc_version);
2356 return 1;
2357 }
2358
2359 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2360
2361 static void
2362 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2363 {
2364 gphi_iterator gsi;
2365 loop_vec_info simple_loop_info;
2366
2367 simple_loop_info = vect_analyze_loop_form (loop);
2368
2369 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2370 {
2371 gphi *phi = gsi.phi ();
2372 affine_iv iv;
2373 tree res = PHI_RESULT (phi);
2374 bool double_reduc;
2375
2376 if (virtual_operand_p (res))
2377 continue;
2378
2379 if (!simple_iv (loop, loop, res, &iv, true)
2380 && simple_loop_info)
2381 {
2382 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
2383 phi, true,
2384 &double_reduc);
2385 if (reduc_stmt && !double_reduc)
2386 build_new_reduction (reduction_list, reduc_stmt, phi);
2387 }
2388 }
2389 destroy_loop_vec_info (simple_loop_info, true);
2390
2391 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2392 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2393 only now. */
2394 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2395 }
2396
2397 /* Try to initialize NITER for the code generation part. */
2398
2399 static bool
2400 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2401 {
2402 edge exit = single_dom_exit (loop);
2403
2404 gcc_assert (exit);
2405
2406 /* We need to know the number of iterations, and there should be no uses
2407 of values defined inside the loop outside of it, unless the values are
2408 invariants of the loop. */
2409 if (!number_of_iterations_exit (loop, exit, niter, false))
2410 {
2411 if (dump_file && (dump_flags & TDF_DETAILS))
2412 fprintf (dump_file, " FAILED: number of iterations not known\n");
2413 return false;
2414 }
2415
2416 return true;
2417 }
2418
2419 /* Try to initialize REDUCTION_LIST for the code generation part.
2420 REDUCTION_LIST describes the reductions. */
2421
2422 static bool
2423 try_create_reduction_list (loop_p loop,
2424 reduction_info_table_type *reduction_list)
2425 {
2426 edge exit = single_dom_exit (loop);
2427 gphi_iterator gsi;
2428
2429 gcc_assert (exit);
2430
2431 gather_scalar_reductions (loop, reduction_list);
2432
2434 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2435 {
2436 gphi *phi = gsi.phi ();
2437 struct reduction_info *red;
2438 imm_use_iterator imm_iter;
2439 use_operand_p use_p;
2440 gimple reduc_phi;
2441 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2442
2443 if (!virtual_operand_p (val))
2444 {
2445 if (dump_file && (dump_flags & TDF_DETAILS))
2446 {
2447 fprintf (dump_file, "phi is ");
2448 print_gimple_stmt (dump_file, phi, 0, 0);
2449 fprintf (dump_file, "arg of phi to exit: value ");
2450 print_generic_expr (dump_file, val, 0);
2451 fprintf (dump_file, " used outside loop\n");
2452 fprintf (dump_file,
2453 " checking if it a part of reduction pattern: \n");
2454 }
2455 if (reduction_list->elements () == 0)
2456 {
2457 if (dump_file && (dump_flags & TDF_DETAILS))
2458 fprintf (dump_file,
2459 " FAILED: it is not a part of reduction.\n");
2460 return false;
2461 }
2462 reduc_phi = NULL;
2463 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2464 {
2465 if (!gimple_debug_bind_p (USE_STMT (use_p))
2466 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2467 {
2468 reduc_phi = USE_STMT (use_p);
2469 break;
2470 }
2471 }
2472 red = reduction_phi (reduction_list, reduc_phi);
2473 if (red == NULL)
2474 {
2475 if (dump_file && (dump_flags & TDF_DETAILS))
2476 fprintf (dump_file,
2477 " FAILED: it is not a part of reduction.\n");
2478 return false;
2479 }
2480 if (dump_file && (dump_flags & TDF_DETAILS))
2481 {
2482 fprintf (dump_file, "reduction phi is ");
2483 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2484 fprintf (dump_file, "reduction stmt is ");
2485 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2486 }
2487 }
2488 }
2489
2490 /* The iterations of the loop may communicate only through bivs whose
2491 iteration space can be distributed efficiently. */
2492 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2493 {
2494 gphi *phi = gsi.phi ();
2495 tree def = PHI_RESULT (phi);
2496 affine_iv iv;
2497
2498 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2499 {
2500 struct reduction_info *red;
2501
2502 red = reduction_phi (reduction_list, phi);
2503 if (red == NULL)
2504 {
2505 if (dump_file && (dump_flags & TDF_DETAILS))
2506 fprintf (dump_file,
2507 " FAILED: scalar dependency between iterations\n");
2508 return false;
2509 }
2510 }
2511 }
2512
2514 return true;
2515 }
2516
2517 /* Detect parallel loops and generate parallel code using libgomp
2518 primitives. Returns true if some loop was parallelized, false
2519 otherwise. */
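
/* The number of threads comes from -ftree-parallelize-loops=N
(flag_tree_parallelize_loops); the pass gate below requires N > 1. */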
2520
2521 static bool
2522 parallelize_loops (void)
2523 {
2524 unsigned n_threads = flag_tree_parallelize_loops;
2525 bool changed = false;
2526 struct loop *loop;
2527 struct tree_niter_desc niter_desc;
2528 struct obstack parloop_obstack;
2529 HOST_WIDE_INT estimated;
2530 source_location loop_loc;
2531
2532 /* Do not parallelize loops in the functions created by parallelization. */
2533 if (parallelized_function_p (cfun->decl))
2534 return false;
2535 if (cfun->has_nonlocal_label)
2536 return false;
2537
2538 gcc_obstack_init (&parloop_obstack);
2539 reduction_info_table_type reduction_list (10);
2540 init_stmt_vec_info_vec ();
2541
2542 FOR_EACH_LOOP (loop, 0)
2543 {
2544 reduction_list.empty ();
2545 if (dump_file && (dump_flags & TDF_DETAILS))
2546 {
2547 fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
2548 if (loop->inner)
2549 fprintf (dump_file, "loop %d is not innermost\n", loop->num);
2550 else
2551 fprintf (dump_file, "loop %d is innermost\n", loop->num);
2552 }
2553
2554 /* If we use autopar in the graphite pass, we use the dependency-checking
2555 results it marked on the loop. */
2556 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2557 {
2558 if (dump_file && (dump_flags & TDF_DETAILS))
2559 fprintf (dump_file, "loop is not parallel according to graphite\n");
2560 continue;
2561 }
2562
2563 if (!single_dom_exit (loop))
2564 {
2565 if (dump_file && (dump_flags & TDF_DETAILS))
2566 fprintf (dump_file, "loop is !single_dom_exit\n");
2567 continue;
2568 }
2571
2572 if (/* And of course, the loop must be parallelizable. */
2573 !can_duplicate_loop_p (loop)
2574 || loop_has_blocks_with_irreducible_flag (loop)
2575 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2576 /* FIXME: the check for vector phi nodes could be removed. */
2577 || loop_has_vector_phi_nodes (loop))
2578 continue;
2579
2580 estimated = estimated_stmt_executions_int (loop);
2581 if (estimated == -1)
2582 estimated = max_stmt_executions_int (loop);
2583 /* FIXME: Bypass this check as graphite doesn't update the
2584 count and frequency correctly now. */
2585 if (!flag_loop_parallelize_all
2586 && ((estimated != -1
2587 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2588 /* Do not bother with loops in cold areas. */
2589 || optimize_loop_nest_for_size_p (loop)))
2590 continue;
2591
2592 if (!try_get_loop_niter (loop, &niter_desc))
2593 continue;
2594
2595 if (!try_create_reduction_list (loop, &reduction_list))
2596 continue;
2597
2598 if (!flag_loop_parallelize_all
2599 && !loop_parallel_p (loop, &parloop_obstack))
2600 continue;
2601
2602 changed = true;
2603 if (dump_file && (dump_flags & TDF_DETAILS))
2604 {
2605 if (loop->inner)
2606 fprintf (dump_file, "parallelizing outer loop %d\n", loop->header->index);
2607 else
2608 fprintf (dump_file, "parallelizing inner loop %d\n", loop->header->index);
2609 loop_loc = find_loop_location (loop);
2610 if (loop_loc != UNKNOWN_LOCATION)
2611 fprintf (dump_file, "\nloop at %s:%d: ",
2612 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
2613 }
2614 gen_parallel_loop (loop, &reduction_list,
2615 n_threads, &niter_desc);
2616 }
2617
2618 free_stmt_vec_info_vec ();
2619 obstack_free (&parloop_obstack, NULL);
2620
2621 /* Parallelization will cause new function calls to be inserted through
2622 which local variables will escape. Reset the points-to solution
2623 for ESCAPED. */
2624 if (changed)
2625 pt_solution_reset (&cfun->gimple_df->escaped);
2626
2627 return changed;
2628 }
2629
2630 /* Parallelization. */
2631
2632 namespace {
2633
2634 const pass_data pass_data_parallelize_loops =
2635 {
2636 GIMPLE_PASS, /* type */
2637 "parloops", /* name */
2638 OPTGROUP_LOOP, /* optinfo_flags */
2639 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2640 ( PROP_cfg | PROP_ssa ), /* properties_required */
2641 0, /* properties_provided */
2642 0, /* properties_destroyed */
2643 0, /* todo_flags_start */
2644 0, /* todo_flags_finish */
2645 };
2646
2647 class pass_parallelize_loops : public gimple_opt_pass
2648 {
2649 public:
2650 pass_parallelize_loops (gcc::context *ctxt)
2651 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2652 {}
2653
2654 /* opt_pass methods: */
2655 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
2656 virtual unsigned int execute (function *);
2657
2658 }; // class pass_parallelize_loops
2659
2660 unsigned
2661 pass_parallelize_loops::execute (function *fun)
2662 {
2663 if (number_of_loops (fun) <= 1)
2664 return 0;
2665
2666 if (parallelize_loops ())
2667 {
2668 fun->curr_properties &= ~(PROP_gimple_eomp);
2669 return TODO_update_ssa;
2670 }
2671
2672 return 0;
2673 }
2674
2675 } // anon namespace
2676
2677 gimple_opt_pass *
2678 make_pass_parallelize_loops (gcc::context *ctxt)
2679 {
2680 return new pass_parallelize_loops (ctxt);
2681 }