1 /* Loop autoparallelization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "alias.h"
26 #include "backend.h"
27 #include "tree.h"
28 #include "gimple.h"
29 #include "hard-reg-set.h"
30 #include "ssa.h"
31 #include "options.h"
32 #include "fold-const.h"
33 #include "internal-fn.h"
34 #include "gimplify.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "gimple-walk.h"
38 #include "stor-layout.h"
39 #include "tree-nested.h"
40 #include "tree-cfg.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-into-ssa.h"
46 #include "cfgloop.h"
47 #include "tree-data-ref.h"
48 #include "tree-scalar-evolution.h"
49 #include "gimple-pretty-print.h"
50 #include "tree-pass.h"
51 #include "langhooks.h"
52 #include "tree-vectorizer.h"
53 #include "tree-hasher.h"
54 #include "tree-parloops.h"
55 #include "omp-low.h"
56 #include "tree-nested.h"
57 #include "cgraph.h"
58 #include "tree-ssa.h"
59
60 /* This pass tries to distribute iterations of loops into several threads.
61 The implementation is straightforward -- for each loop we test whether its
62 iterations are independent, and if that is the case (and some additional
63 conditions regarding profitability and correctness are satisfied), we
64 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
65 machinery do its job.
66
67 Most of the complexity is in bringing the code into the shape expected
68 by the omp expanders:
69 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
70 variable and that the exit test is at the start of the loop body
71 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
72 variables by accesses through pointers, and breaking up ssa chains
73 by storing the values incoming to the parallelized loop to a structure
74 passed to the new function as an argument (something similar is done
75 in omp gimplification, unfortunately only a small part of the code
76 can be shared).
77
78 TODO:
79 -- if there are several parallelizable loops in a function, it may be
80 possible to generate the threads just once (using synchronization to
81 ensure that cross-loop dependences are obeyed).
82 -- handling of common reduction patterns for outer loops.
83
84 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
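/* As an illustration of the overall transformation (a sketch with invented
   names; the thread count matches the example below), a loop with
   independent iterations such as

     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   is conceptually rewritten into the equivalent of

     #pragma omp parallel num_threads(4)
     #pragma omp for schedule(static)
     for (i = 0; i < N; i++)
       a[i] = b[i] + c[i];

   except that the pass emits GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR
   statements directly on GIMPLE, and the omp expansion machinery then
   outlines the loop body into a new function invoked through the libgomp
   runtime.  */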
85 /*
86 Reduction handling:
87 Currently we use vect_force_simple_reduction() to detect reduction patterns.
88 The code transformation is illustrated by the example below.
89
90
91 parloop
92 {
93 int sum=1;
94
95 for (i = 0; i < N; i++)
96 {
97 x[i] = i + 3;
98 sum+=x[i];
99 }
100 }
101
102 gimple-like code:
103 header_bb:
104
105 # sum_29 = PHI <sum_11(5), 1(3)>
106 # i_28 = PHI <i_12(5), 0(3)>
107 D.1795_8 = i_28 + 3;
108 x[i_28] = D.1795_8;
109 sum_11 = D.1795_8 + sum_29;
110 i_12 = i_28 + 1;
111 if (N_6(D) > i_12)
112 goto header_bb;
113
114
115 exit_bb:
116
117 # sum_21 = PHI <sum_11(4)>
118 printf (&"%d"[0], sum_21);
119
120
121 after reduction transformation (only relevant parts):
122
123 parloop
124 {
125
126 ....
127
128
129 # Storing the initial value given by the user. #
130
131 .paral_data_store.32.sum.27 = 1;
132
133 #pragma omp parallel num_threads(4)
134
135 #pragma omp for schedule(static)
136
137 # The neutral element corresponding to the particular
138 reduction's operation, e.g. 0 for PLUS_EXPR,
139 1 for MULT_EXPR, etc. replaces the user's initial value. #
140
141 # sum.27_29 = PHI <sum.27_11, 0>
142
143 sum.27_11 = D.1827_8 + sum.27_29;
144
145 GIMPLE_OMP_CONTINUE
146
147 # Adding this reduction phi is done at create_phi_for_local_result() #
148 # sum.27_56 = PHI <sum.27_11, 0>
149 GIMPLE_OMP_RETURN
150
151 # Creating the atomic operation is done at
152 create_call_for_reduction_1() #
153
154 #pragma omp atomic_load
155 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
156 D.1840_60 = sum.27_56 + D.1839_59;
157 #pragma omp atomic_store (D.1840_60);
158
159 GIMPLE_OMP_RETURN
160
161 # Collecting the result after the join of the threads is done at
162 create_loads_for_reductions().
163 The value computed by the threads is loaded from the
164 shared struct. #
165
166
167 .paral_data_load.33_52 = &.paral_data_store.32;
168 sum_37 = .paral_data_load.33_52->sum.27;
169 sum_43 = D.1795_41 + sum_37;
170
171 exit_bb:
172 # sum_21 = PHI <sum_43, sum_26>
173 printf (&"%d"[0], sum_21);
174
175 ...
176
177 }
178
179 */
180
181 /* Minimum number of iterations of a loop that should be executed in each
182 thread. */
183 #define MIN_PER_THREAD 100
184
185 /* Element of the hashtable, representing a
186 reduction in the current loop. */
187 struct reduction_info
188 {
189 gimple reduc_stmt; /* reduction statement. */
190 gimple reduc_phi; /* The phi node defining the reduction. */
191 enum tree_code reduction_code;/* code for the reduction operation. */
192 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
193 result. */
194 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
195 of the reduction variable when exiting the loop. */
196 tree initial_value; /* The initial value of the reduction var before entering the loop. */
197 tree field; /* the name of the field in the parloop data structure intended for reduction. */
198 tree init; /* reduction initialization value. */
199 gphi *new_phi; /* (helper field) Newly created phi node whose result
200 will be passed to the atomic operation. Represents
201 the local result each thread computed for the reduction
202 operation. */
203 };
204
205 /* Reduction info hashtable helpers. */
206
207 struct reduction_hasher : free_ptr_hash <reduction_info>
208 {
209 static inline hashval_t hash (const reduction_info *);
210 static inline bool equal (const reduction_info *, const reduction_info *);
211 };
212
213 /* Equality and hash functions for hashtab code. */
214
215 inline bool
216 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
217 {
218 return (a->reduc_phi == b->reduc_phi);
219 }
220
221 inline hashval_t
222 reduction_hasher::hash (const reduction_info *a)
223 {
224 return a->reduc_version;
225 }
226
227 typedef hash_table<reduction_hasher> reduction_info_table_type;
228
229
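/* Looks up the reduction described by the phi node PHI in REDUCTION_LIST
   and returns its reduction_info, or NULL if PHI does not correspond to a
   registered reduction.  */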
230 static struct reduction_info *
231 reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
232 {
233 struct reduction_info tmpred, *red;
234
235 if (reduction_list->elements () == 0 || phi == NULL)
236 return NULL;
237
238 tmpred.reduc_phi = phi;
239 tmpred.reduc_version = gimple_uid (phi);
240 red = reduction_list->find (&tmpred);
241
242 return red;
243 }
244
245 /* Element of hashtable of names to copy. */
246
247 struct name_to_copy_elt
248 {
249 unsigned version; /* The version of the name to copy. */
250 tree new_name; /* The new name used in the copy. */
251 tree field; /* The field of the structure used to pass the
252 value. */
253 };
254
255 /* Name copies hashtable helpers. */
256
257 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
258 {
259 static inline hashval_t hash (const name_to_copy_elt *);
260 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
261 };
262
263 /* Equality and hash functions for hashtab code. */
264
265 inline bool
266 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
267 {
268 return a->version == b->version;
269 }
270
271 inline hashval_t
272 name_to_copy_hasher::hash (const name_to_copy_elt *a)
273 {
274 return (hashval_t) a->version;
275 }
276
277 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
278
279 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
280 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
281 represents the denominator for every element in the matrix. */
282 typedef struct lambda_trans_matrix_s
283 {
284 lambda_matrix matrix;
285 int rowsize;
286 int colsize;
287 int denominator;
288 } *lambda_trans_matrix;
289 #define LTM_MATRIX(T) ((T)->matrix)
290 #define LTM_ROWSIZE(T) ((T)->rowsize)
291 #define LTM_COLSIZE(T) ((T)->colsize)
292 #define LTM_DENOMINATOR(T) ((T)->denominator)
293
294 /* Allocate a new transformation matrix. */
295
296 static lambda_trans_matrix
297 lambda_trans_matrix_new (int colsize, int rowsize,
298 struct obstack * lambda_obstack)
299 {
300 lambda_trans_matrix ret;
301
302 ret = (lambda_trans_matrix)
303 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
304 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
305 LTM_ROWSIZE (ret) = rowsize;
306 LTM_COLSIZE (ret) = colsize;
307 LTM_DENOMINATOR (ret) = 1;
308 return ret;
309 }
310
311 /* Multiply a vector VEC by a matrix MAT.
312 MAT is an M*N matrix, and VEC is a vector with length N. The result
313 is stored in DEST which must be a vector of length M. */
314
315 static void
316 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
317 lambda_vector vec, lambda_vector dest)
318 {
319 int i, j;
320
321 lambda_vector_clear (dest, m);
322 for (i = 0; i < m; i++)
323 for (j = 0; j < n; j++)
324 dest[i] += matrix[i][j] * vec[j];
325 }
326
327 /* Return true if TRANS is a legal transformation matrix that respects
328 the dependence vectors in DISTS and DIRS. The conservative answer
329 is false.
330
331 "Wolfe proves that a unimodular transformation represented by the
332 matrix T is legal when applied to a loop nest with a set of
333 lexicographically non-negative distance vectors RDG if and only if
334 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
335 i.e.: if and only if it transforms the lexicographically positive
336 distance vectors to lexicographically positive vectors. Note that
337 a unimodular matrix must transform the zero vector (and only it) to
338 the zero vector." S.Muchnick. */
339
340 static bool
341 lambda_transform_legal_p (lambda_trans_matrix trans,
342 int nb_loops,
343 vec<ddr_p> dependence_relations)
344 {
345 unsigned int i, j;
346 lambda_vector distres;
347 struct data_dependence_relation *ddr;
348
349 gcc_assert (LTM_COLSIZE (trans) == nb_loops
350 && LTM_ROWSIZE (trans) == nb_loops);
351
352 /* When there are no dependences, the transformation is correct. */
353 if (dependence_relations.length () == 0)
354 return true;
355
356 ddr = dependence_relations[0];
357 if (ddr == NULL)
358 return true;
359
360 /* When there is an unknown relation in the dependence_relations, we
361 know that it is not worth looking at this loop nest: give up. */
362 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
363 return false;
364
365 distres = lambda_vector_new (nb_loops);
366
367 /* For each distance vector in the dependence graph. */
368 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
369 {
370 /* Don't care about relations for which we know that there is no
371 dependence, nor about read-read (a.k.a. input dependences):
372 these data accesses can happen in any order. */
373 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
374 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
375 continue;
376
377 /* Conservatively answer: "this transformation is not valid". */
378 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
379 return false;
380
381 /* If the dependence could not be captured by a distance vector,
382 conservatively answer that the transform is not valid. */
383 if (DDR_NUM_DIST_VECTS (ddr) == 0)
384 return false;
385
386 /* Compute trans.dist_vect */
387 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
388 {
389 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
390 DDR_DIST_VECT (ddr, j), distres);
391
392 if (!lambda_vector_lexico_pos (distres, nb_loops))
393 return false;
394 }
395 }
396 return true;
397 }
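/* A worked example of the check above, with invented numbers: for a single
   loop, the only transformation tried is the 1x1 reversal matrix
   T = [-1] (see loop_parallel_p below).  A dependence with distance
   vector d = [1], e.g. a[i] written in one iteration and read as a[i-1]
   in the next, gives T.d = [-1], which is not lexicographically positive,
   so the reversal -- and hence the parallelization -- is rejected.  If
   the only distance vector is d = [0], then T.d = [0] still passes
   lambda_vector_lexico_pos and the check succeeds.  */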
398
399 /* Data dependency analysis. Returns true if the iterations of LOOP
400 are independent of each other (that is, if we can execute them
401 in parallel). */
402
403 static bool
404 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
405 {
406 vec<ddr_p> dependence_relations;
407 vec<data_reference_p> datarefs;
408 lambda_trans_matrix trans;
409 bool ret = false;
410
411 if (dump_file && (dump_flags & TDF_DETAILS))
412 {
413 fprintf (dump_file, "Considering loop %d\n", loop->num);
414 if (!loop->inner)
415 fprintf (dump_file, "loop is innermost\n");
416 else
417 fprintf (dump_file, "loop NOT innermost\n");
418 }
419
420 /* Check for problems with dependences. If the loop can be reversed,
421 the iterations are independent. */
422 auto_vec<loop_p, 3> loop_nest;
423 datarefs.create (10);
424 dependence_relations.create (100);
425 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
426 &dependence_relations))
427 {
428 if (dump_file && (dump_flags & TDF_DETAILS))
429 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
430 ret = false;
431 goto end;
432 }
433 if (dump_file && (dump_flags & TDF_DETAILS))
434 dump_data_dependence_relations (dump_file, dependence_relations);
435
436 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
437 LTM_MATRIX (trans)[0][0] = -1;
438
439 if (lambda_transform_legal_p (trans, 1, dependence_relations))
440 {
441 ret = true;
442 if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (dump_file, " SUCCESS: may be parallelized\n");
444 }
445 else if (dump_file && (dump_flags & TDF_DETAILS))
446 fprintf (dump_file,
447 " FAILED: data dependencies exist across iterations\n");
448
449 end:
450 free_dependence_relations (dependence_relations);
451 free_data_refs (datarefs);
452
453 return ret;
454 }
455
456 /* Return true when LOOP contains basic blocks marked with the
457 BB_IRREDUCIBLE_LOOP flag. */
458
459 static inline bool
460 loop_has_blocks_with_irreducible_flag (struct loop *loop)
461 {
462 unsigned i;
463 basic_block *bbs = get_loop_body_in_dom_order (loop);
464 bool res = true;
465
466 for (i = 0; i < loop->num_nodes; i++)
467 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
468 goto end;
469
470 res = false;
471 end:
472 free (bbs);
473 return res;
474 }
475
476 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
477 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
478 to their addresses that can be reused. The address of OBJ is known to
479 be invariant in the whole function. Other needed statements are placed
480 right before GSI. */
481
482 static tree
483 take_address_of (tree obj, tree type, edge entry,
484 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
485 {
486 int uid;
487 tree *var_p, name, addr;
488 gassign *stmt;
489 gimple_seq stmts;
490
491 /* Since the address of OBJ is invariant, the trees may be shared.
492 Avoid rewriting unrelated parts of the code. */
493 obj = unshare_expr (obj);
494 for (var_p = &obj;
495 handled_component_p (*var_p);
496 var_p = &TREE_OPERAND (*var_p, 0))
497 continue;
498
499 /* Canonicalize the access so that it is based on a MEM_REF. */
500 if (DECL_P (*var_p))
501 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
502
503 /* Assign a canonical SSA name to the address of the base decl used
504 in the address and share it for all accesses and addresses based
505 on it. */
506 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
507 int_tree_map elt;
508 elt.uid = uid;
509 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
510 if (!slot->to)
511 {
512 if (gsi == NULL)
513 return NULL;
514 addr = TREE_OPERAND (*var_p, 0);
515 const char *obj_name
516 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
517 if (obj_name)
518 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
519 else
520 name = make_ssa_name (TREE_TYPE (addr));
521 stmt = gimple_build_assign (name, addr);
522 gsi_insert_on_edge_immediate (entry, stmt);
523
524 slot->uid = uid;
525 slot->to = name;
526 }
527 else
528 name = slot->to;
529
530 /* Express the address in terms of the canonical SSA name. */
531 TREE_OPERAND (*var_p, 0) = name;
532 if (gsi == NULL)
533 return build_fold_addr_expr_with_type (obj, type);
534
535 name = force_gimple_operand (build_addr (obj, current_function_decl),
536 &stmts, true, NULL_TREE);
537 if (!gimple_seq_empty_p (stmts))
538 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
539
540 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
541 {
542 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
543 NULL_TREE);
544 if (!gimple_seq_empty_p (stmts))
545 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
546 }
547
548 return name;
549 }
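/* For example (a sketch with invented names and types): given a local
   addressable variable x and an access "x.f" inside the region, the
   access is rewritten to the equivalent of

     addr.1_5 = &x;   <-- emitted once, on edge ENTRY
     ... MEM[(struct s *) addr.1_5].f ...

   and the canonical name addr.1_5 is reused, via DECL_ADDRESS, for every
   other access or address computation based on x.  */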
550
551 /* Callback for htab_traverse. Create the initialization statement
552 for the reduction described in SLOT, and place it at the preheader of
553 the loop described in DATA. */
554
555 int
556 initialize_reductions (reduction_info **slot, struct loop *loop)
557 {
558 tree init, c;
559 tree bvar, type, arg;
560 edge e;
561
562 struct reduction_info *const reduc = *slot;
563
564 /* Create initialization in preheader:
565 reduction_variable = initialization value of reduction. */
566
567 /* In the phi node at the header, replace the argument coming
568 from the preheader with the reduction initialization value. */
569
570 /* Create a new variable to initialize the reduction. */
571 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
572 bvar = create_tmp_var (type, "reduction");
573
574 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
575 OMP_CLAUSE_REDUCTION);
576 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
577 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
578
579 init = omp_reduction_init (c, TREE_TYPE (bvar));
580 reduc->init = init;
581
582 /* Replace the argument representing the initialization value
583 with the initialization value for the reduction (neutral
584 element for the particular operation, e.g. 0 for PLUS_EXPR,
585 1 for MULT_EXPR, etc).
586 Keep the old value in reduc->initial_value; it will be
587 taken into consideration after the parallel
588 computation is done. */
589
590 e = loop_preheader_edge (loop);
591 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
592 /* Replace the preheader argument with INIT and remember the old value. */
593
594 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
595 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
596 reduc->initial_value = arg;
597 return 1;
598 }
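/* Continuing the "sum" example from the head of the file: for
   "int sum = 1;" reduced with PLUS_EXPR, the preheader argument of the
   reduction phi is rewritten from 1 to the neutral element 0, and the
   original 1 is recorded in reduc->initial_value so that it can be folded
   back in once the threads have joined.  */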
599
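/* Data passed by eliminate_local_variables_stmt to the walk_tree callback
   eliminate_local_variables_1: the ENTRY edge of the region, the
   DECL_ADDRESS map of addresses already taken, the iterator GSI at the
   statement being processed (NULL while processing a debug bind), and
   flags recording whether the expression CHANGED and whether a debug bind
   value must be RESET.  */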
600 struct elv_data
601 {
602 struct walk_stmt_info info;
603 edge entry;
604 int_tree_htab_type *decl_address;
605 gimple_stmt_iterator *gsi;
606 bool changed;
607 bool reset;
608 };
609
610 /* Eliminates references to local variables in *TP out of the single
611 entry single exit region starting at DTA->ENTRY.
612 DECL_ADDRESS contains addresses of the references that had their
613 address taken already. If the expression is changed, CHANGED is
614 set to true. Callback for walk_tree. */
615
616 static tree
617 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
618 {
619 struct elv_data *const dta = (struct elv_data *) data;
620 tree t = *tp, var, addr, addr_type, type, obj;
621
622 if (DECL_P (t))
623 {
624 *walk_subtrees = 0;
625
626 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
627 return NULL_TREE;
628
629 type = TREE_TYPE (t);
630 addr_type = build_pointer_type (type);
631 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
632 dta->gsi);
633 if (dta->gsi == NULL && addr == NULL_TREE)
634 {
635 dta->reset = true;
636 return NULL_TREE;
637 }
638
639 *tp = build_simple_mem_ref (addr);
640
641 dta->changed = true;
642 return NULL_TREE;
643 }
644
645 if (TREE_CODE (t) == ADDR_EXPR)
646 {
647 /* ADDR_EXPR may appear in two contexts:
648 -- as a gimple operand, when the address taken is a function invariant
649 -- as gimple rhs, when the resulting address is not a function
650 invariant
651 We do not need to do anything special in the latter case (the base of
652 the memory reference whose address is taken may be replaced in the
653 DECL_P case). The former case is more complicated, as we need to
654 ensure that the new address is still a gimple operand. Thus, it
655 is not sufficient to replace just the base of the memory reference --
656 we need to move the whole computation of the address out of the
657 loop. */
658 if (!is_gimple_val (t))
659 return NULL_TREE;
660
661 *walk_subtrees = 0;
662 obj = TREE_OPERAND (t, 0);
663 var = get_base_address (obj);
664 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
665 return NULL_TREE;
666
667 addr_type = TREE_TYPE (t);
668 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
669 dta->gsi);
670 if (dta->gsi == NULL && addr == NULL_TREE)
671 {
672 dta->reset = true;
673 return NULL_TREE;
674 }
675 *tp = addr;
676
677 dta->changed = true;
678 return NULL_TREE;
679 }
680
681 if (!EXPR_P (t))
682 *walk_subtrees = 0;
683
684 return NULL_TREE;
685 }
686
687 /* Moves the references to local variables in STMT at *GSI out of the single
688 entry single exit region starting at ENTRY. DECL_ADDRESS contains
689 addresses of the references that had their address taken
690 already. */
691
692 static void
693 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
694 int_tree_htab_type *decl_address)
695 {
696 struct elv_data dta;
697 gimple stmt = gsi_stmt (*gsi);
698
699 memset (&dta.info, '\0', sizeof (dta.info));
700 dta.entry = entry;
701 dta.decl_address = decl_address;
702 dta.changed = false;
703 dta.reset = false;
704
705 if (gimple_debug_bind_p (stmt))
706 {
707 dta.gsi = NULL;
708 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
709 eliminate_local_variables_1, &dta.info, NULL);
710 if (dta.reset)
711 {
712 gimple_debug_bind_reset_value (stmt);
713 dta.changed = true;
714 }
715 }
716 else if (gimple_clobber_p (stmt))
717 {
718 stmt = gimple_build_nop ();
719 gsi_replace (gsi, stmt, false);
720 dta.changed = true;
721 }
722 else
723 {
724 dta.gsi = gsi;
725 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
726 }
727
728 if (dta.changed)
729 update_stmt (stmt);
730 }
731
732 /* Eliminates the references to local variables from the single entry
733 single exit region between the ENTRY and EXIT edges.
734
735 This includes:
736 1) Taking address of a local variable -- these are moved out of the
737 region (and a temporary variable is created to hold the address if
738 necessary).
739
740 2) Dereferencing a local variable -- these are replaced with indirect
741 references. */
742
743 static void
744 eliminate_local_variables (edge entry, edge exit)
745 {
746 basic_block bb;
747 auto_vec<basic_block, 3> body;
748 unsigned i;
749 gimple_stmt_iterator gsi;
750 bool has_debug_stmt = false;
751 int_tree_htab_type decl_address (10);
752 basic_block entry_bb = entry->src;
753 basic_block exit_bb = exit->dest;
754
755 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
756
757 FOR_EACH_VEC_ELT (body, i, bb)
758 if (bb != entry_bb && bb != exit_bb)
759 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
760 if (is_gimple_debug (gsi_stmt (gsi)))
761 {
762 if (gimple_debug_bind_p (gsi_stmt (gsi)))
763 has_debug_stmt = true;
764 }
765 else
766 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
767
768 if (has_debug_stmt)
769 FOR_EACH_VEC_ELT (body, i, bb)
770 if (bb != entry_bb && bb != exit_bb)
771 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
772 if (gimple_debug_bind_p (gsi_stmt (gsi)))
773 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
774 }
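/* For instance (an invented fragment), a region that contains

     p_1 = &x;
     tmp_2 = x;

   for a local variable x becomes

     addr_3 = &x;   <-- moved out of the region, onto edge ENTRY
     ...
     p_1 = addr_3;
     tmp_2 = MEM[(int *) addr_3];

   so that the region itself refers to x only indirectly.  */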
775
776 /* Returns true if expression EXPR is not defined between ENTRY and
777 EXIT, i.e. if all its operands are defined outside of the region. */
778
779 static bool
780 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
781 {
782 basic_block entry_bb = entry->src;
783 basic_block exit_bb = exit->dest;
784 basic_block def_bb;
785
786 if (is_gimple_min_invariant (expr))
787 return true;
788
789 if (TREE_CODE (expr) == SSA_NAME)
790 {
791 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
792 if (def_bb
793 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
794 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
795 return false;
796
797 return true;
798 }
799
800 return false;
801 }
802
803 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
804 The copies are stored to NAME_COPIES; if NAME was already duplicated,
805 its duplicate stored in NAME_COPIES is returned.
806
807 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
808 duplicated, storing the copies in DECL_COPIES. */
809
810 static tree
811 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
812 int_tree_htab_type *decl_copies,
813 bool copy_name_p)
814 {
815 tree copy, var, var_copy;
816 unsigned idx, uid, nuid;
817 struct int_tree_map ielt;
818 struct name_to_copy_elt elt, *nelt;
819 name_to_copy_elt **slot;
820 int_tree_map *dslot;
821
822 if (TREE_CODE (name) != SSA_NAME)
823 return name;
824
825 idx = SSA_NAME_VERSION (name);
826 elt.version = idx;
827 slot = name_copies->find_slot_with_hash (&elt, idx,
828 copy_name_p ? INSERT : NO_INSERT);
829 if (slot && *slot)
830 return (*slot)->new_name;
831
832 if (copy_name_p)
833 {
834 copy = duplicate_ssa_name (name, NULL);
835 nelt = XNEW (struct name_to_copy_elt);
836 nelt->version = idx;
837 nelt->new_name = copy;
838 nelt->field = NULL_TREE;
839 *slot = nelt;
840 }
841 else
842 {
843 gcc_assert (!slot);
844 copy = name;
845 }
846
847 var = SSA_NAME_VAR (name);
848 if (!var)
849 return copy;
850
851 uid = DECL_UID (var);
852 ielt.uid = uid;
853 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
854 if (!dslot->to)
855 {
856 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
857 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
858 dslot->uid = uid;
859 dslot->to = var_copy;
860
861 /* Ensure that when we meet this decl next time, we won't duplicate
862 it again. */
863 nuid = DECL_UID (var_copy);
864 ielt.uid = nuid;
865 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
866 gcc_assert (!dslot->to);
867 dslot->uid = nuid;
868 dslot->to = var_copy;
869 }
870 else
871 var_copy = dslot->to;
872
873 replace_ssa_name_symbol (copy, var_copy);
874 return copy;
875 }
876
877 /* Finds the ssa names used in STMT that are defined outside the
878 region between ENTRY and EXIT and replaces such ssa names with
879 their duplicates. The duplicates are stored to NAME_COPIES. Base
880 decls of all ssa names used in STMT (including those defined in
881 LOOP) are replaced with the new temporary variables; the
882 replacement decls are stored in DECL_COPIES. */
883
884 static void
885 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
886 name_to_copy_table_type *name_copies,
887 int_tree_htab_type *decl_copies)
888 {
889 use_operand_p use;
890 def_operand_p def;
891 ssa_op_iter oi;
892 tree name, copy;
893 bool copy_name_p;
894
895 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
896 {
897 name = DEF_FROM_PTR (def);
898 gcc_assert (TREE_CODE (name) == SSA_NAME);
899 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
900 false);
901 gcc_assert (copy == name);
902 }
903
904 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
905 {
906 name = USE_FROM_PTR (use);
907 if (TREE_CODE (name) != SSA_NAME)
908 continue;
909
910 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
911 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
912 copy_name_p);
913 SET_USE (use, copy);
914 }
915 }
916
917 /* Rewrites the debug statement STMT to use the duplicates created by
918 separate_decls_in_region_stmt: the bound variable is replaced via
919 DECL_COPIES and the ssa names used in the bound value are replaced
920 via NAME_COPIES. Returns true if STMT cannot be rewritten and
921 should be removed instead (e.g. when no duplicate of its variable
922 exists), false otherwise. */
923
924 static bool
925 separate_decls_in_region_debug (gimple stmt,
926 name_to_copy_table_type *name_copies,
927 int_tree_htab_type *decl_copies)
928 {
929 use_operand_p use;
930 ssa_op_iter oi;
931 tree var, name;
932 struct int_tree_map ielt;
933 struct name_to_copy_elt elt;
934 name_to_copy_elt **slot;
935 int_tree_map *dslot;
936
937 if (gimple_debug_bind_p (stmt))
938 var = gimple_debug_bind_get_var (stmt);
939 else if (gimple_debug_source_bind_p (stmt))
940 var = gimple_debug_source_bind_get_var (stmt);
941 else
942 return true;
943 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
944 return true;
945 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
946 ielt.uid = DECL_UID (var);
947 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
948 if (!dslot)
949 return true;
950 if (gimple_debug_bind_p (stmt))
951 gimple_debug_bind_set_var (stmt, dslot->to);
952 else if (gimple_debug_source_bind_p (stmt))
953 gimple_debug_source_bind_set_var (stmt, dslot->to);
954
955 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
956 {
957 name = USE_FROM_PTR (use);
958 if (TREE_CODE (name) != SSA_NAME)
959 continue;
960
961 elt.version = SSA_NAME_VERSION (name);
962 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
963 if (!slot)
964 {
965 gimple_debug_bind_reset_value (stmt);
966 update_stmt (stmt);
967 break;
968 }
969
970 SET_USE (use, (*slot)->new_name);
971 }
972
973 return false;
974 }
975
976 /* Callback for htab_traverse. Adds a field corresponding to the reduction
977 specified in SLOT. The type is passed in DATA. */
978
979 int
980 add_field_for_reduction (reduction_info **slot, tree type)
981 {
982
983 struct reduction_info *const red = *slot;
984 tree var = gimple_assign_lhs (red->reduc_stmt);
985 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
986 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
987
988 insert_field_into_struct (type, field);
989
990 red->field = field;
991
992 return 1;
993 }
994
995 /* Callback for htab_traverse. Adds a field corresponding to an ssa name
996 described in SLOT. The type is passed in DATA. */
997
998 int
999 add_field_for_name (name_to_copy_elt **slot, tree type)
1000 {
1001 struct name_to_copy_elt *const elt = *slot;
1002 tree name = ssa_name (elt->version);
1003 tree field = build_decl (UNKNOWN_LOCATION,
1004 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1005 TREE_TYPE (name));
1006
1007 insert_field_into_struct (type, field);
1008 elt->field = field;
1009
1010 return 1;
1011 }
1012
1013 /* Callback for htab_traverse. A local result is the intermediate
1014 result computed by a single thread, or the initial value in
1015 case no iteration was executed. This function creates a phi
1016 node reflecting these values. The phi's result will be
1017 stored in the NEW_PHI field of the reduction's data
1018 structure. */
1019
1020 int
1021 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1022 {
1023 struct reduction_info *const reduc = *slot;
1024 edge e;
1025 gphi *new_phi;
1026 basic_block store_bb;
1027 tree local_res;
1028 source_location locus;
1029
1030 /* STORE_BB is the block where the phi should be stored. It is the
1031 destination of the loop exit. (Find the fallthru edge from
1032 GIMPLE_OMP_CONTINUE.) */
1033 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1034
1035 /* STORE_BB has two predecessors. One comes from the loop (where
1036 the reduction's result is computed), and the other comes from a
1037 block preceding the loop, which is taken when no iterations
1038 are executed (in that case the initial
1039 value should be used). */
1040 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1041 e = EDGE_PRED (store_bb, 1);
1042 else
1043 e = EDGE_PRED (store_bb, 0);
1044 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
1045 locus = gimple_location (reduc->reduc_stmt);
1046 new_phi = create_phi_node (local_res, store_bb);
1047 add_phi_arg (new_phi, reduc->init, e, locus);
1048 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1049 FALLTHRU_EDGE (loop->latch), locus);
1050 reduc->new_phi = new_phi;
1051
1052 return 1;
1053 }
1054
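/* Describes the shared data structure used to communicate among the
   threads: STORE is the structure itself, LOAD is a pointer to it, and
   STORE_BB and LOAD_BB are the blocks in which the stores to and the
   loads from the structure are generated.  */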
1055 struct clsn_data
1056 {
1057 tree store;
1058 tree load;
1059
1060 basic_block store_bb;
1061 basic_block load_bb;
1062 };
1063
1064 /* Callback for htab_traverse. Create an atomic instruction for the
1065 reduction described in SLOT.
1066 DATA annotates the place in memory the atomic operation relates to,
1067 and the basic block it needs to be generated in. */
1068
1069 int
1070 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1071 {
1072 struct reduction_info *const reduc = *slot;
1073 gimple_stmt_iterator gsi;
1074 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1075 tree load_struct;
1076 basic_block bb;
1077 basic_block new_bb;
1078 edge e;
1079 tree t, addr, ref, x;
1080 tree tmp_load, name;
1081 gimple load;
1082
1083 load_struct = build_simple_mem_ref (clsn_data->load);
1084 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1085
1086 addr = build_addr (t, current_function_decl);
1087
1088 /* Split LOAD_BB and build the atomic load/update sequence there. */
1089 bb = clsn_data->load_bb;
1090
1091 gsi = gsi_last_bb (bb);
1092 e = split_block (bb, gsi_stmt (gsi));
1093 new_bb = e->dest;
1094
1095 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1096 tmp_load = make_ssa_name (tmp_load);
1097 load = gimple_build_omp_atomic_load (tmp_load, addr);
1098 SSA_NAME_DEF_STMT (tmp_load) = load;
1099 gsi = gsi_start_bb (new_bb);
1100 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1101
1102 e = split_block (new_bb, load);
1103 new_bb = e->dest;
1104 gsi = gsi_start_bb (new_bb);
1105 ref = tmp_load;
1106 x = fold_build2 (reduc->reduction_code,
1107 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1108 PHI_RESULT (reduc->new_phi));
1109
1110 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1111 GSI_CONTINUE_LINKING);
1112
1113 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1114 return 1;
1115 }
1116
1117 /* Create the atomic operation at the join point of the threads.
1118 REDUCTION_LIST describes the reductions in LOOP.
1119 LD_ST_DATA describes the shared data structure where
1120 shared data is stored in and loaded from. */
1121 static void
1122 create_call_for_reduction (struct loop *loop,
1123 reduction_info_table_type *reduction_list,
1124 struct clsn_data *ld_st_data)
1125 {
1126 reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
1127 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1128 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1129 reduction_list
1130 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1131 }
1132
1133 /* Callback for htab_traverse. Loads the final reduction value at the
1134 join point of all threads, and inserts it in the right place. */
1135
1136 int
1137 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1138 {
1139 struct reduction_info *const red = *slot;
1140 gimple stmt;
1141 gimple_stmt_iterator gsi;
1142 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1143 tree load_struct;
1144 tree name;
1145 tree x;
1146
1147 gsi = gsi_after_labels (clsn_data->load_bb);
1148 load_struct = build_simple_mem_ref (clsn_data->load);
1149 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1150 NULL_TREE);
1151
1152 x = load_struct;
1153 name = PHI_RESULT (red->keep_res);
1154 stmt = gimple_build_assign (name, x);
1155
1156 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1157
1158 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1159 !gsi_end_p (gsi); gsi_next (&gsi))
1160 if (gsi_stmt (gsi) == red->keep_res)
1161 {
1162 remove_phi_node (&gsi, false);
1163 return 1;
1164 }
1165 gcc_unreachable ();
1166 }
1167
1168 /* Load the reduction result that was stored in LD_ST_DATA.
1169 REDUCTION_LIST describes the list of reductions that the
1170 loads should be generated for. */
1171 static void
1172 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1173 struct clsn_data *ld_st_data)
1174 {
1175 gimple_stmt_iterator gsi;
1176 tree t;
1177 gimple stmt;
1178
1179 gsi = gsi_after_labels (ld_st_data->load_bb);
1180 t = build_fold_addr_expr (ld_st_data->store);
1181 stmt = gimple_build_assign (ld_st_data->load, t);
1182
1183 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1184
1185 reduction_list
1186 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1187
1188 }
1189
1190 /* Callback for htab_traverse. Store the initial value of the
1191 reduction variable, the value given by the user before the loop
1192 (kept in the initial_value field), into the reduction field.
1193 The reduction is specified in SLOT. The store information is
1194 passed in DATA. */
1195
1196 int
1197 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1198 {
1199 struct reduction_info *const red = *slot;
1200 tree t;
1201 gimple stmt;
1202 gimple_stmt_iterator gsi;
1203 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1204
1205 gsi = gsi_last_bb (clsn_data->store_bb);
1206 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1207 stmt = gimple_build_assign (t, red->initial_value);
1208 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1209
1210 return 1;
1211 }
1212
1213 /* Callback for htab_traverse. Creates a load from a field of LOAD in LOAD_BB
1214 and a store to a field of STORE in STORE_BB for the ssa name and its duplicate
1215 specified in SLOT. */
1216
1217 int
1218 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1219 struct clsn_data *clsn_data)
1220 {
1221 struct name_to_copy_elt *const elt = *slot;
1222 tree t;
1223 gimple stmt;
1224 gimple_stmt_iterator gsi;
1225 tree type = TREE_TYPE (elt->new_name);
1226 tree load_struct;
1227
1228 gsi = gsi_last_bb (clsn_data->store_bb);
1229 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1230 stmt = gimple_build_assign (t, ssa_name (elt->version));
1231 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1232
1233 gsi = gsi_last_bb (clsn_data->load_bb);
1234 load_struct = build_simple_mem_ref (clsn_data->load);
1235 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1236 stmt = gimple_build_assign (elt->new_name, t);
1237 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1238
1239 return 1;
1240 }
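/* For example (invented names, following the naming of the example at the
   top of the file): for an ssa name n_5 defined outside the region but
   used inside it, this emits

     .paral_data_store.32.n = n_5;       <-- in STORE_BB
     n_42 = .paral_data_load.33_51->n;   <-- in LOAD_BB

   where n_42 is the duplicate that separate_decls_in_region_stmt already
   substituted for the uses of n_5 inside the region.  */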
1241
1242 /* Moves all the variables used in LOOP and defined outside of it
1243 (including the initial values of loop phi nodes) to a structure
1244 created for this purpose. The code
1245
1246 while (1)
1247 {
1248 use (a);
1249 use (b);
1250 }
1251
1252 is transformed this way:
1253
1254 bb0:
1255 old.a = a;
1256 old.b = b;
1257
1258 bb1:
1259 a' = new->a;
1260 b' = new->b;
1261 while (1)
1262 {
1263 use (a');
1264 use (b');
1265 }
1266
1267 `old' is stored to *ARG_STRUCT and `new' is stored to *NEW_ARG_STRUCT. The
1268 pointer `new' is intentionally not initialized (the loop will be split to a
1269 separate function later, and `new' will be initialized from its arguments).
1270 LD_ST_DATA holds information about the shared data structure used to pass
1271 information among the threads. It is initialized here, and
1272 gen_parallel_loop will pass it to create_call_for_reduction that
1273 needs this information. REDUCTION_LIST describes the reductions
1274 in LOOP. */
1275
1276 static void
1277 separate_decls_in_region (edge entry, edge exit,
1278 reduction_info_table_type *reduction_list,
1279 tree *arg_struct, tree *new_arg_struct,
1280 struct clsn_data *ld_st_data)
1281
1282 {
1283 basic_block bb1 = split_edge (entry);
1284 basic_block bb0 = single_pred (bb1);
1285 name_to_copy_table_type name_copies (10);
1286 int_tree_htab_type decl_copies (10);
1287 unsigned i;
1288 tree type, type_name, nvar;
1289 gimple_stmt_iterator gsi;
1290 struct clsn_data clsn_data;
1291 auto_vec<basic_block, 3> body;
1292 basic_block bb;
1293 basic_block entry_bb = bb1;
1294 basic_block exit_bb = exit->dest;
1295 bool has_debug_stmt = false;
1296
1297 entry = single_succ_edge (entry_bb);
1298 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1299
1300 FOR_EACH_VEC_ELT (body, i, bb)
1301 {
1302 if (bb != entry_bb && bb != exit_bb)
1303 {
1304 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1305 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1306 &name_copies, &decl_copies);
1307
1308 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1309 {
1310 gimple stmt = gsi_stmt (gsi);
1311
1312 if (is_gimple_debug (stmt))
1313 has_debug_stmt = true;
1314 else
1315 separate_decls_in_region_stmt (entry, exit, stmt,
1316 &name_copies, &decl_copies);
1317 }
1318 }
1319 }
1320
1321 /* Now process debug bind stmts. We must not create decls while
1322 processing debug stmts, so we defer their processing so as to
1323 make sure we will have debug info for as many variables as
1324 possible (all of those that were dealt with in the loop above),
1325 and discard those for which we know there's nothing we can
1326 do. */
1327 if (has_debug_stmt)
1328 FOR_EACH_VEC_ELT (body, i, bb)
1329 if (bb != entry_bb && bb != exit_bb)
1330 {
1331 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1332 {
1333 gimple stmt = gsi_stmt (gsi);
1334
1335 if (is_gimple_debug (stmt))
1336 {
1337 if (separate_decls_in_region_debug (stmt, &name_copies,
1338 &decl_copies))
1339 {
1340 gsi_remove (&gsi, true);
1341 continue;
1342 }
1343 }
1344
1345 gsi_next (&gsi);
1346 }
1347 }
1348
1349 if (name_copies.elements () == 0 && reduction_list->elements () == 0)
1350 {
1351 /* It may happen that there is nothing to copy (if there are only
1352 loop carried and external variables in the loop). */
1353 *arg_struct = NULL;
1354 *new_arg_struct = NULL;
1355 }
1356 else
1357 {
1358 /* Create the type for the structure to store the ssa names to. */
1359 type = lang_hooks.types.make_type (RECORD_TYPE);
1360 type_name = build_decl (UNKNOWN_LOCATION,
1361 TYPE_DECL, create_tmp_var_name (".paral_data"),
1362 type);
1363 TYPE_NAME (type) = type_name;
1364
1365 name_copies.traverse <tree, add_field_for_name> (type);
1366 if (reduction_list && reduction_list->elements () > 0)
1367 {
1368 /* Create the fields for reductions. */
1369 reduction_list->traverse <tree, add_field_for_reduction> (type);
1370 }
1371 layout_type (type);
1372
1373 /* Create the loads and stores. */
1374 *arg_struct = create_tmp_var (type, ".paral_data_store");
1375 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1376 *new_arg_struct = make_ssa_name (nvar);
1377
1378 ld_st_data->store = *arg_struct;
1379 ld_st_data->load = *new_arg_struct;
1380 ld_st_data->store_bb = bb0;
1381 ld_st_data->load_bb = bb1;
1382
1383 name_copies
1384 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1385 (ld_st_data);
1386
1387 /* Load the calculation from memory (after the join of the threads). */
1388
1389 if (reduction_list && reduction_list->elements () > 0)
1390 {
1391 reduction_list
1392 ->traverse <struct clsn_data *, create_stores_for_reduction>
1393 (ld_st_data);
1394 clsn_data.load = make_ssa_name (nvar);
1395 clsn_data.load_bb = exit->dest;
1396 clsn_data.store = ld_st_data->store;
1397 create_final_loads_for_reduction (reduction_list, &clsn_data);
1398 }
1399 }
1400 }
1401
1402 /* Returns true if FNDECL was created to run in parallel. */
1403
1404 bool
1405 parallelized_function_p (tree fndecl)
1406 {
1407 cgraph_node *node = cgraph_node::get (fndecl);
1408 gcc_assert (node != NULL);
1409 return node->parallelized_function;
1410 }
1411
1412 /* Creates and returns an empty function that will receive the body of
1413 a parallelized loop. */
1414
1415 static tree
1416 create_loop_fn (location_t loc)
1417 {
1418 char buf[100];
1419 char *tname;
1420 tree decl, type, name, t;
1421 struct function *act_cfun = cfun;
1422 static unsigned loopfn_num;
1423
1424 loc = LOCATION_LOCUS (loc);
1425 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1426 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1427 clean_symbol_name (tname);
1428 name = get_identifier (tname);
1429 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1430
1431 decl = build_decl (loc, FUNCTION_DECL, name, type);
1432 TREE_STATIC (decl) = 1;
1433 TREE_USED (decl) = 1;
1434 DECL_ARTIFICIAL (decl) = 1;
1435 DECL_IGNORED_P (decl) = 0;
1436 TREE_PUBLIC (decl) = 0;
1437 DECL_UNINLINABLE (decl) = 1;
1438 DECL_EXTERNAL (decl) = 0;
1439 DECL_CONTEXT (decl) = NULL_TREE;
1440 DECL_INITIAL (decl) = make_node (BLOCK);
1441
1442 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1443 DECL_ARTIFICIAL (t) = 1;
1444 DECL_IGNORED_P (t) = 1;
1445 DECL_RESULT (decl) = t;
1446
1447 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1448 ptr_type_node);
1449 DECL_ARTIFICIAL (t) = 1;
1450 DECL_ARG_TYPE (t) = ptr_type_node;
1451 DECL_CONTEXT (t) = decl;
1452 TREE_USED (t) = 1;
1453 DECL_ARGUMENTS (decl) = t;
1454
1455 allocate_struct_function (decl, false);
1456
1457 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1458 it. */
1459 set_cfun (act_cfun);
1460
1461 return decl;
1462 }
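/* The created function has, modulo the target's private-name mangling,
   the shape

     static void current_fn.$loopfn.N (void *.paral_data_param);

   it takes the address of the shared .paral_data structure built by
   separate_decls_in_region and returns nothing; its body is filled in
   later, when the omp expander outlines the GIMPLE_OMP_PARALLEL
   region.  */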
1463
1464 /* Replace uses of NAME by VAL in block BB. */
1465
1466 static void
1467 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1468 {
1469 gimple use_stmt;
1470 imm_use_iterator imm_iter;
1471
1472 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1473 {
1474 if (gimple_bb (use_stmt) != bb)
1475 continue;
1476
1477 use_operand_p use_p;
1478 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1479 SET_USE (use_p, val);
1480 }
1481 }
1482
1483 /* Do transformation from:
1484
1485 <bb preheader>:
1486 ...
1487 goto <bb header>
1488
1489 <bb header>:
1490 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1491 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1492 ...
1493 use (ivtmp_a)
1494 ...
1495 sum_b = sum_a + sum_update
1496 ...
1497 if (ivtmp_a < n)
1498 goto <bb latch>;
1499 else
1500 goto <bb exit>;
1501
1502 <bb latch>:
1503 ivtmp_b = ivtmp_a + 1;
1504 goto <bb header>
1505
1506 <bb exit>:
1507 sum_z = PHI <sum_b (cond[1]), ...>
1508
1509 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1510 that's <bb header>.
1511
1512 to:
1513
1514 <bb preheader>:
1515 ...
1516 goto <bb newheader>
1517
1518 <bb header>:
1519 ivtmp_a = PHI <ivtmp_c (latch)>
1520 sum_a = PHI <sum_c (latch)>
1521 ...
1522 use (ivtmp_a)
1523 ...
1524 sum_b = sum_a + sum_update
1525 ...
1526 goto <bb latch>;
1527
1528 <bb newheader>:
1529 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1530 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1531 if (ivtmp_c < n + 1)
1532 goto <bb header>;
1533 else
1534 goto <bb newexit>;
1535
1536 <bb latch>:
1537 ivtmp_b = ivtmp_a + 1;
1538 goto <bb newheader>
1539
1540 <bb newexit>:
1541 sum_y = PHI <sum_c (newheader)>
1542
1543 <bb exit>:
1544 sum_z = PHI <sum_y (newexit), ...>
1545
1546
1547 In unified diff format:
1548
1549 <bb preheader>:
1550 ...
1551 - goto <bb header>
1552 + goto <bb newheader>
1553
1554 <bb header>:
1555 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1556 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1557 + ivtmp_a = PHI <ivtmp_c (latch)>
1558 + sum_a = PHI <sum_c (latch)>
1559 ...
1560 use (ivtmp_a)
1561 ...
1562 sum_b = sum_a + sum_update
1563 ...
1564 - if (ivtmp_a < n)
1565 - goto <bb latch>;
1566 + goto <bb latch>;
1567 +
1568 + <bb newheader>:
1569 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1570 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1571 + if (ivtmp_c < n + 1)
1572 + goto <bb header>;
1573 else
1574 goto <bb exit>;
1575
1576 <bb latch>:
1577 ivtmp_b = ivtmp_a + 1;
1578 - goto <bb header>
1579 + goto <bb newheader>
1580
1581 + <bb newexit>:
1582 + sum_y = PHI <sum_c (newheader)>
1583
1584 <bb exit>:
1585 - sum_z = PHI <sum_b (cond[1]), ...>
1586 + sum_z = PHI <sum_y (newexit), ...>
1587
1588 Note: the example does not show any virtual phis, but these are handled more
1589 or less as reductions.
1590
1591
1592 Moves the exit condition of LOOP to the beginning of its header.
1593 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1594 bound. */
1595
1596 static void
1597 transform_to_exit_first_loop_alt (struct loop *loop,
1598 reduction_info_table_type *reduction_list,
1599 tree bound)
1600 {
1601 basic_block header = loop->header;
1602 basic_block latch = loop->latch;
1603 edge exit = single_dom_exit (loop);
1604 basic_block exit_block = exit->dest;
1605 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1606 tree control = gimple_cond_lhs (cond_stmt);
1607 edge e;
1608
1609 /* Rewriting virtuals into loop-closed ssa normal form makes this
1610 transformation simpler. It also ensures that the virtuals are in
1611 loop-closed ssa normal form after the transformation, which is required by
1612 create_parallel_loop. */
1613 rewrite_virtuals_into_loop_closed_ssa (loop);
1614
1615 /* Create the new_header block. */
1616 basic_block new_header = split_block_before_cond_jump (exit->src);
1617 edge edge_at_split = single_pred_edge (new_header);
1618
1619 /* Redirect entry edge to new_header. */
1620 edge entry = loop_preheader_edge (loop);
1621 e = redirect_edge_and_branch (entry, new_header);
1622 gcc_assert (e == entry);
1623
1624 /* Redirect post_inc_edge to new_header. */
1625 edge post_inc_edge = single_succ_edge (latch);
1626 e = redirect_edge_and_branch (post_inc_edge, new_header);
1627 gcc_assert (e == post_inc_edge);
1628
1629 /* Redirect post_cond_edge to header. */
1630 edge post_cond_edge = single_pred_edge (latch);
1631 e = redirect_edge_and_branch (post_cond_edge, header);
1632 gcc_assert (e == post_cond_edge);
1633
1634 /* Redirect edge_at_split to latch. */
1635 e = redirect_edge_and_branch (edge_at_split, latch);
1636 gcc_assert (e == edge_at_split);
1637
1638 /* Set the new loop bound. */
1639 gimple_cond_set_rhs (cond_stmt, bound);
1640 update_stmt (cond_stmt);
1641
1642 /* Repair the ssa. */
1643 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1644 edge_var_map *vm;
1645 gphi_iterator gsi;
1646 int i;
1647 for (gsi = gsi_start_phis (header), i = 0;
1648 !gsi_end_p (gsi) && v->iterate (i, &vm);
1649 gsi_next (&gsi), i++)
1650 {
1651 gphi *phi = gsi.phi ();
1652 tree res_a = PHI_RESULT (phi);
1653
1654 /* Create new phi. */
1655 tree res_c = copy_ssa_name (res_a, phi);
1656 gphi *nphi = create_phi_node (res_c, new_header);
1657
1658 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1659 replace_uses_in_bb_by (res_a, res_c, new_header);
1660
1661 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1662 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1663
1664 /* Replace sum_b with sum_c in exit phi. */
1665 tree res_b = redirect_edge_var_map_def (vm);
1666 replace_uses_in_bb_by (res_b, res_c, exit_block);
1667
1668 struct reduction_info *red = reduction_phi (reduction_list, phi);
1669 gcc_assert (virtual_operand_p (res_a)
1670 || res_a == control
1671 || red != NULL);
1672
1673 if (red)
1674 {
1675 /* Register the new reduction phi. */
1676 red->reduc_phi = nphi;
1677 gimple_set_uid (red->reduc_phi, red->reduc_version);
1678 }
1679 }
1680 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
1681
1682 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1683 flush_pending_stmts (entry);
1684
1685 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1686 flush_pending_stmts (post_inc_edge);
1687
1688 /* Create a new empty exit block, in between the new loop header and the old
1689 exit block. The function separate_decls_in_region needs this block to
1690 insert code that is active on loop exit, but not any other path. */
1691 basic_block new_exit_block = split_edge (exit);
1692
1693 /* Insert and register the reduction exit phis. */
1694 for (gphi_iterator gsi = gsi_start_phis (exit_block);
1695 !gsi_end_p (gsi);
1696 gsi_next (&gsi))
1697 {
1698 gphi *phi = gsi.phi ();
1699 tree res_z = PHI_RESULT (phi);
1700
1701 /* Now that we have a new exit block, duplicate the phi of the old exit
1702 block in the new exit block to preserve loop-closed ssa. */
1703 edge succ_new_exit_block = single_succ_edge (new_exit_block);
1704 edge pred_new_exit_block = single_pred_edge (new_exit_block);
1705 tree res_y = copy_ssa_name (res_z, phi);
1706 gphi *nphi = create_phi_node (res_y, new_exit_block);
1707 tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1708 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1709 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1710
1711 if (virtual_operand_p (res_z))
1712 continue;
1713
1714 gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
1715 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1716 if (red != NULL)
1717 red->keep_res = nphi;
1718 }
1719
1720 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1721 then we're still using some fields, so only bother about fields that are
1722 still used: header and latch.
1723 The loop has a new header bb, so we update it. The latch bb stays the
1724 same. */
1725 loop->header = new_header;
1726
1727 /* Recalculate dominance info. */
1728 free_dominance_info (CDI_DOMINATORS);
1729 calculate_dominance_info (CDI_DOMINATORS);
1730 }
1731
1732 /* Tries to move the exit condition of LOOP to the beginning of its header
1733 without duplicating the loop body. NIT is the number of iterations of the
1734 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1735 the transformation is successful. */
1736
1737 static bool
1738 try_transform_to_exit_first_loop_alt (struct loop *loop,
1739 reduction_info_table_type *reduction_list,
1740 tree nit)
1741 {
1742 /* Check whether the latch contains a single statement. */
1743 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1744 return false;
1745
1746 /* Check whether the latch contains the loop iv increment. */
1747 edge back = single_succ_edge (loop->latch);
1748 edge exit = single_dom_exit (loop);
1749 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1750 tree control = gimple_cond_lhs (cond_stmt);
1751 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1752 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1753 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1754 return false;
1755
1756 /* Check that there is no code between the loop condition and the latch. */
1757 if (!single_pred_p (loop->latch)
1758 || single_pred (loop->latch) != exit->src)
1759 return false;
1760
1761 tree alt_bound = NULL_TREE;
1762 tree nit_type = TREE_TYPE (nit);
1763
1764 /* Figure out whether nit + 1 overflows. */
1765 if (TREE_CODE (nit) == INTEGER_CST)
1766 {
1767 if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
1768 {
1769 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1770 nit, build_one_cst (nit_type));
1771
1772 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
1773 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1774 return true;
1775 }
1776 else
1777 {
1778 /* Todo: Figure out if we can trigger this, whether it's worth
1779 handling optimally, and whether we can handle it optimally. */
1780 return false;
1781 }
1782 }
1783
1784 gcc_assert (TREE_CODE (nit) == SSA_NAME);
1785
1786 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1787 iv with base 0 and step 1 that is incremented in the latch, like this:
1788
1789 <bb header>:
1790 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1791 ...
1792 if (iv_1 < nit)
1793 goto <bb latch>;
1794 else
1795 goto <bb exit>;
1796
1797 <bb latch>:
1798 iv_2 = iv_1 + 1;
1799 goto <bb header>;
1800
1801 The range of iv_1 is [0, nit]. The latch edge is taken for
1802 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
1803 number of latch executions is equal to nit.
1804
1805 The function max_loop_iterations gives us the maximum number of latch
1806 executions, so it gives us the maximum value of nit. */
1807 widest_int nit_max;
1808 if (!max_loop_iterations (loop, &nit_max))
1809 return false;
1810
1811 /* Check if nit + 1 overflows. */
1812 widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
1813 if (!wi::lts_p (nit_max, type_max))
1814 return false;
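/* A numeric illustration (hypothetical values): if nit_type is an
   unsigned 8-bit type, type_max is 255. If max_loop_iterations bounds
   nit_max by 254, then nit + 1 <= 255 stays representable and we may
   proceed; with nit_max == 255, nit + 1 could wrap to 0, so we fail
   above. */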
1815
1816 gimple def = SSA_NAME_DEF_STMT (nit);
1817
1818 /* Try to find nit + 1 in the form of n, given a definition nit = n - 1
(in gimple, a PLUS_EXPR with constant operand -1). */
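/* For instance (an illustrative example): given a source loop

     for (i = 0; i < n; i++)
       ...

   in the usual do-while shape, the body runs n times but the latch only
   n - 1 times, so niter analysis typically computes nit as n + -1. The
   n operand is then exactly nit + 1, and no overflow-prone addition has
   to be emitted. */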
1819 if (def
1820 && is_gimple_assign (def)
1821 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1822 {
1823 tree op1 = gimple_assign_rhs1 (def);
1824 tree op2 = gimple_assign_rhs2 (def);
1825 if (integer_minus_onep (op1))
1826 alt_bound = op2;
1827 else if (integer_minus_onep (op2))
1828 alt_bound = op1;
1829 }
1830
1831 if (alt_bound == NULL_TREE)
1832 return false;
1833
1834 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1835 return true;
1836 }
1837
1838 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1839 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1840 LOOP. */
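/* Schematically (a sketch, in the notation of the example in
   gen_parallel_loop below): the iteration sequence

     BODY1(1) BODY2(1) BODY1(2) BODY2(2) ... BODY1(n) exit

   is regrouped so that the exit test is evaluated before anything else
   in an iteration; the part of the last iteration that the moved test
   disables -- BODY1(n) -- is covered by a duplicated copy of that code.
   Unlike the _alt variant above, this duplicates part of the loop body,
   which is what gimple_duplicate_sese_tail is used for below. */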
1841
1842 static void
1843 transform_to_exit_first_loop (struct loop *loop,
1844 reduction_info_table_type *reduction_list,
1845 tree nit)
1846 {
1847 basic_block *bbs, *nbbs, ex_bb, orig_header;
1848 unsigned n;
1849 bool ok;
1850 edge exit = single_dom_exit (loop), hpred;
1851 tree control, control_name, res, t;
1852 gphi *phi, *nphi;
1853 gassign *stmt;
1854 gcond *cond_stmt, *cond_nit;
1855 tree nit_1;
1856
1857 split_block_after_labels (loop->header);
1858 orig_header = single_succ (loop->header);
1859 hpred = single_succ_edge (loop->header);
1860
1861 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1862 control = gimple_cond_lhs (cond_stmt);
1863 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1864
1865 /* Make sure that we have phi nodes on exit for all loop header phis
1866 (create_parallel_loop requires that). */
1867 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1868 !gsi_end_p (gsi);
1869 gsi_next (&gsi))
1870 {
1871 phi = gsi.phi ();
1872 res = PHI_RESULT (phi);
1873 t = copy_ssa_name (res, phi);
1874 SET_PHI_RESULT (phi, t);
1875 nphi = create_phi_node (res, orig_header);
1876 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1877
1878 if (res == control)
1879 {
1880 gimple_cond_set_lhs (cond_stmt, t);
1881 update_stmt (cond_stmt);
1882 control = t;
1883 }
1884 }
1885
1886 bbs = get_loop_body_in_dom_order (loop);
1887
1888 for (n = 0; bbs[n] != exit->src; n++)
1889 continue;
1890 nbbs = XNEWVEC (basic_block, n);
1891 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1892 bbs + 1, n, nbbs);
1893 gcc_assert (ok);
1894 free (bbs);
1895 ex_bb = nbbs[0];
1896 free (nbbs);
1897
1898 /* Other than reductions, the only gimple reg that should be copied
1899 out of the loop is the control variable. */
1900 exit = single_dom_exit (loop);
1901 control_name = NULL_TREE;
1902 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1903 !gsi_end_p (gsi); )
1904 {
1905 phi = gsi.phi ();
1906 res = PHI_RESULT (phi);
1907 if (virtual_operand_p (res))
1908 {
1909 gsi_next (&gsi);
1910 continue;
1911 }
1912
1913 /* Check if it is part of a reduction. If it is, record
1914 the phi in the reduction's keep_res field. The
1915 PHI_RESULT of this phi is the resulting value of the reduction
1916 variable when exiting the loop. */
1917
1918 if (reduction_list->elements () > 0)
1919 {
1920 struct reduction_info *red;
1921
1922 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1923 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1924 if (red)
1925 {
1926 red->keep_res = phi;
1927 gsi_next (&gsi);
1928 continue;
1929 }
1930 }
1931 gcc_assert (control_name == NULL_TREE
1932 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1933 control_name = res;
1934 remove_phi_node (&gsi, false);
1935 }
1936 gcc_assert (control_name != NULL_TREE);
1937
1938 /* Initialize the control variable to number of iterations
1939 according to the rhs of the exit condition. */
1940 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1941 cond_nit = as_a <gcond *> (last_stmt (exit->src));
1942 nit_1 = gimple_cond_rhs (cond_nit);
1943 nit_1 = force_gimple_operand_gsi (&gsi,
1944 fold_convert (TREE_TYPE (control_name), nit_1),
1945 false, NULL_TREE, false, GSI_SAME_STMT);
1946 stmt = gimple_build_assign (control_name, nit_1);
1947 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1948 }
1949
1950 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1951 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1952 NEW_DATA is the variable that should be initialized from the argument
1953 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1954 basic block containing the GIMPLE_OMP_PARALLEL statement. */
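/* A sketch of the emitted skeleton (using the names from the body of
   this function):

     GIMPLE_OMP_PARALLEL (num_threads (N_THREADS), LOOP_FN, DATA)
       NEW_DATA = (cast) &DATA;
       GIMPLE_OMP_FOR (initvar = cvar_init; COND; initvar += 1) schedule (static)
         ... loop body ...
         GIMPLE_OMP_CONTINUE (cvar_next, cvar)
       GIMPLE_OMP_RETURN   -- closes the GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN     -- closes the GIMPLE_OMP_PARALLEL  */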
1955
1956 static basic_block
1957 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1958 tree new_data, unsigned n_threads, location_t loc)
1959 {
1960 gimple_stmt_iterator gsi;
1961 basic_block bb, paral_bb, for_bb, ex_bb;
1962 tree t, param;
1963 gomp_parallel *omp_par_stmt;
1964 gimple omp_return_stmt1, omp_return_stmt2;
1965 gimple phi;
1966 gcond *cond_stmt;
1967 gomp_for *for_stmt;
1968 gomp_continue *omp_cont_stmt;
1969 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1970 edge exit, nexit, guard, end, e;
1971
1972 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1973 bb = loop_preheader_edge (loop)->src;
1974 paral_bb = single_pred (bb);
1975 gsi = gsi_last_bb (paral_bb);
1976
1977 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1978 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1979 = build_int_cst (integer_type_node, n_threads);
1980 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1981 gimple_set_location (omp_par_stmt, loc);
1982
1983 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
1984
1985 /* Initialize NEW_DATA. */
1986 if (data)
1987 {
1988 gassign *assign_stmt;
1989
1990 gsi = gsi_after_labels (bb);
1991
1992 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
1993 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1994 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
1995
1996 assign_stmt = gimple_build_assign (new_data,
1997 fold_convert (TREE_TYPE (new_data), param));
1998 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
1999 }
2000
2001 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2002 bb = split_loop_exit_edge (single_dom_exit (loop));
2003 gsi = gsi_last_bb (bb);
2004 omp_return_stmt1 = gimple_build_omp_return (false);
2005 gimple_set_location (omp_return_stmt1, loc);
2006 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2007
2008 /* Extract data for GIMPLE_OMP_FOR. */
2009 gcc_assert (loop->header == single_dom_exit (loop)->src);
2010 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2011
2012 cvar = gimple_cond_lhs (cond_stmt);
2013 cvar_base = SSA_NAME_VAR (cvar);
2014 phi = SSA_NAME_DEF_STMT (cvar);
2015 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2016 initvar = copy_ssa_name (cvar);
2017 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2018 initvar);
2019 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2020
2021 gsi = gsi_last_nondebug_bb (loop->latch);
2022 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2023 gsi_remove (&gsi, true);
2024
2025 /* Prepare cfg. */
2026 for_bb = split_edge (loop_preheader_edge (loop));
2027 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2028 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2029 gcc_assert (exit == single_dom_exit (loop));
2030
2031 guard = make_edge (for_bb, ex_bb, 0);
2032 single_succ_edge (loop->latch)->flags = 0;
2033 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
2034 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2035 !gsi_end_p (gpi); gsi_next (&gpi))
2036 {
2037 source_location locus;
2038 tree def;
2039 gphi *phi = gpi.phi ();
2040 gphi *stmt;
2041
2042 stmt = as_a <gphi *> (
2043 SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));
2044
2045 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2046 locus = gimple_phi_arg_location_from_edge (stmt,
2047 loop_preheader_edge (loop));
2048 add_phi_arg (phi, def, guard, locus);
2049
2050 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2051 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2052 add_phi_arg (phi, def, end, locus);
2053 }
2054 e = redirect_edge_and_branch (exit, nexit->dest);
2055 PENDING_STMT (e) = NULL;
2056
2057 /* Emit GIMPLE_OMP_FOR. */
2058 gimple_cond_set_lhs (cond_stmt, cvar_base);
2059 type = TREE_TYPE (cvar);
2060 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2061 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2062
2063 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
2064 gimple_set_location (for_stmt, loc);
2065 gimple_omp_for_set_index (for_stmt, 0, initvar);
2066 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2067 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2068 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2069 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2070 cvar_base,
2071 build_int_cst (type, 1)));
2072
2073 gsi = gsi_last_bb (for_bb);
2074 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2075 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2076
2077 /* Emit GIMPLE_OMP_CONTINUE. */
2078 gsi = gsi_last_bb (loop->latch);
2079 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2080 gimple_set_location (omp_cont_stmt, loc);
2081 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2082 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2083
2084 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2085 gsi = gsi_last_bb (ex_bb);
2086 omp_return_stmt2 = gimple_build_omp_return (true);
2087 gimple_set_location (omp_return_stmt2, loc);
2088 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2089
2090 /* The above changes leave the dominance info out of date. Recompute it. */
2091 free_dominance_info (CDI_DOMINATORS);
2092 calculate_dominance_info (CDI_DOMINATORS);
2093
2094 return paral_bb;
2095 }
2096
2097 /* Generates code to execute the iterations of LOOP in N_THREADS
2098 threads in parallel.
2099
2100 NITER describes the number of iterations of LOOP.
2101 REDUCTION_LIST describes the reductions in LOOP. */
2102
2103 static void
2104 gen_parallel_loop (struct loop *loop,
2105 reduction_info_table_type *reduction_list,
2106 unsigned n_threads, struct tree_niter_desc *niter)
2107 {
2108 tree many_iterations_cond, type, nit;
2109 tree arg_struct, new_arg_struct;
2110 gimple_seq stmts;
2111 edge entry, exit;
2112 struct clsn_data clsn_data;
2113 unsigned prob;
2114 location_t loc;
2115 gimple cond_stmt;
2116 unsigned int m_p_thread = 2;
2117
2118 /* From
2119
2120 ---------------------------------------------------------------------
2121 loop
2122 {
2123 IV = phi (INIT, IV + STEP)
2124 BODY1;
2125 if (COND)
2126 break;
2127 BODY2;
2128 }
2129 ---------------------------------------------------------------------
2130
2131 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2132 we generate the following code:
2133
2134 ---------------------------------------------------------------------
2135
2136 if (MAY_BE_ZERO
2137 || NITER < MIN_PER_THREAD * N_THREADS)
2138 goto original;
2139
2140 BODY1;
2141 store all local loop-invariant variables used in body of the loop to DATA.
2142 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2143 load the variables from DATA.
2144 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2145 BODY2;
2146 BODY1;
2147 GIMPLE_OMP_CONTINUE;
2148 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2149 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2150 goto end;
2151
2152 original:
2153 loop
2154 {
2155 IV = phi (INIT, IV + STEP)
2156 BODY1;
2157 if (COND)
2158 break;
2159 BODY2;
2160 }
2161
2162 end:
2163
2164 */
2165
2166 /* Create two versions of the loop -- in the old one, we know that the
2167 number of iterations is large enough, and we will transform it into the
2168 loop that will be split off into loop_fn; the new one will be used for
2169 the remaining iterations. */
2170
2171 /* We should compute a better number-of-iterations value for outer loops.
2172 That is, if we have
2173
2174 for (i = 0; i < n; ++i)
2175 for (j = 0; j < m; ++j)
2176 ...
2177
2178 we should compute nit = n * m, not nit = n.
2179 Also may_be_zero handling would need to be adjusted. */
2180
2181 type = TREE_TYPE (niter->niter);
2182 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2183 NULL_TREE);
2184 if (stmts)
2185 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2186
2187 if (loop->inner)
2188 m_p_thread = 2;
2189 else
2190 m_p_thread = MIN_PER_THREAD;
2191
2192 many_iterations_cond
2193 = fold_build2 (GE_EXPR, boolean_type_node,
2194 nit, build_int_cst (type, m_p_thread * n_threads));
2195
2196 many_iterations_cond
2197 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2198 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2199 many_iterations_cond);
2200 many_iterations_cond
2201 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2202 if (stmts)
2203 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2204 if (!is_gimple_condexpr (many_iterations_cond))
2205 {
2206 many_iterations_cond
2207 = force_gimple_operand (many_iterations_cond, &stmts,
2208 true, NULL_TREE);
2209 if (stmts)
2210 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2211 }
2212
2213 initialize_original_copy_tables ();
2214
2215 /* We assume that the loop usually iterates a lot. */
2216 prob = 4 * REG_BR_PROB_BASE / 5;
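/* I.e. an assumed 80% probability (REG_BR_PROB_BASE being the 100%
   mark) that the many-iterations version is entered. */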
2217 loop_version (loop, many_iterations_cond, NULL,
2218 prob, prob, REG_BR_PROB_BASE - prob, true);
2219 update_ssa (TODO_update_ssa);
2220 free_original_copy_tables ();
2221
2222 /* Base all the induction variables in LOOP on a single control one. */
2223 canonicalize_loop_ivs (loop, &nit, true);
2224
2225 /* Ensure that the exit condition is the first statement in the loop.
2226 The common case is that the latch of the loop is empty (apart from the
2227 increment) and immediately follows the loop exit test. Attempt to move the
2228 entry of the loop directly before the exit check and increase the number of
2229 iterations of the loop by one. */
2230 if (!try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2231 {
2232 /* Fall back on the method that handles more cases, but duplicates the
2233 loop body: move the exit condition of LOOP to the beginning of its
2234 header, and duplicate the part of the last iteration that gets disabled
2235 to the exit of the loop. */
2236 transform_to_exit_first_loop (loop, reduction_list, nit);
2237 }
2238
2239 /* Generate initializations for reductions. */
2240 if (reduction_list->elements () > 0)
2241 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2242
2243 /* Eliminate the references to local variables from the loop. */
2244 gcc_assert (single_exit (loop));
2245 entry = loop_preheader_edge (loop);
2246 exit = single_dom_exit (loop);
2247
2248 eliminate_local_variables (entry, exit);
2249 /* In the old loop, move all variables that are non-local to the loop into a
2250 structure and back, and create separate decls for the variables used in the loop. */
2251 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2252 &new_arg_struct, &clsn_data);
2253
2254 /* Create the parallel constructs. */
2255 loc = UNKNOWN_LOCATION;
2256 cond_stmt = last_stmt (loop->header);
2257 if (cond_stmt)
2258 loc = gimple_location (cond_stmt);
2259 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2260 new_arg_struct, n_threads, loc);
2261 if (reduction_list->elements () > 0)
2262 create_call_for_reduction (loop, reduction_list, &clsn_data);
2263
2264 scev_reset ();
2265
2266 /* Cancel the loop (it is simpler to do it here rather than to teach the
2267 expander to do it). */
2268 cancel_loop_tree (loop);
2269
2270 /* Free loop bound estimations that could contain references to
2271 removed statements. */
2272 FOR_EACH_LOOP (loop, 0)
2273 free_numbers_of_iterations_estimates_loop (loop);
2274 }
2275
2276 /* Returns true when LOOP contains vector phi nodes. */
2277
2278 static bool
2279 loop_has_vector_phi_nodes (struct loop *loop)
2280 {
2281 unsigned i;
2282 basic_block *bbs = get_loop_body_in_dom_order (loop);
2283 gphi_iterator gsi;
2284 bool res = true;
2285
2286 for (i = 0; i < loop->num_nodes; i++)
2287 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2288 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2289 goto end;
2290
2291 res = false;
2292 end:
2293 free (bbs);
2294 return res;
2295 }
2296
2297 /* Create a reduction_info struct, initialize it with REDUC_STMT
2298 and PHI, and insert it into REDUCTION_LIST. */
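/* E.g. (an illustrative source-level example): for

     sum = 0;
     for (i = 0; i < n; i++)
       sum += a[i];

   PHI is the loop-header phi sum_1 = PHI <0 (preheader), sum_2 (latch)>,
   and REDUC_STMT is the statement computing sum_2 from sum_1. */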
2299
2300 static void
2301 build_new_reduction (reduction_info_table_type *reduction_list,
2302 gimple reduc_stmt, gphi *phi)
2303 {
2304 reduction_info **slot;
2305 struct reduction_info *new_reduction;
2306
2307 gcc_assert (reduc_stmt);
2308
2309 if (dump_file && (dump_flags & TDF_DETAILS))
2310 {
2311 fprintf (dump_file,
2312 "Detected reduction. reduction stmt is: \n");
2313 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2314 fprintf (dump_file, "\n");
2315 }
2316
2317 new_reduction = XCNEW (struct reduction_info);
2318
2319 new_reduction->reduc_stmt = reduc_stmt;
2320 new_reduction->reduc_phi = phi;
2321 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2322 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
2323 slot = reduction_list->find_slot (new_reduction, INSERT);
2324 *slot = new_reduction;
2325 }
2326
2327 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2328
2329 int
2330 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2331 {
2332 struct reduction_info *const red = *slot;
2333 gimple_set_uid (red->reduc_phi, red->reduc_version);
2334 return 1;
2335 }
2336
2337 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2338
2339 static void
2340 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2341 {
2342 gphi_iterator gsi;
2343 loop_vec_info simple_loop_info;
2344
2345 simple_loop_info = vect_analyze_loop_form (loop);
2346
2347 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2348 {
2349 gphi *phi = gsi.phi ();
2350 affine_iv iv;
2351 tree res = PHI_RESULT (phi);
2352 bool double_reduc;
2353
2354 if (virtual_operand_p (res))
2355 continue;
2356
2357 if (!simple_iv (loop, loop, res, &iv, true)
2358 && simple_loop_info)
2359 {
2360 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
2361 phi, true,
2362 &double_reduc);
2363 if (reduc_stmt && !double_reduc)
2364 build_new_reduction (reduction_list, reduc_stmt, phi);
2365 }
2366 }
2367 destroy_loop_vec_info (simple_loop_info, true);
2368
2369 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2370 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2371 only now. */
2372 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2373 }
2374
2375 /* Try to initialize NITER for the code generation part. */
2376
2377 static bool
2378 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2379 {
2380 edge exit = single_dom_exit (loop);
2381
2382 gcc_assert (exit);
2383
2384 /* We need to know # of iterations, and there should be no uses of values
2385 defined inside the loop outside of it, unless the values are invariants of
2386 the loop. */
2387 if (!number_of_iterations_exit (loop, exit, niter, false))
2388 {
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2390 fprintf (dump_file, " FAILED: number of iterations not known\n");
2391 return false;
2392 }
2393
2394 return true;
2395 }
2396
2397 /* Try to initialize REDUCTION_LIST for the code generation part.
2398 REDUCTION_LIST describes the reductions. */
2399
2400 static bool
2401 try_create_reduction_list (loop_p loop,
2402 reduction_info_table_type *reduction_list)
2403 {
2404 edge exit = single_dom_exit (loop);
2405 gphi_iterator gsi;
2406
2407 gcc_assert (exit);
2408
2409 gather_scalar_reductions (loop, reduction_list);
2410
2411
2412 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2413 {
2414 gphi *phi = gsi.phi ();
2415 struct reduction_info *red;
2416 imm_use_iterator imm_iter;
2417 use_operand_p use_p;
2418 gimple reduc_phi;
2419 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2420
2421 if (!virtual_operand_p (val))
2422 {
2423 if (dump_file && (dump_flags & TDF_DETAILS))
2424 {
2425 fprintf (dump_file, "phi is ");
2426 print_gimple_stmt (dump_file, phi, 0, 0);
2427 fprintf (dump_file, "arg of phi to exit: value ");
2428 print_generic_expr (dump_file, val, 0);
2429 fprintf (dump_file, " used outside loop\n");
2430 fprintf (dump_file,
2431 " checking if it a part of reduction pattern: \n");
2432 }
2433 if (reduction_list->elements () == 0)
2434 {
2435 if (dump_file && (dump_flags & TDF_DETAILS))
2436 fprintf (dump_file,
2437 " FAILED: it is not a part of reduction.\n");
2438 return false;
2439 }
2440 reduc_phi = NULL;
2441 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2442 {
2443 if (!gimple_debug_bind_p (USE_STMT (use_p))
2444 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2445 {
2446 reduc_phi = USE_STMT (use_p);
2447 break;
2448 }
2449 }
2450 red = reduction_phi (reduction_list, reduc_phi);
2451 if (red == NULL)
2452 {
2453 if (dump_file && (dump_flags & TDF_DETAILS))
2454 fprintf (dump_file,
2455 " FAILED: it is not a part of reduction.\n");
2456 return false;
2457 }
2458 if (dump_file && (dump_flags & TDF_DETAILS))
2459 {
2460 fprintf (dump_file, "reduction phi is ");
2461 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2462 fprintf (dump_file, "reduction stmt is ");
2463 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2464 }
2465 }
2466 }
2467
2468 /* The iterations of the loop may communicate only through bivs whose
2469 iteration space can be distributed efficiently. */
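/* For instance (illustrative): in

     for (i = 0; i < n; i++)
       s = 2 * s + a[i];

   the header phi for s is neither a simple affine iv nor a recognized
   reduction, so the loop is rejected below with
   "FAILED: scalar dependency between iterations". */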
2470 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2471 {
2472 gphi *phi = gsi.phi ();
2473 tree def = PHI_RESULT (phi);
2474 affine_iv iv;
2475
2476 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2477 {
2478 struct reduction_info *red;
2479
2480 red = reduction_phi (reduction_list, phi);
2481 if (red == NULL)
2482 {
2483 if (dump_file && (dump_flags & TDF_DETAILS))
2484 fprintf (dump_file,
2485 " FAILED: scalar dependency between iterations\n");
2486 return false;
2487 }
2488 }
2489 }
2490
2491
2492 return true;
2493 }
2494
2495 /* Detect parallel loops and generate parallel code using libgomp
2496 primitives. Returns true if some loop was parallelized, false
2497 otherwise. */
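/* The number of threads comes from -ftree-parallelize-loops=N
   (flag_tree_parallelize_loops); e.g. compiling with

     gcc -O2 -ftree-parallelize-loops=4 ...

   requests four threads per parallelized loop. The pass gate below
   requires N > 1. */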
2498
2499 static bool
2500 parallelize_loops (void)
2501 {
2502 unsigned n_threads = flag_tree_parallelize_loops;
2503 bool changed = false;
2504 struct loop *loop;
2505 struct tree_niter_desc niter_desc;
2506 struct obstack parloop_obstack;
2507 HOST_WIDE_INT estimated;
2508 source_location loop_loc;
2509
2510 /* Do not parallelize loops in the functions created by parallelization. */
2511 if (parallelized_function_p (cfun->decl))
2512 return false;
2513 if (cfun->has_nonlocal_label)
2514 return false;
2515
2516 gcc_obstack_init (&parloop_obstack);
2517 reduction_info_table_type reduction_list (10);
2518 init_stmt_vec_info_vec ();
2519
2520 FOR_EACH_LOOP (loop, 0)
2521 {
2522 reduction_list.empty ();
2523 if (dump_file && (dump_flags & TDF_DETAILS))
2524 {
2525 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2526 if (loop->inner)
2527 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2528 else
2529 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2530 }
2531
2532 /* If we use autopar in the graphite pass, we use its marked dependency
2533 checking results. */
2534 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2535 {
2536 if (dump_file && (dump_flags & TDF_DETAILS))
2537 fprintf (dump_file, "loop is not parallel according to graphite\n");
2538 continue;
2539 }
2540
2541 if (!single_dom_exit (loop))
2542 {
2543
2544 if (dump_file && (dump_flags & TDF_DETAILS))
2545 fprintf (dump_file, "loop is !single_dom_exit\n");
2546
2547 continue;
2548 }
2549
2550 if (/* And of course, the loop must be parallelizable. */
2551 !can_duplicate_loop_p (loop)
2552 || loop_has_blocks_with_irreducible_flag (loop)
2553 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2554 /* FIXME: the check for vector phi nodes could be removed. */
2555 || loop_has_vector_phi_nodes (loop))
2556 continue;
2557
2558 estimated = estimated_stmt_executions_int (loop);
2559 if (estimated == -1)
2560 estimated = max_stmt_executions_int (loop);
2561 /* FIXME: Bypass this check as graphite doesn't update the
2562 count and frequency correctly now. */
2563 if (!flag_loop_parallelize_all
2564 && ((estimated != -1
2565 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2566 /* Do not bother with loops in cold areas. */
2567 || optimize_loop_nest_for_size_p (loop)))
2568 continue;
2569
2570 if (!try_get_loop_niter (loop, &niter_desc))
2571 continue;
2572
2573 if (!try_create_reduction_list (loop, &reduction_list))
2574 continue;
2575
2576 if (!flag_loop_parallelize_all
2577 && !loop_parallel_p (loop, &parloop_obstack))
2578 continue;
2579
2580 changed = true;
2581 if (dump_file && (dump_flags & TDF_DETAILS))
2582 {
2583 if (loop->inner)
2584 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2585 else
2586 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2587 loop_loc = find_loop_location (loop);
2588 if (loop_loc != UNKNOWN_LOCATION)
2589 fprintf (dump_file, "\nloop at %s:%d: ",
2590 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
2591 }
2592 gen_parallel_loop (loop, &reduction_list,
2593 n_threads, &niter_desc);
2594 }
2595
2596 free_stmt_vec_info_vec ();
2597 obstack_free (&parloop_obstack, NULL);
2598
2599 /* Parallelization will cause new function calls to be inserted through
2600 which local variables will escape. Reset the points-to solution
2601 for ESCAPED. */
2602 if (changed)
2603 pt_solution_reset (&cfun->gimple_df->escaped);
2604
2605 return changed;
2606 }
2607
2608 /* Parallelization. */
2609
2610 namespace {
2611
2612 const pass_data pass_data_parallelize_loops =
2613 {
2614 GIMPLE_PASS, /* type */
2615 "parloops", /* name */
2616 OPTGROUP_LOOP, /* optinfo_flags */
2617 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2618 ( PROP_cfg | PROP_ssa ), /* properties_required */
2619 0, /* properties_provided */
2620 0, /* properties_destroyed */
2621 0, /* todo_flags_start */
2622 0, /* todo_flags_finish */
2623 };
2624
2625 class pass_parallelize_loops : public gimple_opt_pass
2626 {
2627 public:
2628 pass_parallelize_loops (gcc::context *ctxt)
2629 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2630 {}
2631
2632 /* opt_pass methods: */
2633 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
2634 virtual unsigned int execute (function *);
2635
2636 }; // class pass_parallelize_loops
2637
2638 unsigned
2639 pass_parallelize_loops::execute (function *fun)
2640 {
2641 if (number_of_loops (fun) <= 1)
2642 return 0;
2643
2644 if (parallelize_loops ())
2645 {
2646 fun->curr_properties &= ~(PROP_gimple_eomp);
2647 return TODO_update_ssa;
2648 }
2649
2650 return 0;
2651 }
2652
2653 } // anon namespace
2654
2655 gimple_opt_pass *
2656 make_pass_parallelize_loops (gcc::context *ctxt)
2657 {
2658 return new pass_parallelize_loops (ctxt);
2659 }