]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-parloops.c
Add transform_to_exit_first_loop_alt dump success message
[thirdparty/gcc.git] / gcc / tree-parloops.c
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "alias.h"
26 #include "backend.h"
27 #include "cfghooks.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "hard-reg-set.h"
31 #include "ssa.h"
32 #include "options.h"
33 #include "fold-const.h"
34 #include "internal-fn.h"
35 #include "gimplify.h"
36 #include "gimple-iterator.h"
37 #include "gimplify-me.h"
38 #include "gimple-walk.h"
39 #include "stor-layout.h"
40 #include "tree-nested.h"
41 #include "tree-cfg.h"
42 #include "tree-ssa-loop-ivopts.h"
43 #include "tree-ssa-loop-manip.h"
44 #include "tree-ssa-loop-niter.h"
45 #include "tree-ssa-loop.h"
46 #include "tree-into-ssa.h"
47 #include "cfgloop.h"
48 #include "tree-data-ref.h"
49 #include "tree-scalar-evolution.h"
50 #include "gimple-pretty-print.h"
51 #include "tree-pass.h"
52 #include "langhooks.h"
53 #include "tree-vectorizer.h"
54 #include "tree-hasher.h"
55 #include "tree-parloops.h"
56 #include "omp-low.h"
57 #include "tree-nested.h"
58 #include "cgraph.h"
59 #include "tree-ssa.h"
60
61 /* This pass tries to distribute iterations of loops into several threads.
62 The implementation is straightforward -- for each loop we test whether its
63 iterations are independent, and if it is the case (and some additional
64 conditions regarding profitability and correctness are satisfied), we
65 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
66 machinery do its job.
67
68 The most of the complexity is in bringing the code into shape expected
69 by the omp expanders:
70 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
71 variable and that the exit test is at the start of the loop body
72 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
73 variables by accesses through pointers, and breaking up ssa chains
74 by storing the values incoming to the parallelized loop to a structure
75 passed to the new function as an argument (something similar is done
76 in omp gimplification, unfortunately only a small part of the code
77 can be shared).
78
79 TODO:
80 -- if there are several parallelizable loops in a function, it may be
81 possible to generate the threads just once (using synchronization to
82 ensure that cross-loop dependences are obeyed).
83 -- handling of common reduction patterns for outer loops.
84
85 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
86 /*
87 Reduction handling:
88 currently we use vect_force_simple_reduction() to detect reduction patterns.
89 The code transformation will be introduced by an example.
90
91
92 parloop
93 {
94 int sum=1;
95
96 for (i = 0; i < N; i++)
97 {
98 x[i] = i + 3;
99 sum+=x[i];
100 }
101 }
102
103 gimple-like code:
104 header_bb:
105
106 # sum_29 = PHI <sum_11(5), 1(3)>
107 # i_28 = PHI <i_12(5), 0(3)>
108 D.1795_8 = i_28 + 3;
109 x[i_28] = D.1795_8;
110 sum_11 = D.1795_8 + sum_29;
111 i_12 = i_28 + 1;
112 if (N_6(D) > i_12)
113 goto header_bb;
114
115
116 exit_bb:
117
118 # sum_21 = PHI <sum_11(4)>
119 printf (&"%d"[0], sum_21);
120
121
122 after reduction transformation (only relevant parts):
123
124 parloop
125 {
126
127 ....
128
129
130 # Storing the initial value given by the user. #
131
132 .paral_data_store.32.sum.27 = 1;
133
134 #pragma omp parallel num_threads(4)
135
136 #pragma omp for schedule(static)
137
138 # The neutral element corresponding to the particular
139 reduction's operation, e.g. 0 for PLUS_EXPR,
140 1 for MULT_EXPR, etc. replaces the user's initial value. #
141
142 # sum.27_29 = PHI <sum.27_11, 0>
143
144 sum.27_11 = D.1827_8 + sum.27_29;
145
146 GIMPLE_OMP_CONTINUE
147
148 # Adding this reduction phi is done at create_phi_for_local_result() #
149 # sum.27_56 = PHI <sum.27_11, 0>
150 GIMPLE_OMP_RETURN
151
152 # Creating the atomic operation is done at
153 create_call_for_reduction_1() #
154
155 #pragma omp atomic_load
156 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
157 D.1840_60 = sum.27_56 + D.1839_59;
158 #pragma omp atomic_store (D.1840_60);
159
160 GIMPLE_OMP_RETURN
161
162 # collecting the result after the join of the threads is done at
163 create_loads_for_reductions().
164 The value computed by the threads is loaded from the
165 shared struct. #
166
167
168 .paral_data_load.33_52 = &.paral_data_store.32;
169 sum_37 = .paral_data_load.33_52->sum.27;
170 sum_43 = D.1795_41 + sum_37;
171
172 exit bb:
173 # sum_21 = PHI <sum_43, sum_26>
174 printf (&"%d"[0], sum_21);
175
176 ...
177
178 }
179
180 */
181
182 /* Minimal number of iterations of a loop that should be executed in each
183 thread. */
184 #define MIN_PER_THREAD 100
185
186 /* Element of the hashtable, representing a
187 reduction in the current loop. */
188 struct reduction_info
189 {
190 gimple reduc_stmt; /* reduction statement. */
191 gimple reduc_phi; /* The phi node defining the reduction. */
192 enum tree_code reduction_code;/* code for the reduction operation. */
193 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
194 result. */
195 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
196 of the reduction variable when existing the loop. */
197 tree initial_value; /* The initial value of the reduction var before entering the loop. */
198 tree field; /* the name of the field in the parloop data structure intended for reduction. */
199 tree init; /* reduction initialization value. */
200 gphi *new_phi; /* (helper field) Newly created phi node whose result
201 will be passed to the atomic operation. Represents
202 the local result each thread computed for the reduction
203 operation. */
204 };
205
206 /* Reduction info hashtable helpers. */
207
208 struct reduction_hasher : free_ptr_hash <reduction_info>
209 {
210 static inline hashval_t hash (const reduction_info *);
211 static inline bool equal (const reduction_info *, const reduction_info *);
212 };
213
214 /* Equality and hash functions for hashtab code. */
215
216 inline bool
217 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
218 {
219 return (a->reduc_phi == b->reduc_phi);
220 }
221
222 inline hashval_t
223 reduction_hasher::hash (const reduction_info *a)
224 {
225 return a->reduc_version;
226 }
227
228 typedef hash_table<reduction_hasher> reduction_info_table_type;
229
230
231 static struct reduction_info *
232 reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
233 {
234 struct reduction_info tmpred, *red;
235
236 if (reduction_list->elements () == 0 || phi == NULL)
237 return NULL;
238
239 tmpred.reduc_phi = phi;
240 tmpred.reduc_version = gimple_uid (phi);
241 red = reduction_list->find (&tmpred);
242
243 return red;
244 }
245
246 /* Element of hashtable of names to copy. */
247
248 struct name_to_copy_elt
249 {
250 unsigned version; /* The version of the name to copy. */
251 tree new_name; /* The new name used in the copy. */
252 tree field; /* The field of the structure used to pass the
253 value. */
254 };
255
256 /* Name copies hashtable helpers. */
257
258 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
259 {
260 static inline hashval_t hash (const name_to_copy_elt *);
261 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
262 };
263
264 /* Equality and hash functions for hashtab code. */
265
266 inline bool
267 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
268 {
269 return a->version == b->version;
270 }
271
272 inline hashval_t
273 name_to_copy_hasher::hash (const name_to_copy_elt *a)
274 {
275 return (hashval_t) a->version;
276 }
277
278 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
279
280 /* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
281 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
282 represents the denominator for every element in the matrix. */
283 typedef struct lambda_trans_matrix_s
284 {
285 lambda_matrix matrix;
286 int rowsize;
287 int colsize;
288 int denominator;
289 } *lambda_trans_matrix;
290 #define LTM_MATRIX(T) ((T)->matrix)
291 #define LTM_ROWSIZE(T) ((T)->rowsize)
292 #define LTM_COLSIZE(T) ((T)->colsize)
293 #define LTM_DENOMINATOR(T) ((T)->denominator)
294
295 /* Allocate a new transformation matrix. */
296
297 static lambda_trans_matrix
298 lambda_trans_matrix_new (int colsize, int rowsize,
299 struct obstack * lambda_obstack)
300 {
301 lambda_trans_matrix ret;
302
303 ret = (lambda_trans_matrix)
304 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
305 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
306 LTM_ROWSIZE (ret) = rowsize;
307 LTM_COLSIZE (ret) = colsize;
308 LTM_DENOMINATOR (ret) = 1;
309 return ret;
310 }
311
312 /* Multiply a vector VEC by a matrix MAT.
313 MAT is an M*N matrix, and VEC is a vector with length N. The result
314 is stored in DEST which must be a vector of length M. */
315
316 static void
317 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
318 lambda_vector vec, lambda_vector dest)
319 {
320 int i, j;
321
322 lambda_vector_clear (dest, m);
323 for (i = 0; i < m; i++)
324 for (j = 0; j < n; j++)
325 dest[i] += matrix[i][j] * vec[j];
326 }
327
328 /* Return true if TRANS is a legal transformation matrix that respects
329 the dependence vectors in DISTS and DIRS. The conservative answer
330 is false.
331
332 "Wolfe proves that a unimodular transformation represented by the
333 matrix T is legal when applied to a loop nest with a set of
334 lexicographically non-negative distance vectors RDG if and only if
335 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
336 i.e.: if and only if it transforms the lexicographically positive
337 distance vectors to lexicographically positive vectors. Note that
338 a unimodular matrix must transform the zero vector (and only it) to
339 the zero vector." S.Muchnick. */
340
341 static bool
342 lambda_transform_legal_p (lambda_trans_matrix trans,
343 int nb_loops,
344 vec<ddr_p> dependence_relations)
345 {
346 unsigned int i, j;
347 lambda_vector distres;
348 struct data_dependence_relation *ddr;
349
350 gcc_assert (LTM_COLSIZE (trans) == nb_loops
351 && LTM_ROWSIZE (trans) == nb_loops);
352
353 /* When there are no dependences, the transformation is correct. */
354 if (dependence_relations.length () == 0)
355 return true;
356
357 ddr = dependence_relations[0];
358 if (ddr == NULL)
359 return true;
360
361 /* When there is an unknown relation in the dependence_relations, we
362 know that it is no worth looking at this loop nest: give up. */
363 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
364 return false;
365
366 distres = lambda_vector_new (nb_loops);
367
368 /* For each distance vector in the dependence graph. */
369 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
370 {
371 /* Don't care about relations for which we know that there is no
372 dependence, nor about read-read (aka. output-dependences):
373 these data accesses can happen in any order. */
374 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
375 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
376 continue;
377
378 /* Conservatively answer: "this transformation is not valid". */
379 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
380 return false;
381
382 /* If the dependence could not be captured by a distance vector,
383 conservatively answer that the transform is not valid. */
384 if (DDR_NUM_DIST_VECTS (ddr) == 0)
385 return false;
386
387 /* Compute trans.dist_vect */
388 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
389 {
390 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
391 DDR_DIST_VECT (ddr, j), distres);
392
393 if (!lambda_vector_lexico_pos (distres, nb_loops))
394 return false;
395 }
396 }
397 return true;
398 }
399
400 /* Data dependency analysis. Returns true if the iterations of LOOP
401 are independent on each other (that is, if we can execute them
402 in parallel). */
403
404 static bool
405 loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
406 {
407 vec<ddr_p> dependence_relations;
408 vec<data_reference_p> datarefs;
409 lambda_trans_matrix trans;
410 bool ret = false;
411
412 if (dump_file && (dump_flags & TDF_DETAILS))
413 {
414 fprintf (dump_file, "Considering loop %d\n", loop->num);
415 if (!loop->inner)
416 fprintf (dump_file, "loop is innermost\n");
417 else
418 fprintf (dump_file, "loop NOT innermost\n");
419 }
420
421 /* Check for problems with dependences. If the loop can be reversed,
422 the iterations are independent. */
423 auto_vec<loop_p, 3> loop_nest;
424 datarefs.create (10);
425 dependence_relations.create (100);
426 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
427 &dependence_relations))
428 {
429 if (dump_file && (dump_flags & TDF_DETAILS))
430 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
431 ret = false;
432 goto end;
433 }
434 if (dump_file && (dump_flags & TDF_DETAILS))
435 dump_data_dependence_relations (dump_file, dependence_relations);
436
437 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
438 LTM_MATRIX (trans)[0][0] = -1;
439
440 if (lambda_transform_legal_p (trans, 1, dependence_relations))
441 {
442 ret = true;
443 if (dump_file && (dump_flags & TDF_DETAILS))
444 fprintf (dump_file, " SUCCESS: may be parallelized\n");
445 }
446 else if (dump_file && (dump_flags & TDF_DETAILS))
447 fprintf (dump_file,
448 " FAILED: data dependencies exist across iterations\n");
449
450 end:
451 free_dependence_relations (dependence_relations);
452 free_data_refs (datarefs);
453
454 return ret;
455 }
456
457 /* Return true when LOOP contains basic blocks marked with the
458 BB_IRREDUCIBLE_LOOP flag. */
459
460 static inline bool
461 loop_has_blocks_with_irreducible_flag (struct loop *loop)
462 {
463 unsigned i;
464 basic_block *bbs = get_loop_body_in_dom_order (loop);
465 bool res = true;
466
467 for (i = 0; i < loop->num_nodes; i++)
468 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
469 goto end;
470
471 res = false;
472 end:
473 free (bbs);
474 return res;
475 }
476
477 /* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
478 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
479 to their addresses that can be reused. The address of OBJ is known to
480 be invariant in the whole function. Other needed statements are placed
481 right before GSI. */
482
483 static tree
484 take_address_of (tree obj, tree type, edge entry,
485 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
486 {
487 int uid;
488 tree *var_p, name, addr;
489 gassign *stmt;
490 gimple_seq stmts;
491
492 /* Since the address of OBJ is invariant, the trees may be shared.
493 Avoid rewriting unrelated parts of the code. */
494 obj = unshare_expr (obj);
495 for (var_p = &obj;
496 handled_component_p (*var_p);
497 var_p = &TREE_OPERAND (*var_p, 0))
498 continue;
499
500 /* Canonicalize the access to base on a MEM_REF. */
501 if (DECL_P (*var_p))
502 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
503
504 /* Assign a canonical SSA name to the address of the base decl used
505 in the address and share it for all accesses and addresses based
506 on it. */
507 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
508 int_tree_map elt;
509 elt.uid = uid;
510 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
511 if (!slot->to)
512 {
513 if (gsi == NULL)
514 return NULL;
515 addr = TREE_OPERAND (*var_p, 0);
516 const char *obj_name
517 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
518 if (obj_name)
519 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
520 else
521 name = make_ssa_name (TREE_TYPE (addr));
522 stmt = gimple_build_assign (name, addr);
523 gsi_insert_on_edge_immediate (entry, stmt);
524
525 slot->uid = uid;
526 slot->to = name;
527 }
528 else
529 name = slot->to;
530
531 /* Express the address in terms of the canonical SSA name. */
532 TREE_OPERAND (*var_p, 0) = name;
533 if (gsi == NULL)
534 return build_fold_addr_expr_with_type (obj, type);
535
536 name = force_gimple_operand (build_addr (obj, current_function_decl),
537 &stmts, true, NULL_TREE);
538 if (!gimple_seq_empty_p (stmts))
539 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
540
541 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
542 {
543 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
544 NULL_TREE);
545 if (!gimple_seq_empty_p (stmts))
546 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
547 }
548
549 return name;
550 }
551
552 /* Callback for htab_traverse. Create the initialization statement
553 for reduction described in SLOT, and place it at the preheader of
554 the loop described in DATA. */
555
556 int
557 initialize_reductions (reduction_info **slot, struct loop *loop)
558 {
559 tree init, c;
560 tree bvar, type, arg;
561 edge e;
562
563 struct reduction_info *const reduc = *slot;
564
565 /* Create initialization in preheader:
566 reduction_variable = initialization value of reduction. */
567
568 /* In the phi node at the header, replace the argument coming
569 from the preheader with the reduction initialization value. */
570
571 /* Create a new variable to initialize the reduction. */
572 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
573 bvar = create_tmp_var (type, "reduction");
574
575 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
576 OMP_CLAUSE_REDUCTION);
577 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
578 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
579
580 init = omp_reduction_init (c, TREE_TYPE (bvar));
581 reduc->init = init;
582
583 /* Replace the argument representing the initialization value
584 with the initialization value for the reduction (neutral
585 element for the particular operation, e.g. 0 for PLUS_EXPR,
586 1 for MULT_EXPR, etc).
587 Keep the old value in a new variable "reduction_initial",
588 that will be taken in consideration after the parallel
589 computing is done. */
590
591 e = loop_preheader_edge (loop);
592 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
593 /* Create new variable to hold the initial value. */
594
595 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
596 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
597 reduc->initial_value = arg;
598 return 1;
599 }
600
601 struct elv_data
602 {
603 struct walk_stmt_info info;
604 edge entry;
605 int_tree_htab_type *decl_address;
606 gimple_stmt_iterator *gsi;
607 bool changed;
608 bool reset;
609 };
610
611 /* Eliminates references to local variables in *TP out of the single
612 entry single exit region starting at DTA->ENTRY.
613 DECL_ADDRESS contains addresses of the references that had their
614 address taken already. If the expression is changed, CHANGED is
615 set to true. Callback for walk_tree. */
616
617 static tree
618 eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
619 {
620 struct elv_data *const dta = (struct elv_data *) data;
621 tree t = *tp, var, addr, addr_type, type, obj;
622
623 if (DECL_P (t))
624 {
625 *walk_subtrees = 0;
626
627 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
628 return NULL_TREE;
629
630 type = TREE_TYPE (t);
631 addr_type = build_pointer_type (type);
632 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
633 dta->gsi);
634 if (dta->gsi == NULL && addr == NULL_TREE)
635 {
636 dta->reset = true;
637 return NULL_TREE;
638 }
639
640 *tp = build_simple_mem_ref (addr);
641
642 dta->changed = true;
643 return NULL_TREE;
644 }
645
646 if (TREE_CODE (t) == ADDR_EXPR)
647 {
648 /* ADDR_EXPR may appear in two contexts:
649 -- as a gimple operand, when the address taken is a function invariant
650 -- as gimple rhs, when the resulting address in not a function
651 invariant
652 We do not need to do anything special in the latter case (the base of
653 the memory reference whose address is taken may be replaced in the
654 DECL_P case). The former case is more complicated, as we need to
655 ensure that the new address is still a gimple operand. Thus, it
656 is not sufficient to replace just the base of the memory reference --
657 we need to move the whole computation of the address out of the
658 loop. */
659 if (!is_gimple_val (t))
660 return NULL_TREE;
661
662 *walk_subtrees = 0;
663 obj = TREE_OPERAND (t, 0);
664 var = get_base_address (obj);
665 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
666 return NULL_TREE;
667
668 addr_type = TREE_TYPE (t);
669 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
670 dta->gsi);
671 if (dta->gsi == NULL && addr == NULL_TREE)
672 {
673 dta->reset = true;
674 return NULL_TREE;
675 }
676 *tp = addr;
677
678 dta->changed = true;
679 return NULL_TREE;
680 }
681
682 if (!EXPR_P (t))
683 *walk_subtrees = 0;
684
685 return NULL_TREE;
686 }
687
688 /* Moves the references to local variables in STMT at *GSI out of the single
689 entry single exit region starting at ENTRY. DECL_ADDRESS contains
690 addresses of the references that had their address taken
691 already. */
692
693 static void
694 eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
695 int_tree_htab_type *decl_address)
696 {
697 struct elv_data dta;
698 gimple stmt = gsi_stmt (*gsi);
699
700 memset (&dta.info, '\0', sizeof (dta.info));
701 dta.entry = entry;
702 dta.decl_address = decl_address;
703 dta.changed = false;
704 dta.reset = false;
705
706 if (gimple_debug_bind_p (stmt))
707 {
708 dta.gsi = NULL;
709 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
710 eliminate_local_variables_1, &dta.info, NULL);
711 if (dta.reset)
712 {
713 gimple_debug_bind_reset_value (stmt);
714 dta.changed = true;
715 }
716 }
717 else if (gimple_clobber_p (stmt))
718 {
719 stmt = gimple_build_nop ();
720 gsi_replace (gsi, stmt, false);
721 dta.changed = true;
722 }
723 else
724 {
725 dta.gsi = gsi;
726 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
727 }
728
729 if (dta.changed)
730 update_stmt (stmt);
731 }
732
733 /* Eliminates the references to local variables from the single entry
734 single exit region between the ENTRY and EXIT edges.
735
736 This includes:
737 1) Taking address of a local variable -- these are moved out of the
738 region (and temporary variable is created to hold the address if
739 necessary).
740
741 2) Dereferencing a local variable -- these are replaced with indirect
742 references. */
743
744 static void
745 eliminate_local_variables (edge entry, edge exit)
746 {
747 basic_block bb;
748 auto_vec<basic_block, 3> body;
749 unsigned i;
750 gimple_stmt_iterator gsi;
751 bool has_debug_stmt = false;
752 int_tree_htab_type decl_address (10);
753 basic_block entry_bb = entry->src;
754 basic_block exit_bb = exit->dest;
755
756 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
757
758 FOR_EACH_VEC_ELT (body, i, bb)
759 if (bb != entry_bb && bb != exit_bb)
760 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
761 if (is_gimple_debug (gsi_stmt (gsi)))
762 {
763 if (gimple_debug_bind_p (gsi_stmt (gsi)))
764 has_debug_stmt = true;
765 }
766 else
767 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
768
769 if (has_debug_stmt)
770 FOR_EACH_VEC_ELT (body, i, bb)
771 if (bb != entry_bb && bb != exit_bb)
772 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
773 if (gimple_debug_bind_p (gsi_stmt (gsi)))
774 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
775 }
776
777 /* Returns true if expression EXPR is not defined between ENTRY and
778 EXIT, i.e. if all its operands are defined outside of the region. */
779
780 static bool
781 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
782 {
783 basic_block entry_bb = entry->src;
784 basic_block exit_bb = exit->dest;
785 basic_block def_bb;
786
787 if (is_gimple_min_invariant (expr))
788 return true;
789
790 if (TREE_CODE (expr) == SSA_NAME)
791 {
792 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
793 if (def_bb
794 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
795 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
796 return false;
797
798 return true;
799 }
800
801 return false;
802 }
803
804 /* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
805 The copies are stored to NAME_COPIES, if NAME was already duplicated,
806 its duplicate stored in NAME_COPIES is returned.
807
808 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
809 duplicated, storing the copies in DECL_COPIES. */
810
811 static tree
812 separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
813 int_tree_htab_type *decl_copies,
814 bool copy_name_p)
815 {
816 tree copy, var, var_copy;
817 unsigned idx, uid, nuid;
818 struct int_tree_map ielt;
819 struct name_to_copy_elt elt, *nelt;
820 name_to_copy_elt **slot;
821 int_tree_map *dslot;
822
823 if (TREE_CODE (name) != SSA_NAME)
824 return name;
825
826 idx = SSA_NAME_VERSION (name);
827 elt.version = idx;
828 slot = name_copies->find_slot_with_hash (&elt, idx,
829 copy_name_p ? INSERT : NO_INSERT);
830 if (slot && *slot)
831 return (*slot)->new_name;
832
833 if (copy_name_p)
834 {
835 copy = duplicate_ssa_name (name, NULL);
836 nelt = XNEW (struct name_to_copy_elt);
837 nelt->version = idx;
838 nelt->new_name = copy;
839 nelt->field = NULL_TREE;
840 *slot = nelt;
841 }
842 else
843 {
844 gcc_assert (!slot);
845 copy = name;
846 }
847
848 var = SSA_NAME_VAR (name);
849 if (!var)
850 return copy;
851
852 uid = DECL_UID (var);
853 ielt.uid = uid;
854 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
855 if (!dslot->to)
856 {
857 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
858 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
859 dslot->uid = uid;
860 dslot->to = var_copy;
861
862 /* Ensure that when we meet this decl next time, we won't duplicate
863 it again. */
864 nuid = DECL_UID (var_copy);
865 ielt.uid = nuid;
866 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
867 gcc_assert (!dslot->to);
868 dslot->uid = nuid;
869 dslot->to = var_copy;
870 }
871 else
872 var_copy = dslot->to;
873
874 replace_ssa_name_symbol (copy, var_copy);
875 return copy;
876 }
877
878 /* Finds the ssa names used in STMT that are defined outside the
879 region between ENTRY and EXIT and replaces such ssa names with
880 their duplicates. The duplicates are stored to NAME_COPIES. Base
881 decls of all ssa names used in STMT (including those defined in
882 LOOP) are replaced with the new temporary variables; the
883 replacement decls are stored in DECL_COPIES. */
884
885 static void
886 separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
887 name_to_copy_table_type *name_copies,
888 int_tree_htab_type *decl_copies)
889 {
890 use_operand_p use;
891 def_operand_p def;
892 ssa_op_iter oi;
893 tree name, copy;
894 bool copy_name_p;
895
896 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
897 {
898 name = DEF_FROM_PTR (def);
899 gcc_assert (TREE_CODE (name) == SSA_NAME);
900 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
901 false);
902 gcc_assert (copy == name);
903 }
904
905 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
906 {
907 name = USE_FROM_PTR (use);
908 if (TREE_CODE (name) != SSA_NAME)
909 continue;
910
911 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
912 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
913 copy_name_p);
914 SET_USE (use, copy);
915 }
916 }
917
918 /* Finds the ssa names used in STMT that are defined outside the
919 region between ENTRY and EXIT and replaces such ssa names with
920 their duplicates. The duplicates are stored to NAME_COPIES. Base
921 decls of all ssa names used in STMT (including those defined in
922 LOOP) are replaced with the new temporary variables; the
923 replacement decls are stored in DECL_COPIES. */
924
925 static bool
926 separate_decls_in_region_debug (gimple stmt,
927 name_to_copy_table_type *name_copies,
928 int_tree_htab_type *decl_copies)
929 {
930 use_operand_p use;
931 ssa_op_iter oi;
932 tree var, name;
933 struct int_tree_map ielt;
934 struct name_to_copy_elt elt;
935 name_to_copy_elt **slot;
936 int_tree_map *dslot;
937
938 if (gimple_debug_bind_p (stmt))
939 var = gimple_debug_bind_get_var (stmt);
940 else if (gimple_debug_source_bind_p (stmt))
941 var = gimple_debug_source_bind_get_var (stmt);
942 else
943 return true;
944 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
945 return true;
946 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
947 ielt.uid = DECL_UID (var);
948 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
949 if (!dslot)
950 return true;
951 if (gimple_debug_bind_p (stmt))
952 gimple_debug_bind_set_var (stmt, dslot->to);
953 else if (gimple_debug_source_bind_p (stmt))
954 gimple_debug_source_bind_set_var (stmt, dslot->to);
955
956 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
957 {
958 name = USE_FROM_PTR (use);
959 if (TREE_CODE (name) != SSA_NAME)
960 continue;
961
962 elt.version = SSA_NAME_VERSION (name);
963 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
964 if (!slot)
965 {
966 gimple_debug_bind_reset_value (stmt);
967 update_stmt (stmt);
968 break;
969 }
970
971 SET_USE (use, (*slot)->new_name);
972 }
973
974 return false;
975 }
976
977 /* Callback for htab_traverse. Adds a field corresponding to the reduction
978 specified in SLOT. The type is passed in DATA. */
979
980 int
981 add_field_for_reduction (reduction_info **slot, tree type)
982 {
983
984 struct reduction_info *const red = *slot;
985 tree var = gimple_assign_lhs (red->reduc_stmt);
986 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
987 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
988
989 insert_field_into_struct (type, field);
990
991 red->field = field;
992
993 return 1;
994 }
995
996 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
997 described in SLOT. The type is passed in DATA. */
998
999 int
1000 add_field_for_name (name_to_copy_elt **slot, tree type)
1001 {
1002 struct name_to_copy_elt *const elt = *slot;
1003 tree name = ssa_name (elt->version);
1004 tree field = build_decl (UNKNOWN_LOCATION,
1005 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1006 TREE_TYPE (name));
1007
1008 insert_field_into_struct (type, field);
1009 elt->field = field;
1010
1011 return 1;
1012 }
1013
1014 /* Callback for htab_traverse. A local result is the intermediate result
1015 computed by a single
1016 thread, or the initial value in case no iteration was executed.
1017 This function creates a phi node reflecting these values.
1018 The phi's result will be stored in NEW_PHI field of the
1019 reduction's data structure. */
1020
1021 int
1022 create_phi_for_local_result (reduction_info **slot, struct loop *loop)
1023 {
1024 struct reduction_info *const reduc = *slot;
1025 edge e;
1026 gphi *new_phi;
1027 basic_block store_bb;
1028 tree local_res;
1029 source_location locus;
1030
1031 /* STORE_BB is the block where the phi
1032 should be stored. It is the destination of the loop exit.
1033 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
1034 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1035
1036 /* STORE_BB has two predecessors. One coming from the loop
1037 (the reduction's result is computed at the loop),
1038 and another coming from a block preceding the loop,
1039 when no iterations
1040 are executed (the initial value should be taken). */
1041 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1042 e = EDGE_PRED (store_bb, 1);
1043 else
1044 e = EDGE_PRED (store_bb, 0);
1045 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
1046 locus = gimple_location (reduc->reduc_stmt);
1047 new_phi = create_phi_node (local_res, store_bb);
1048 add_phi_arg (new_phi, reduc->init, e, locus);
1049 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
1050 FALLTHRU_EDGE (loop->latch), locus);
1051 reduc->new_phi = new_phi;
1052
1053 return 1;
1054 }
1055
1056 struct clsn_data
1057 {
1058 tree store;
1059 tree load;
1060
1061 basic_block store_bb;
1062 basic_block load_bb;
1063 };
1064
1065 /* Callback for htab_traverse. Create an atomic instruction for the
1066 reduction described in SLOT.
1067 DATA annotates the place in memory the atomic operation relates to,
1068 and the basic block it needs to be generated in. */
1069
1070 int
1071 create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
1072 {
1073 struct reduction_info *const reduc = *slot;
1074 gimple_stmt_iterator gsi;
1075 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
1076 tree load_struct;
1077 basic_block bb;
1078 basic_block new_bb;
1079 edge e;
1080 tree t, addr, ref, x;
1081 tree tmp_load, name;
1082 gimple load;
1083
1084 load_struct = build_simple_mem_ref (clsn_data->load);
1085 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
1086
1087 addr = build_addr (t, current_function_decl);
1088
1089 /* Create phi node. */
1090 bb = clsn_data->load_bb;
1091
1092 gsi = gsi_last_bb (bb);
1093 e = split_block (bb, gsi_stmt (gsi));
1094 new_bb = e->dest;
1095
1096 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1097 tmp_load = make_ssa_name (tmp_load);
1098 load = gimple_build_omp_atomic_load (tmp_load, addr);
1099 SSA_NAME_DEF_STMT (tmp_load) = load;
1100 gsi = gsi_start_bb (new_bb);
1101 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
1102
1103 e = split_block (new_bb, load);
1104 new_bb = e->dest;
1105 gsi = gsi_start_bb (new_bb);
1106 ref = tmp_load;
1107 x = fold_build2 (reduc->reduction_code,
1108 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1109 PHI_RESULT (reduc->new_phi));
1110
1111 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1112 GSI_CONTINUE_LINKING);
1113
1114 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
1115 return 1;
1116 }
1117
1118 /* Create the atomic operation at the join point of the threads.
1119 REDUCTION_LIST describes the reductions in the LOOP.
1120 LD_ST_DATA describes the shared data structure where
1121 shared data is stored in and loaded from. */
1122 static void
1123 create_call_for_reduction (struct loop *loop,
1124 reduction_info_table_type *reduction_list,
1125 struct clsn_data *ld_st_data)
1126 {
1127 reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
1128 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1129 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1130 reduction_list
1131 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1132 }
1133
1134 /* Callback for htab_traverse. Loads the final reduction value at the
1135 join point of all threads, and inserts it in the right place. */
1136
1137 int
1138 create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
1139 {
1140 struct reduction_info *const red = *slot;
1141 gimple stmt;
1142 gimple_stmt_iterator gsi;
1143 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1144 tree load_struct;
1145 tree name;
1146 tree x;
1147
1148 /* If there's no exit phi, the result of the reduction is unused. */
1149 if (red->keep_res == NULL)
1150 return 1;
1151
1152 gsi = gsi_after_labels (clsn_data->load_bb);
1153 load_struct = build_simple_mem_ref (clsn_data->load);
1154 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1155 NULL_TREE);
1156
1157 x = load_struct;
1158 name = PHI_RESULT (red->keep_res);
1159 stmt = gimple_build_assign (name, x);
1160
1161 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1162
1163 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1164 !gsi_end_p (gsi); gsi_next (&gsi))
1165 if (gsi_stmt (gsi) == red->keep_res)
1166 {
1167 remove_phi_node (&gsi, false);
1168 return 1;
1169 }
1170 gcc_unreachable ();
1171 }
1172
1173 /* Load the reduction result that was stored in LD_ST_DATA.
1174 REDUCTION_LIST describes the list of reductions that the
1175 loads should be generated for. */
1176 static void
1177 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1178 struct clsn_data *ld_st_data)
1179 {
1180 gimple_stmt_iterator gsi;
1181 tree t;
1182 gimple stmt;
1183
1184 gsi = gsi_after_labels (ld_st_data->load_bb);
1185 t = build_fold_addr_expr (ld_st_data->store);
1186 stmt = gimple_build_assign (ld_st_data->load, t);
1187
1188 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1189
1190 reduction_list
1191 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1192
1193 }
1194
1195 /* Callback for htab_traverse. Store the neutral value for the
1196 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1197 1 for MULT_EXPR, etc. into the reduction field.
1198 The reduction is specified in SLOT. The store information is
1199 passed in DATA. */
1200
1201 int
1202 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1203 {
1204 struct reduction_info *const red = *slot;
1205 tree t;
1206 gimple stmt;
1207 gimple_stmt_iterator gsi;
1208 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1209
1210 gsi = gsi_last_bb (clsn_data->store_bb);
1211 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1212 stmt = gimple_build_assign (t, red->initial_value);
1213 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1214
1215 return 1;
1216 }
1217
1218 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1219 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1220 specified in SLOT. */
1221
1222 int
1223 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1224 struct clsn_data *clsn_data)
1225 {
1226 struct name_to_copy_elt *const elt = *slot;
1227 tree t;
1228 gimple stmt;
1229 gimple_stmt_iterator gsi;
1230 tree type = TREE_TYPE (elt->new_name);
1231 tree load_struct;
1232
1233 gsi = gsi_last_bb (clsn_data->store_bb);
1234 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1235 stmt = gimple_build_assign (t, ssa_name (elt->version));
1236 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1237
1238 gsi = gsi_last_bb (clsn_data->load_bb);
1239 load_struct = build_simple_mem_ref (clsn_data->load);
1240 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1241 stmt = gimple_build_assign (elt->new_name, t);
1242 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1243
1244 return 1;
1245 }
1246
1247 /* Moves all the variables used in LOOP and defined outside of it (including
1248 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1249 name) to a structure created for this purpose. The code
1250
1251 while (1)
1252 {
1253 use (a);
1254 use (b);
1255 }
1256
1257 is transformed this way:
1258
1259 bb0:
1260 old.a = a;
1261 old.b = b;
1262
1263 bb1:
1264 a' = new->a;
1265 b' = new->b;
1266 while (1)
1267 {
1268 use (a');
1269 use (b');
1270 }
1271
1272 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1273 pointer `new' is intentionally not initialized (the loop will be split to a
1274 separate function later, and `new' will be initialized from its arguments).
1275 LD_ST_DATA holds information about the shared data structure used to pass
1276 information among the threads. It is initialized here, and
1277 gen_parallel_loop will pass it to create_call_for_reduction that
1278 needs this information. REDUCTION_LIST describes the reductions
1279 in LOOP. */
1280
1281 static void
1282 separate_decls_in_region (edge entry, edge exit,
1283 reduction_info_table_type *reduction_list,
1284 tree *arg_struct, tree *new_arg_struct,
1285 struct clsn_data *ld_st_data)
1286
1287 {
1288 basic_block bb1 = split_edge (entry);
1289 basic_block bb0 = single_pred (bb1);
1290 name_to_copy_table_type name_copies (10);
1291 int_tree_htab_type decl_copies (10);
1292 unsigned i;
1293 tree type, type_name, nvar;
1294 gimple_stmt_iterator gsi;
1295 struct clsn_data clsn_data;
1296 auto_vec<basic_block, 3> body;
1297 basic_block bb;
1298 basic_block entry_bb = bb1;
1299 basic_block exit_bb = exit->dest;
1300 bool has_debug_stmt = false;
1301
1302 entry = single_succ_edge (entry_bb);
1303 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
1304
1305 FOR_EACH_VEC_ELT (body, i, bb)
1306 {
1307 if (bb != entry_bb && bb != exit_bb)
1308 {
1309 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1310 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1311 &name_copies, &decl_copies);
1312
1313 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1314 {
1315 gimple stmt = gsi_stmt (gsi);
1316
1317 if (is_gimple_debug (stmt))
1318 has_debug_stmt = true;
1319 else
1320 separate_decls_in_region_stmt (entry, exit, stmt,
1321 &name_copies, &decl_copies);
1322 }
1323 }
1324 }
1325
1326 /* Now process debug bind stmts. We must not create decls while
1327 processing debug stmts, so we defer their processing so as to
1328 make sure we will have debug info for as many variables as
1329 possible (all of those that were dealt with in the loop above),
1330 and discard those for which we know there's nothing we can
1331 do. */
1332 if (has_debug_stmt)
1333 FOR_EACH_VEC_ELT (body, i, bb)
1334 if (bb != entry_bb && bb != exit_bb)
1335 {
1336 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1337 {
1338 gimple stmt = gsi_stmt (gsi);
1339
1340 if (is_gimple_debug (stmt))
1341 {
1342 if (separate_decls_in_region_debug (stmt, &name_copies,
1343 &decl_copies))
1344 {
1345 gsi_remove (&gsi, true);
1346 continue;
1347 }
1348 }
1349
1350 gsi_next (&gsi);
1351 }
1352 }
1353
1354 if (name_copies.elements () == 0 && reduction_list->elements () == 0)
1355 {
1356 /* It may happen that there is nothing to copy (if there are only
1357 loop carried and external variables in the loop). */
1358 *arg_struct = NULL;
1359 *new_arg_struct = NULL;
1360 }
1361 else
1362 {
1363 /* Create the type for the structure to store the ssa names to. */
1364 type = lang_hooks.types.make_type (RECORD_TYPE);
1365 type_name = build_decl (UNKNOWN_LOCATION,
1366 TYPE_DECL, create_tmp_var_name (".paral_data"),
1367 type);
1368 TYPE_NAME (type) = type_name;
1369
1370 name_copies.traverse <tree, add_field_for_name> (type);
1371 if (reduction_list && reduction_list->elements () > 0)
1372 {
1373 /* Create the fields for reductions. */
1374 reduction_list->traverse <tree, add_field_for_reduction> (type);
1375 }
1376 layout_type (type);
1377
1378 /* Create the loads and stores. */
1379 *arg_struct = create_tmp_var (type, ".paral_data_store");
1380 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1381 *new_arg_struct = make_ssa_name (nvar);
1382
1383 ld_st_data->store = *arg_struct;
1384 ld_st_data->load = *new_arg_struct;
1385 ld_st_data->store_bb = bb0;
1386 ld_st_data->load_bb = bb1;
1387
1388 name_copies
1389 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1390 (ld_st_data);
1391
1392 /* Load the calculation from memory (after the join of the threads). */
1393
1394 if (reduction_list && reduction_list->elements () > 0)
1395 {
1396 reduction_list
1397 ->traverse <struct clsn_data *, create_stores_for_reduction>
1398 (ld_st_data);
1399 clsn_data.load = make_ssa_name (nvar);
1400 clsn_data.load_bb = exit->dest;
1401 clsn_data.store = ld_st_data->store;
1402 create_final_loads_for_reduction (reduction_list, &clsn_data);
1403 }
1404 }
1405 }
1406
1407 /* Returns true if FN was created to run in parallel. */
1408
1409 bool
1410 parallelized_function_p (tree fndecl)
1411 {
1412 cgraph_node *node = cgraph_node::get (fndecl);
1413 gcc_assert (node != NULL);
1414 return node->parallelized_function;
1415 }
1416
1417 /* Creates and returns an empty function that will receive the body of
1418 a parallelized loop. */
1419
1420 static tree
1421 create_loop_fn (location_t loc)
1422 {
1423 char buf[100];
1424 char *tname;
1425 tree decl, type, name, t;
1426 struct function *act_cfun = cfun;
1427 static unsigned loopfn_num;
1428
1429 loc = LOCATION_LOCUS (loc);
1430 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1431 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1432 clean_symbol_name (tname);
1433 name = get_identifier (tname);
1434 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1435
1436 decl = build_decl (loc, FUNCTION_DECL, name, type);
1437 TREE_STATIC (decl) = 1;
1438 TREE_USED (decl) = 1;
1439 DECL_ARTIFICIAL (decl) = 1;
1440 DECL_IGNORED_P (decl) = 0;
1441 TREE_PUBLIC (decl) = 0;
1442 DECL_UNINLINABLE (decl) = 1;
1443 DECL_EXTERNAL (decl) = 0;
1444 DECL_CONTEXT (decl) = NULL_TREE;
1445 DECL_INITIAL (decl) = make_node (BLOCK);
1446
1447 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
1448 DECL_ARTIFICIAL (t) = 1;
1449 DECL_IGNORED_P (t) = 1;
1450 DECL_RESULT (decl) = t;
1451
1452 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
1453 ptr_type_node);
1454 DECL_ARTIFICIAL (t) = 1;
1455 DECL_ARG_TYPE (t) = ptr_type_node;
1456 DECL_CONTEXT (t) = decl;
1457 TREE_USED (t) = 1;
1458 DECL_ARGUMENTS (decl) = t;
1459
1460 allocate_struct_function (decl, false);
1461
1462 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1463 it. */
1464 set_cfun (act_cfun);
1465
1466 return decl;
1467 }
1468
1469 /* Replace uses of NAME by VAL in block BB. */
1470
1471 static void
1472 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1473 {
1474 gimple use_stmt;
1475 imm_use_iterator imm_iter;
1476
1477 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1478 {
1479 if (gimple_bb (use_stmt) != bb)
1480 continue;
1481
1482 use_operand_p use_p;
1483 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1484 SET_USE (use_p, val);
1485 }
1486 }
1487
1488 /* Do transformation from:
1489
1490 <bb preheader>:
1491 ...
1492 goto <bb header>
1493
1494 <bb header>:
1495 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1496 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1497 ...
1498 use (ivtmp_a)
1499 ...
1500 sum_b = sum_a + sum_update
1501 ...
1502 if (ivtmp_a < n)
1503 goto <bb latch>;
1504 else
1505 goto <bb exit>;
1506
1507 <bb latch>:
1508 ivtmp_b = ivtmp_a + 1;
1509 goto <bb header>
1510
1511 <bb exit>:
1512 sum_z = PHI <sum_b (cond[1]), ...>
1513
1514 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1515 that's <bb header>.
1516
1517 to:
1518
1519 <bb preheader>:
1520 ...
1521 goto <bb newheader>
1522
1523 <bb header>:
1524 ivtmp_a = PHI <ivtmp_c (latch)>
1525 sum_a = PHI <sum_c (latch)>
1526 ...
1527 use (ivtmp_a)
1528 ...
1529 sum_b = sum_a + sum_update
1530 ...
1531 goto <bb latch>;
1532
1533 <bb newheader>:
1534 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1535 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1536 if (ivtmp_c < n + 1)
1537 goto <bb header>;
1538 else
1539 goto <bb newexit>;
1540
1541 <bb latch>:
1542 ivtmp_b = ivtmp_a + 1;
1543 goto <bb newheader>
1544
1545 <bb newexit>:
1546 sum_y = PHI <sum_c (newheader)>
1547
1548 <bb exit>:
1549 sum_z = PHI <sum_y (newexit), ...>
1550
1551
1552 In unified diff format:
1553
1554 <bb preheader>:
1555 ...
1556 - goto <bb header>
1557 + goto <bb newheader>
1558
1559 <bb header>:
1560 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1561 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1562 + ivtmp_a = PHI <ivtmp_c (latch)>
1563 + sum_a = PHI <sum_c (latch)>
1564 ...
1565 use (ivtmp_a)
1566 ...
1567 sum_b = sum_a + sum_update
1568 ...
1569 - if (ivtmp_a < n)
1570 - goto <bb latch>;
1571 + goto <bb latch>;
1572 +
1573 + <bb newheader>:
1574 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1575 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1576 + if (ivtmp_c < n + 1)
1577 + goto <bb header>;
1578 else
1579 goto <bb exit>;
1580
1581 <bb latch>:
1582 ivtmp_b = ivtmp_a + 1;
1583 - goto <bb header>
1584 + goto <bb newheader>
1585
1586 + <bb newexit>:
1587 + sum_y = PHI <sum_c (newheader)>
1588
1589 <bb exit>:
1590 - sum_z = PHI <sum_b (cond[1]), ...>
1591 + sum_z = PHI <sum_y (newexit), ...>
1592
1593 Note: the example does not show any virtual phis, but these are handled more
1594 or less as reductions.
1595
1596
1597 Moves the exit condition of LOOP to the beginning of its header.
1598 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1599 bound. */
1600
1601 static void
1602 transform_to_exit_first_loop_alt (struct loop *loop,
1603 reduction_info_table_type *reduction_list,
1604 tree bound)
1605 {
1606 basic_block header = loop->header;
1607 basic_block latch = loop->latch;
1608 edge exit = single_dom_exit (loop);
1609 basic_block exit_block = exit->dest;
1610 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1611 tree control = gimple_cond_lhs (cond_stmt);
1612 edge e;
1613
1614 /* Rewriting virtuals into loop-closed ssa normal form makes this
1615 transformation simpler. It also ensures that the virtuals are in
1616 loop-closed ssa normal from after the transformation, which is required by
1617 create_parallel_loop. */
1618 rewrite_virtuals_into_loop_closed_ssa (loop);
1619
1620 /* Create the new_header block. */
1621 basic_block new_header = split_block_before_cond_jump (exit->src);
1622 edge edge_at_split = single_pred_edge (new_header);
1623
1624 /* Redirect entry edge to new_header. */
1625 edge entry = loop_preheader_edge (loop);
1626 e = redirect_edge_and_branch (entry, new_header);
1627 gcc_assert (e == entry);
1628
1629 /* Redirect post_inc_edge to new_header. */
1630 edge post_inc_edge = single_succ_edge (latch);
1631 e = redirect_edge_and_branch (post_inc_edge, new_header);
1632 gcc_assert (e == post_inc_edge);
1633
1634 /* Redirect post_cond_edge to header. */
1635 edge post_cond_edge = single_pred_edge (latch);
1636 e = redirect_edge_and_branch (post_cond_edge, header);
1637 gcc_assert (e == post_cond_edge);
1638
1639 /* Redirect edge_at_split to latch. */
1640 e = redirect_edge_and_branch (edge_at_split, latch);
1641 gcc_assert (e == edge_at_split);
1642
1643 /* Set the new loop bound. */
1644 gimple_cond_set_rhs (cond_stmt, bound);
1645 update_stmt (cond_stmt);
1646
1647 /* Repair the ssa. */
1648 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1649 edge_var_map *vm;
1650 gphi_iterator gsi;
1651 int i;
1652 for (gsi = gsi_start_phis (header), i = 0;
1653 !gsi_end_p (gsi) && v->iterate (i, &vm);
1654 gsi_next (&gsi), i++)
1655 {
1656 gphi *phi = gsi.phi ();
1657 tree res_a = PHI_RESULT (phi);
1658
1659 /* Create new phi. */
1660 tree res_c = copy_ssa_name (res_a, phi);
1661 gphi *nphi = create_phi_node (res_c, new_header);
1662
1663 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1664 replace_uses_in_bb_by (res_a, res_c, new_header);
1665
1666 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1667 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1668
1669 /* Replace sum_b with sum_c in exit phi. */
1670 tree res_b = redirect_edge_var_map_def (vm);
1671 replace_uses_in_bb_by (res_b, res_c, exit_block);
1672
1673 struct reduction_info *red = reduction_phi (reduction_list, phi);
1674 gcc_assert (virtual_operand_p (res_a)
1675 || res_a == control
1676 || red != NULL);
1677
1678 if (red)
1679 {
1680 /* Register the new reduction phi. */
1681 red->reduc_phi = nphi;
1682 gimple_set_uid (red->reduc_phi, red->reduc_version);
1683 }
1684 }
1685 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
1686
1687 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1688 flush_pending_stmts (entry);
1689
1690 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1691 flush_pending_stmts (post_inc_edge);
1692
1693 /* Create a new empty exit block, inbetween the new loop header and the old
1694 exit block. The function separate_decls_in_region needs this block to
1695 insert code that is active on loop exit, but not any other path. */
1696 basic_block new_exit_block = split_edge (exit);
1697
1698 /* Insert and register the reduction exit phis. */
1699 for (gphi_iterator gsi = gsi_start_phis (exit_block);
1700 !gsi_end_p (gsi);
1701 gsi_next (&gsi))
1702 {
1703 gphi *phi = gsi.phi ();
1704 tree res_z = PHI_RESULT (phi);
1705
1706 /* Now that we have a new exit block, duplicate the phi of the old exit
1707 block in the new exit block to preserve loop-closed ssa. */
1708 edge succ_new_exit_block = single_succ_edge (new_exit_block);
1709 edge pred_new_exit_block = single_pred_edge (new_exit_block);
1710 tree res_y = copy_ssa_name (res_z, phi);
1711 gphi *nphi = create_phi_node (res_y, new_exit_block);
1712 tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1713 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1714 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1715
1716 if (virtual_operand_p (res_z))
1717 continue;
1718
1719 gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
1720 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1721 if (red != NULL)
1722 red->keep_res = nphi;
1723 }
1724
1725 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1726 then we're still using some fields, so only bother about fields that are
1727 still used: header and latch.
1728 The loop has a new header bb, so we update it. The latch bb stays the
1729 same. */
1730 loop->header = new_header;
1731
1732 /* Recalculate dominance info. */
1733 free_dominance_info (CDI_DOMINATORS);
1734 calculate_dominance_info (CDI_DOMINATORS);
1735 }
1736
1737 /* Tries to moves the exit condition of LOOP to the beginning of its header
1738 without duplication of the loop body. NIT is the number of iterations of the
1739 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1740 transformation is successful. */
1741
1742 static bool
1743 try_transform_to_exit_first_loop_alt (struct loop *loop,
1744 reduction_info_table_type *reduction_list,
1745 tree nit)
1746 {
1747 /* Check whether the latch contains a single statement. */
1748 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1749 return false;
1750
1751 /* Check whether the latch contains the loop iv increment. */
1752 edge back = single_succ_edge (loop->latch);
1753 edge exit = single_dom_exit (loop);
1754 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1755 tree control = gimple_cond_lhs (cond_stmt);
1756 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1757 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1758 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1759 return false;
1760
1761 /* Check whether there's no code between the loop condition and the latch. */
1762 if (!single_pred_p (loop->latch)
1763 || single_pred (loop->latch) != exit->src)
1764 return false;
1765
1766 tree alt_bound = NULL_TREE;
1767 tree nit_type = TREE_TYPE (nit);
1768
1769 /* Figure out whether nit + 1 overflows. */
1770 if (TREE_CODE (nit) == INTEGER_CST)
1771 {
1772 if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
1773 {
1774 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1775 nit, build_one_cst (nit_type));
1776
1777 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
1778 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1779 return true;
1780 }
1781 else
1782 {
1783 /* Todo: Figure out if we can trigger this, if it's worth to handle
1784 optimally, and if we can handle it optimally. */
1785 return false;
1786 }
1787 }
1788
1789 gcc_assert (TREE_CODE (nit) == SSA_NAME);
1790
1791 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1792 iv with base 0 and step 1 that is incremented in the latch, like this:
1793
1794 <bb header>:
1795 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1796 ...
1797 if (iv_1 < nit)
1798 goto <bb latch>;
1799 else
1800 goto <bb exit>;
1801
1802 <bb latch>:
1803 iv_2 = iv_1 + 1;
1804 goto <bb header>;
1805
1806 The range of iv_1 is [0, nit]. The latch edge is taken for
1807 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
1808 number of latch executions is equal to nit.
1809
1810 The function max_loop_iterations gives us the maximum number of latch
1811 executions, so it gives us the maximum value of nit. */
1812 widest_int nit_max;
1813 if (!max_loop_iterations (loop, &nit_max))
1814 return false;
1815
1816 /* Check if nit + 1 overflows. */
1817 widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
1818 if (!wi::lts_p (nit_max, type_max))
1819 return false;
1820
1821 gimple def = SSA_NAME_DEF_STMT (nit);
1822
1823 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
1824 if (def
1825 && is_gimple_assign (def)
1826 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1827 {
1828 tree op1 = gimple_assign_rhs1 (def);
1829 tree op2 = gimple_assign_rhs2 (def);
1830 if (integer_minus_onep (op1))
1831 alt_bound = op2;
1832 else if (integer_minus_onep (op2))
1833 alt_bound = op1;
1834 }
1835
1836 /* If not found, insert nit + 1. */
1837 if (alt_bound == NULL_TREE)
1838 {
1839 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
1840 build_int_cst_type (nit_type, 1));
1841
1842 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1843
1844 alt_bound
1845 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
1846 GSI_CONTINUE_LINKING);
1847 }
1848
1849 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1850 return true;
1851 }
1852
1853 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1854 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1855 LOOP. */
1856
1857 static void
1858 transform_to_exit_first_loop (struct loop *loop,
1859 reduction_info_table_type *reduction_list,
1860 tree nit)
1861 {
1862 basic_block *bbs, *nbbs, ex_bb, orig_header;
1863 unsigned n;
1864 bool ok;
1865 edge exit = single_dom_exit (loop), hpred;
1866 tree control, control_name, res, t;
1867 gphi *phi, *nphi;
1868 gassign *stmt;
1869 gcond *cond_stmt, *cond_nit;
1870 tree nit_1;
1871
1872 split_block_after_labels (loop->header);
1873 orig_header = single_succ (loop->header);
1874 hpred = single_succ_edge (loop->header);
1875
1876 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1877 control = gimple_cond_lhs (cond_stmt);
1878 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1879
1880 /* Make sure that we have phi nodes on exit for all loop header phis
1881 (create_parallel_loop requires that). */
1882 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1883 !gsi_end_p (gsi);
1884 gsi_next (&gsi))
1885 {
1886 phi = gsi.phi ();
1887 res = PHI_RESULT (phi);
1888 t = copy_ssa_name (res, phi);
1889 SET_PHI_RESULT (phi, t);
1890 nphi = create_phi_node (res, orig_header);
1891 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1892
1893 if (res == control)
1894 {
1895 gimple_cond_set_lhs (cond_stmt, t);
1896 update_stmt (cond_stmt);
1897 control = t;
1898 }
1899 }
1900
1901 bbs = get_loop_body_in_dom_order (loop);
1902
1903 for (n = 0; bbs[n] != exit->src; n++)
1904 continue;
1905 nbbs = XNEWVEC (basic_block, n);
1906 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1907 bbs + 1, n, nbbs);
1908 gcc_assert (ok);
1909 free (bbs);
1910 ex_bb = nbbs[0];
1911 free (nbbs);
1912
1913 /* Other than reductions, the only gimple reg that should be copied
1914 out of the loop is the control variable. */
1915 exit = single_dom_exit (loop);
1916 control_name = NULL_TREE;
1917 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1918 !gsi_end_p (gsi); )
1919 {
1920 phi = gsi.phi ();
1921 res = PHI_RESULT (phi);
1922 if (virtual_operand_p (res))
1923 {
1924 gsi_next (&gsi);
1925 continue;
1926 }
1927
1928 /* Check if it is a part of reduction. If it is,
1929 keep the phi at the reduction's keep_res field. The
1930 PHI_RESULT of this phi is the resulting value of the reduction
1931 variable when exiting the loop. */
1932
1933 if (reduction_list->elements () > 0)
1934 {
1935 struct reduction_info *red;
1936
1937 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1938 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1939 if (red)
1940 {
1941 red->keep_res = phi;
1942 gsi_next (&gsi);
1943 continue;
1944 }
1945 }
1946 gcc_assert (control_name == NULL_TREE
1947 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1948 control_name = res;
1949 remove_phi_node (&gsi, false);
1950 }
1951 gcc_assert (control_name != NULL_TREE);
1952
1953 /* Initialize the control variable to number of iterations
1954 according to the rhs of the exit condition. */
1955 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1956 cond_nit = as_a <gcond *> (last_stmt (exit->src));
1957 nit_1 = gimple_cond_rhs (cond_nit);
1958 nit_1 = force_gimple_operand_gsi (&gsi,
1959 fold_convert (TREE_TYPE (control_name), nit_1),
1960 false, NULL_TREE, false, GSI_SAME_STMT);
1961 stmt = gimple_build_assign (control_name, nit_1);
1962 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1963 }
1964
1965 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1966 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1967 NEW_DATA is the variable that should be initialized from the argument
1968 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1969 basic block containing GIMPLE_OMP_PARALLEL tree. */
1970
1971 static basic_block
1972 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1973 tree new_data, unsigned n_threads, location_t loc)
1974 {
1975 gimple_stmt_iterator gsi;
1976 basic_block bb, paral_bb, for_bb, ex_bb;
1977 tree t, param;
1978 gomp_parallel *omp_par_stmt;
1979 gimple omp_return_stmt1, omp_return_stmt2;
1980 gimple phi;
1981 gcond *cond_stmt;
1982 gomp_for *for_stmt;
1983 gomp_continue *omp_cont_stmt;
1984 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1985 edge exit, nexit, guard, end, e;
1986
1987 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1988 bb = loop_preheader_edge (loop)->src;
1989 paral_bb = single_pred (bb);
1990 gsi = gsi_last_bb (paral_bb);
1991
1992 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1993 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1994 = build_int_cst (integer_type_node, n_threads);
1995 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1996 gimple_set_location (omp_par_stmt, loc);
1997
1998 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
1999
2000 /* Initialize NEW_DATA. */
2001 if (data)
2002 {
2003 gassign *assign_stmt;
2004
2005 gsi = gsi_after_labels (bb);
2006
2007 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2008 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2009 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2010
2011 assign_stmt = gimple_build_assign (new_data,
2012 fold_convert (TREE_TYPE (new_data), param));
2013 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2014 }
2015
2016 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2017 bb = split_loop_exit_edge (single_dom_exit (loop));
2018 gsi = gsi_last_bb (bb);
2019 omp_return_stmt1 = gimple_build_omp_return (false);
2020 gimple_set_location (omp_return_stmt1, loc);
2021 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2022
2023 /* Extract data for GIMPLE_OMP_FOR. */
2024 gcc_assert (loop->header == single_dom_exit (loop)->src);
2025 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2026
2027 cvar = gimple_cond_lhs (cond_stmt);
2028 cvar_base = SSA_NAME_VAR (cvar);
2029 phi = SSA_NAME_DEF_STMT (cvar);
2030 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2031 initvar = copy_ssa_name (cvar);
2032 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2033 initvar);
2034 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2035
2036 gsi = gsi_last_nondebug_bb (loop->latch);
2037 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2038 gsi_remove (&gsi, true);
2039
2040 /* Prepare cfg. */
2041 for_bb = split_edge (loop_preheader_edge (loop));
2042 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2043 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2044 gcc_assert (exit == single_dom_exit (loop));
2045
2046 guard = make_edge (for_bb, ex_bb, 0);
2047 single_succ_edge (loop->latch)->flags = 0;
2048 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
2049 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2050 !gsi_end_p (gpi); gsi_next (&gpi))
2051 {
2052 source_location locus;
2053 gphi *phi = gpi.phi ();
2054 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2055 gimple def_stmt = SSA_NAME_DEF_STMT (def);
2056
2057 /* If the exit phi is not connected to a header phi in the same loop, this
2058 value is not modified in the loop, and we're done with this phi. */
2059 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2060 && gimple_bb (def_stmt) == loop->header))
2061 continue;
2062
2063 gphi *stmt = as_a <gphi *> (def_stmt);
2064 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2065 locus = gimple_phi_arg_location_from_edge (stmt,
2066 loop_preheader_edge (loop));
2067 add_phi_arg (phi, def, guard, locus);
2068
2069 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2070 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2071 add_phi_arg (phi, def, end, locus);
2072 }
2073 e = redirect_edge_and_branch (exit, nexit->dest);
2074 PENDING_STMT (e) = NULL;
2075
2076 /* Emit GIMPLE_OMP_FOR. */
2077 gimple_cond_set_lhs (cond_stmt, cvar_base);
2078 type = TREE_TYPE (cvar);
2079 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2080 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2081
2082 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
2083 gimple_set_location (for_stmt, loc);
2084 gimple_omp_for_set_index (for_stmt, 0, initvar);
2085 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2086 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2087 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2088 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2089 cvar_base,
2090 build_int_cst (type, 1)));
2091
2092 gsi = gsi_last_bb (for_bb);
2093 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2094 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2095
2096 /* Emit GIMPLE_OMP_CONTINUE. */
2097 gsi = gsi_last_bb (loop->latch);
2098 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2099 gimple_set_location (omp_cont_stmt, loc);
2100 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2101 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2102
2103 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2104 gsi = gsi_last_bb (ex_bb);
2105 omp_return_stmt2 = gimple_build_omp_return (true);
2106 gimple_set_location (omp_return_stmt2, loc);
2107 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2108
2109 /* After the above dom info is hosed. Re-compute it. */
2110 free_dominance_info (CDI_DOMINATORS);
2111 calculate_dominance_info (CDI_DOMINATORS);
2112
2113 return paral_bb;
2114 }
2115
2116 /* Generates code to execute the iterations of LOOP in N_THREADS
2117 threads in parallel.
2118
2119 NITER describes number of iterations of LOOP.
2120 REDUCTION_LIST describes the reductions existent in the LOOP. */
2121
2122 static void
2123 gen_parallel_loop (struct loop *loop,
2124 reduction_info_table_type *reduction_list,
2125 unsigned n_threads, struct tree_niter_desc *niter)
2126 {
2127 tree many_iterations_cond, type, nit;
2128 tree arg_struct, new_arg_struct;
2129 gimple_seq stmts;
2130 edge entry, exit;
2131 struct clsn_data clsn_data;
2132 unsigned prob;
2133 location_t loc;
2134 gimple cond_stmt;
2135 unsigned int m_p_thread=2;
2136
2137 /* From
2138
2139 ---------------------------------------------------------------------
2140 loop
2141 {
2142 IV = phi (INIT, IV + STEP)
2143 BODY1;
2144 if (COND)
2145 break;
2146 BODY2;
2147 }
2148 ---------------------------------------------------------------------
2149
2150 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2151 we generate the following code:
2152
2153 ---------------------------------------------------------------------
2154
2155 if (MAY_BE_ZERO
2156 || NITER < MIN_PER_THREAD * N_THREADS)
2157 goto original;
2158
2159 BODY1;
2160 store all local loop-invariant variables used in body of the loop to DATA.
2161 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2162 load the variables from DATA.
2163 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2164 BODY2;
2165 BODY1;
2166 GIMPLE_OMP_CONTINUE;
2167 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2168 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2169 goto end;
2170
2171 original:
2172 loop
2173 {
2174 IV = phi (INIT, IV + STEP)
2175 BODY1;
2176 if (COND)
2177 break;
2178 BODY2;
2179 }
2180
2181 end:
2182
2183 */
2184
2185 /* Create two versions of the loop -- in the old one, we know that the
2186 number of iterations is large enough, and we will transform it into the
2187 loop that will be split to loop_fn, the new one will be used for the
2188 remaining iterations. */
2189
2190 /* We should compute a better number-of-iterations value for outer loops.
2191 That is, if we have
2192
2193 for (i = 0; i < n; ++i)
2194 for (j = 0; j < m; ++j)
2195 ...
2196
2197 we should compute nit = n * m, not nit = n.
2198 Also may_be_zero handling would need to be adjusted. */
2199
2200 type = TREE_TYPE (niter->niter);
2201 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2202 NULL_TREE);
2203 if (stmts)
2204 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2205
2206 if (loop->inner)
2207 m_p_thread=2;
2208 else
2209 m_p_thread=MIN_PER_THREAD;
2210
2211 many_iterations_cond =
2212 fold_build2 (GE_EXPR, boolean_type_node,
2213 nit, build_int_cst (type, m_p_thread * n_threads));
2214
2215 many_iterations_cond
2216 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2217 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2218 many_iterations_cond);
2219 many_iterations_cond
2220 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2221 if (stmts)
2222 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2223 if (!is_gimple_condexpr (many_iterations_cond))
2224 {
2225 many_iterations_cond
2226 = force_gimple_operand (many_iterations_cond, &stmts,
2227 true, NULL_TREE);
2228 if (stmts)
2229 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2230 }
2231
2232 initialize_original_copy_tables ();
2233
2234 /* We assume that the loop usually iterates a lot. */
2235 prob = 4 * REG_BR_PROB_BASE / 5;
2236 loop_version (loop, many_iterations_cond, NULL,
2237 prob, prob, REG_BR_PROB_BASE - prob, true);
2238 update_ssa (TODO_update_ssa);
2239 free_original_copy_tables ();
2240
2241 /* Base all the induction variables in LOOP on a single control one. */
2242 canonicalize_loop_ivs (loop, &nit, true);
2243
2244 /* Ensure that the exit condition is the first statement in the loop.
2245 The common case is that latch of the loop is empty (apart from the
2246 increment) and immediately follows the loop exit test. Attempt to move the
2247 entry of the loop directly before the exit check and increase the number of
2248 iterations of the loop by one. */
2249 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2250 {
2251 if (dump_file
2252 && (dump_flags & TDF_DETAILS))
2253 fprintf (dump_file,
2254 "alternative exit-first loop transform succeeded"
2255 " for loop %d\n", loop->num);
2256 }
2257 else
2258 {
2259 /* Fall back on the method that handles more cases, but duplicates the
2260 loop body: move the exit condition of LOOP to the beginning of its
2261 header, and duplicate the part of the last iteration that gets disabled
2262 to the exit of the loop. */
2263 transform_to_exit_first_loop (loop, reduction_list, nit);
2264 }
2265
2266 /* Generate initializations for reductions. */
2267 if (reduction_list->elements () > 0)
2268 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2269
2270 /* Eliminate the references to local variables from the loop. */
2271 gcc_assert (single_exit (loop));
2272 entry = loop_preheader_edge (loop);
2273 exit = single_dom_exit (loop);
2274
2275 eliminate_local_variables (entry, exit);
2276 /* In the old loop, move all variables non-local to the loop to a structure
2277 and back, and create separate decls for the variables used in loop. */
2278 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2279 &new_arg_struct, &clsn_data);
2280
2281 /* Create the parallel constructs. */
2282 loc = UNKNOWN_LOCATION;
2283 cond_stmt = last_stmt (loop->header);
2284 if (cond_stmt)
2285 loc = gimple_location (cond_stmt);
2286 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2287 new_arg_struct, n_threads, loc);
2288 if (reduction_list->elements () > 0)
2289 create_call_for_reduction (loop, reduction_list, &clsn_data);
2290
2291 scev_reset ();
2292
2293 /* Cancel the loop (it is simpler to do it here rather than to teach the
2294 expander to do it). */
2295 cancel_loop_tree (loop);
2296
2297 /* Free loop bound estimations that could contain references to
2298 removed statements. */
2299 FOR_EACH_LOOP (loop, 0)
2300 free_numbers_of_iterations_estimates_loop (loop);
2301 }
2302
2303 /* Returns true when LOOP contains vector phi nodes. */
2304
2305 static bool
2306 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
2307 {
2308 unsigned i;
2309 basic_block *bbs = get_loop_body_in_dom_order (loop);
2310 gphi_iterator gsi;
2311 bool res = true;
2312
2313 for (i = 0; i < loop->num_nodes; i++)
2314 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2315 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2316 goto end;
2317
2318 res = false;
2319 end:
2320 free (bbs);
2321 return res;
2322 }
2323
2324 /* Create a reduction_info struct, initialize it with REDUC_STMT
2325 and PHI, insert it to the REDUCTION_LIST. */
2326
2327 static void
2328 build_new_reduction (reduction_info_table_type *reduction_list,
2329 gimple reduc_stmt, gphi *phi)
2330 {
2331 reduction_info **slot;
2332 struct reduction_info *new_reduction;
2333
2334 gcc_assert (reduc_stmt);
2335
2336 if (dump_file && (dump_flags & TDF_DETAILS))
2337 {
2338 fprintf (dump_file,
2339 "Detected reduction. reduction stmt is: \n");
2340 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2341 fprintf (dump_file, "\n");
2342 }
2343
2344 new_reduction = XCNEW (struct reduction_info);
2345
2346 new_reduction->reduc_stmt = reduc_stmt;
2347 new_reduction->reduc_phi = phi;
2348 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2349 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
2350 slot = reduction_list->find_slot (new_reduction, INSERT);
2351 *slot = new_reduction;
2352 }
2353
2354 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2355
2356 int
2357 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2358 {
2359 struct reduction_info *const red = *slot;
2360 gimple_set_uid (red->reduc_phi, red->reduc_version);
2361 return 1;
2362 }
2363
2364 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2365
2366 static void
2367 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2368 {
2369 gphi_iterator gsi;
2370 loop_vec_info simple_loop_info;
2371
2372 simple_loop_info = vect_analyze_loop_form (loop);
2373
2374 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2375 {
2376 gphi *phi = gsi.phi ();
2377 affine_iv iv;
2378 tree res = PHI_RESULT (phi);
2379 bool double_reduc;
2380
2381 if (virtual_operand_p (res))
2382 continue;
2383
2384 if (!simple_iv (loop, loop, res, &iv, true)
2385 && simple_loop_info)
2386 {
2387 gimple reduc_stmt
2388 = vect_force_simple_reduction (simple_loop_info, phi, true,
2389 &double_reduc, true);
2390 if (reduc_stmt && !double_reduc)
2391 build_new_reduction (reduction_list, reduc_stmt, phi);
2392 }
2393 }
2394 destroy_loop_vec_info (simple_loop_info, true);
2395
2396 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2397 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2398 only now. */
2399 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2400 }
2401
2402 /* Try to initialize NITER for code generation part. */
2403
2404 static bool
2405 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2406 {
2407 edge exit = single_dom_exit (loop);
2408
2409 gcc_assert (exit);
2410
2411 /* We need to know # of iterations, and there should be no uses of values
2412 defined inside loop outside of it, unless the values are invariants of
2413 the loop. */
2414 if (!number_of_iterations_exit (loop, exit, niter, false))
2415 {
2416 if (dump_file && (dump_flags & TDF_DETAILS))
2417 fprintf (dump_file, " FAILED: number of iterations not known\n");
2418 return false;
2419 }
2420
2421 return true;
2422 }
2423
2424 /* Try to initialize REDUCTION_LIST for code generation part.
2425 REDUCTION_LIST describes the reductions. */
2426
2427 static bool
2428 try_create_reduction_list (loop_p loop,
2429 reduction_info_table_type *reduction_list)
2430 {
2431 edge exit = single_dom_exit (loop);
2432 gphi_iterator gsi;
2433
2434 gcc_assert (exit);
2435
2436 gather_scalar_reductions (loop, reduction_list);
2437
2438
2439 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2440 {
2441 gphi *phi = gsi.phi ();
2442 struct reduction_info *red;
2443 imm_use_iterator imm_iter;
2444 use_operand_p use_p;
2445 gimple reduc_phi;
2446 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2447
2448 if (!virtual_operand_p (val))
2449 {
2450 if (dump_file && (dump_flags & TDF_DETAILS))
2451 {
2452 fprintf (dump_file, "phi is ");
2453 print_gimple_stmt (dump_file, phi, 0, 0);
2454 fprintf (dump_file, "arg of phi to exit: value ");
2455 print_generic_expr (dump_file, val, 0);
2456 fprintf (dump_file, " used outside loop\n");
2457 fprintf (dump_file,
2458 " checking if it a part of reduction pattern: \n");
2459 }
2460 if (reduction_list->elements () == 0)
2461 {
2462 if (dump_file && (dump_flags & TDF_DETAILS))
2463 fprintf (dump_file,
2464 " FAILED: it is not a part of reduction.\n");
2465 return false;
2466 }
2467 reduc_phi = NULL;
2468 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2469 {
2470 if (!gimple_debug_bind_p (USE_STMT (use_p))
2471 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2472 {
2473 reduc_phi = USE_STMT (use_p);
2474 break;
2475 }
2476 }
2477 red = reduction_phi (reduction_list, reduc_phi);
2478 if (red == NULL)
2479 {
2480 if (dump_file && (dump_flags & TDF_DETAILS))
2481 fprintf (dump_file,
2482 " FAILED: it is not a part of reduction.\n");
2483 return false;
2484 }
2485 if (dump_file && (dump_flags & TDF_DETAILS))
2486 {
2487 fprintf (dump_file, "reduction phi is ");
2488 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2489 fprintf (dump_file, "reduction stmt is ");
2490 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2491 }
2492 }
2493 }
2494
2495 /* The iterations of the loop may communicate only through bivs whose
2496 iteration space can be distributed efficiently. */
2497 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2498 {
2499 gphi *phi = gsi.phi ();
2500 tree def = PHI_RESULT (phi);
2501 affine_iv iv;
2502
2503 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2504 {
2505 struct reduction_info *red;
2506
2507 red = reduction_phi (reduction_list, phi);
2508 if (red == NULL)
2509 {
2510 if (dump_file && (dump_flags & TDF_DETAILS))
2511 fprintf (dump_file,
2512 " FAILED: scalar dependency between iterations\n");
2513 return false;
2514 }
2515 }
2516 }
2517
2518
2519 return true;
2520 }
2521
2522 /* Detect parallel loops and generate parallel code using libgomp
2523 primitives. Returns true if some loop was parallelized, false
2524 otherwise. */
2525
2526 static bool
2527 parallelize_loops (void)
2528 {
2529 unsigned n_threads = flag_tree_parallelize_loops;
2530 bool changed = false;
2531 struct loop *loop;
2532 struct tree_niter_desc niter_desc;
2533 struct obstack parloop_obstack;
2534 HOST_WIDE_INT estimated;
2535 source_location loop_loc;
2536
2537 /* Do not parallelize loops in the functions created by parallelization. */
2538 if (parallelized_function_p (cfun->decl))
2539 return false;
2540 if (cfun->has_nonlocal_label)
2541 return false;
2542
2543 gcc_obstack_init (&parloop_obstack);
2544 reduction_info_table_type reduction_list (10);
2545 init_stmt_vec_info_vec ();
2546
2547 FOR_EACH_LOOP (loop, 0)
2548 {
2549 reduction_list.empty ();
2550 if (dump_file && (dump_flags & TDF_DETAILS))
2551 {
2552 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2553 if (loop->inner)
2554 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2555 else
2556 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2557 }
2558
2559 /* If we use autopar in graphite pass, we use its marked dependency
2560 checking results. */
2561 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2562 {
2563 if (dump_file && (dump_flags & TDF_DETAILS))
2564 fprintf (dump_file, "loop is not parallel according to graphite\n");
2565 continue;
2566 }
2567
2568 if (!single_dom_exit (loop))
2569 {
2570
2571 if (dump_file && (dump_flags & TDF_DETAILS))
2572 fprintf (dump_file, "loop is !single_dom_exit\n");
2573
2574 continue;
2575 }
2576
2577 if (/* And of course, the loop must be parallelizable. */
2578 !can_duplicate_loop_p (loop)
2579 || loop_has_blocks_with_irreducible_flag (loop)
2580 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2581 /* FIXME: the check for vector phi nodes could be removed. */
2582 || loop_has_vector_phi_nodes (loop))
2583 continue;
2584
2585 estimated = estimated_stmt_executions_int (loop);
2586 if (estimated == -1)
2587 estimated = max_stmt_executions_int (loop);
2588 /* FIXME: Bypass this check as graphite doesn't update the
2589 count and frequency correctly now. */
2590 if (!flag_loop_parallelize_all
2591 && ((estimated != -1
2592 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2593 /* Do not bother with loops in cold areas. */
2594 || optimize_loop_nest_for_size_p (loop)))
2595 continue;
2596
2597 if (!try_get_loop_niter (loop, &niter_desc))
2598 continue;
2599
2600 if (!try_create_reduction_list (loop, &reduction_list))
2601 continue;
2602
2603 if (!flag_loop_parallelize_all
2604 && !loop_parallel_p (loop, &parloop_obstack))
2605 continue;
2606
2607 changed = true;
2608 if (dump_file && (dump_flags & TDF_DETAILS))
2609 {
2610 if (loop->inner)
2611 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2612 else
2613 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2614 loop_loc = find_loop_location (loop);
2615 if (loop_loc != UNKNOWN_LOCATION)
2616 fprintf (dump_file, "\nloop at %s:%d: ",
2617 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
2618 }
2619 gen_parallel_loop (loop, &reduction_list,
2620 n_threads, &niter_desc);
2621 }
2622
2623 free_stmt_vec_info_vec ();
2624 obstack_free (&parloop_obstack, NULL);
2625
2626 /* Parallelization will cause new function calls to be inserted through
2627 which local variables will escape. Reset the points-to solution
2628 for ESCAPED. */
2629 if (changed)
2630 pt_solution_reset (&cfun->gimple_df->escaped);
2631
2632 return changed;
2633 }
2634
2635 /* Parallelization. */
2636
2637 namespace {
2638
2639 const pass_data pass_data_parallelize_loops =
2640 {
2641 GIMPLE_PASS, /* type */
2642 "parloops", /* name */
2643 OPTGROUP_LOOP, /* optinfo_flags */
2644 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2645 ( PROP_cfg | PROP_ssa ), /* properties_required */
2646 0, /* properties_provided */
2647 0, /* properties_destroyed */
2648 0, /* todo_flags_start */
2649 0, /* todo_flags_finish */
2650 };
2651
2652 class pass_parallelize_loops : public gimple_opt_pass
2653 {
2654 public:
2655 pass_parallelize_loops (gcc::context *ctxt)
2656 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2657 {}
2658
2659 /* opt_pass methods: */
2660 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
2661 virtual unsigned int execute (function *);
2662
2663 }; // class pass_parallelize_loops
2664
2665 unsigned
2666 pass_parallelize_loops::execute (function *fun)
2667 {
2668 if (number_of_loops (fun) <= 1)
2669 return 0;
2670
2671 if (parallelize_loops ())
2672 {
2673 fun->curr_properties &= ~(PROP_gimple_eomp);
2674 return TODO_update_ssa;
2675 }
2676
2677 return 0;
2678 }
2679
2680 } // anon namespace
2681
2682 gimple_opt_pass *
2683 make_pass_parallelize_loops (gcc::context *ctxt)
2684 {
2685 return new pass_parallelize_loops (ctxt);
2686 }