/* Loop autoparallelization.
   Copyright (C) 2006-2013 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "gimple-ssa.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "gimple-pretty-print.h"
#include "tree-pass.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-low.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if that is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we add
   GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let the omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into the shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables with accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
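
/* As a quick illustration of the basic, reduction-free case -- this is a
   sketch of the intended effect, not literal output of the pass -- a loop
   whose iterations are independent, such as

     for (i = 0; i < N; i++)
       x[i] = i + 3;

   is conceptually rewritten into the equivalent of

     #pragma omp parallel num_threads(4)
     #pragma omp for schedule(static)
     for (i = 0; i < N; i++)
       x[i] = i + 3;

   i.e. the loop body is outlined into a new function executed by all
   threads, with the iteration space divided statically among them.  */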
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation is illustrated with an example.


  parloop
  {
    int sum = 1;

    for (i = 0; i < N; i++)
      {
	x[i] = i + 3;
	sum += x[i];
      }
  }

  gimple-like code:
  header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


  exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


  after reduction transformation (only relevant parts):

  parloop
  {

  ....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1()  #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

  ...

  }

*/

/* Minimum number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100
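
/* For example, for an innermost loop and four threads this means the
   parallelized version is only entered when the loop runs for at least
   MIN_PER_THREAD * 4 = 400 iterations; shorter runs fall back to the
   sequential copy created by loop versioning in gen_parallel_loop.  (For
   loops that are not innermost, a smaller bound of 2 iterations per
   thread is used instead; see gen_parallel_loop below.)  */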

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* The reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* Code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting
				   value of the reduction variable when exiting
				   the loop.  */
  tree initial_value;		/* The initial value of the reduction var before
				   entering the loop.  */
  tree field;			/* The name of the field in the parloop data
				   structure intended for reduction.  */
  tree init;			/* Reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose
				   result will be passed to the atomic
				   operation.  Represents the local result each
				   thread computed for the reduction
				   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : typed_free_remove <reduction_info>
{
  typedef reduction_info value_type;
  typedef reduction_info compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const value_type *a, const compare_type *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const value_type *a)
{
  return a->reduc_version;
}

typedef hash_table <reduction_hasher> reduction_info_table_type;


static struct reduction_info *
reduction_phi (reduction_info_table_type reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list.elements () == 0 || phi == NULL)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list.find (&tmpred);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
{
  typedef name_to_copy_elt value_type;
  typedef name_to_copy_elt compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const value_type *a)
{
  return (hashval_t) a->version;
}

typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
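
/* For instance, multiplying the 2x2 matrix

     | 1 0 |
     | 1 1 |

   by the vector (2, 3) yields

     dest = (1*2 + 0*3, 1*2 + 1*3) = (2, 5).  */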

/* Return true if TRANS is a legal transformation matrix that respects
   the distance vectors of the dependence relations in
   DEPENDENCE_RELATIONS.  The conservative answer is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector."  S. Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (a.k.a. input dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
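
/* loop_parallel_p below applies this with the 1x1 "reversal" matrix (-1):
   a loop-carried dependence with distance vector (1) is mapped to (-1),
   which is not lexicographically positive, so reversal -- and hence
   parallel execution of the iterations -- is rejected.  A loop whose
   distance vectors are all zero (no loop-carried dependences) passes the
   test, since the zero vector is mapped to itself.  */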

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  stack_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  int_tree_map **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  ielt.uid = uid;
  dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr), NULL);
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = (*dslot)->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
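
/* As an example (a sketch in GIMPLE-like notation; the SSA names are made
   up): for a local array "a", the first access based on "a" in the region
   rewrites the base to a MEM_REF and emits on the entry edge

     a.addr_1 = &a;

   so an access "a[i]" becomes (*a.addr_1)[i].  Later accesses and
   addresses based on "a" reuse a.addr_1 through the DECL_ADDRESS map
   instead of taking the address again; only the remaining address
   arithmetic is gimplified right before GSI.  */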

/* Callback for htab_traverse.  Create the initialization statement
   for the reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computation is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
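
/* Picking up the running example from the head of this file: for

     sum = 1;
     for (i = 0; i < N; i++)
       sum += x[i];

   the preheader argument of sum's header phi is replaced by the neutral
   element 0 that omp_reduction_init returns for PLUS_EXPR (it would be 1
   for MULT_EXPR, and so on), while the user's initial value 1 is saved in
   initial_value and folded back in after the threads join.  */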

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address is not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
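
/* For illustration (made-up SSA names): inside the region, a use of an
   addressable local "a"

     t_1 = a;        becomes   t_1 = *a.addr_5;

   via the DECL_P case above, while an invariant address such as

     p_2 = &a[7];    becomes   p_2 = addr_9;

   via the ADDR_EXPR case, with a.addr_5 = &a emitted once on the region's
   entry edge and the rewritten address computation for addr_9 gimplified
   in place by take_address_of.  */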

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and a temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  stack_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address;
  decl_address.create (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, decl_address);

  decl_address.dispose ();
}

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
	  && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
	return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
			       int_tree_htab_type decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies.find_slot_with_hash (&elt, idx,
					  copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type name_copies,
			       int_tree_htab_type decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}

/* Replaces the variable and the ssa names used in the debug statement
   STMT with their duplicates recorded in NAME_COPIES and DECL_COPIES.
   Returns true if no replacement is possible and the statement should
   be removed instead; returns false on success.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type name_copies,
				int_tree_htab_type decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map **dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt,
				      ((struct int_tree_map *) *dslot)->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to an ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in the NEW_PHI field of the reduction's
   data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop), and another coming
     from a block preceding the loop, when no iterations are executed
     (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type reduction_list,
			   struct clsn_data *ld_st_data)
{
  reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  reduction_list
    .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}

/* Callback for htab_traverse.  Store the initial value of the reduction
   variable (the value given by the user before entering the loop, saved
   by initialize_reductions) into the reduction field of the shared data
   structure.  The reduction is specified in SLOT.  The store information
   is passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Callback for htab_traverse.  Creates a load to a field of LOAD in LOAD_BB
   and a store to a field of STORE in STORE_BB for the ssa name and its
   duplicate specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
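
/* For a copied name x_5 whose structure field is F, this generates
   (sketch, made-up names):

     store_bb (before the GIMPLE_OMP_PARALLEL):
       .paral_data_store.F = x_5;

     load_bb (at the start of the outlined region):
       x_5' = .paral_data_load->F;

   matching the bb0/bb1 picture in the comment below.  */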

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is an ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)
{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies;
  name_copies.create (10);
  int_tree_htab_type decl_copies;
  decl_copies.create (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  stack_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list.elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list.traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	  (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list.is_created () && reduction_list.elements () > 0)
	{
	  reduction_list
	    .traverse <struct clsn_data *, create_stores_for_reduction>
	      (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  decl_copies.dispose ();
  name_copies.dispose ();
}

/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  */

static GTY(()) bitmap parallelized_functions;

/* Returns true if FN was created by create_loop_fn.  */

bool
parallelized_function_p (tree fn)
{
  if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
    return false;

  return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  if (!parallelized_functions)
    parallelized_functions = BITMAP_GGC_ALLOC ();
  bitmap_set_bit (parallelized_functions, DECL_UID (decl));

  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
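
/* The resulting decl is roughly equivalent to declaring (hypothetical
   name; the real one is produced by ASM_FORMAT_PRIVATE_NAME)

     static void foo.$loopfn.0 (void *.paral_data_param);

   i.e. a private, uninlinable function taking the address of the
   .paral_data structure as its single argument.  */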

/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that the latch of the loop is empty and
   immediately follows the loop exit.  In this case, it would be better not
   to copy the body of the loop, but only move the entry of the loop
   directly before the exit check and increase the number of iterations of
   the loop by one.  This may need some additional preconditioning in case
   NIT = ~0.  REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of a reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list.elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to the number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name),
						  nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
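
/* Sketch of the effect on the running example (made-up SSA numbers).
   Before, the exit test is at the end of the body:

     header:
       i_1 = PHI <0, i_2>
       body;
       i_2 = i_1 + 1;
       if (N > i_2) goto header; else goto exit;

   After, the test is the first real statement of the loop, and the part
   of the final iteration that used to run before the test is duplicated
   on the exit path, where the control variable is also set from the exit
   condition's rhs:

     header:
       i_1 = PHI <0, i_2>
       if (N > i_1) goto body; else goto exit;
     body:
       body;
       i_2 = i_1 + 1;
       goto header;
     exit:
       i_4 = N;
       duplicated body;  */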
1603
1604 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1605 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1606 NEW_DATA is the variable that should be initialized from the argument
1607 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1608 basic block containing GIMPLE_OMP_PARALLEL tree. */
1609
1610 static basic_block
1611 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1612 tree new_data, unsigned n_threads, location_t loc)
1613 {
1614 gimple_stmt_iterator gsi;
1615 basic_block bb, paral_bb, for_bb, ex_bb;
1616 tree t, param;
1617 gimple stmt, for_stmt, phi, cond_stmt;
1618 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
1619 edge exit, nexit, guard, end, e;
1620
1621 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
1622 bb = loop_preheader_edge (loop)->src;
1623 paral_bb = single_pred (bb);
1624 gsi = gsi_last_bb (paral_bb);
1625
1626 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
1627 OMP_CLAUSE_NUM_THREADS_EXPR (t)
1628 = build_int_cst (integer_type_node, n_threads);
1629 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1630 gimple_set_location (stmt, loc);
1631
1632 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1633
1634 /* Initialize NEW_DATA. */
1635 if (data)
1636 {
1637 gsi = gsi_after_labels (bb);
1638
1639 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1640 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1641 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1642
1643 stmt = gimple_build_assign (new_data,
1644 fold_convert (TREE_TYPE (new_data), param));
1645 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1646 }
1647
1648 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
1649 bb = split_loop_exit_edge (single_dom_exit (loop));
1650 gsi = gsi_last_bb (bb);
1651 stmt = gimple_build_omp_return (false);
1652 gimple_set_location (stmt, loc);
1653 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1654
1655 /* Extract data for GIMPLE_OMP_FOR. */
1656 gcc_assert (loop->header == single_dom_exit (loop)->src);
1657 cond_stmt = last_stmt (loop->header);
1658
1659 cvar = gimple_cond_lhs (cond_stmt);
1660 cvar_base = SSA_NAME_VAR (cvar);
1661 phi = SSA_NAME_DEF_STMT (cvar);
1662 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1663 initvar = copy_ssa_name (cvar, NULL);
1664 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1665 initvar);
1666 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1667
1668 gsi = gsi_last_nondebug_bb (loop->latch);
1669 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1670 gsi_remove (&gsi, true);
1671
1672 /* Prepare cfg. */
1673 for_bb = split_edge (loop_preheader_edge (loop));
1674 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1675 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1676 gcc_assert (exit == single_dom_exit (loop));
1677
1678 guard = make_edge (for_bb, ex_bb, 0);
1679 single_succ_edge (loop->latch)->flags = 0;
1680 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1681 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
1682 {
1683 source_location locus;
1684 tree def;
1685 phi = gsi_stmt (gsi);
1686 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1687
1688 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
1689 locus = gimple_phi_arg_location_from_edge (stmt,
1690 loop_preheader_edge (loop));
1691 add_phi_arg (phi, def, guard, locus);
1692
1693 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1694 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
1695 add_phi_arg (phi, def, end, locus);
1696 }
1697 e = redirect_edge_and_branch (exit, nexit->dest);
1698 PENDING_STMT (e) = NULL;
1699
1700 /* Emit GIMPLE_OMP_FOR. */
1701 gimple_cond_set_lhs (cond_stmt, cvar_base);
1702 type = TREE_TYPE (cvar);
1703 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
1704 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1705
1706 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
1707 gimple_set_location (for_stmt, loc);
1708 gimple_omp_for_set_index (for_stmt, 0, initvar);
1709 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1710 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1711 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1712 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1713 cvar_base,
1714 build_int_cst (type, 1)));
1715
1716 gsi = gsi_last_bb (for_bb);
1717 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
1718 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1719
1720 /* Emit GIMPLE_OMP_CONTINUE. */
1721 gsi = gsi_last_bb (loop->latch);
1722 stmt = gimple_build_omp_continue (cvar_next, cvar);
1723 gimple_set_location (stmt, loc);
1724 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1725 SSA_NAME_DEF_STMT (cvar_next) = stmt;
1726
1727 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1728 gsi = gsi_last_bb (ex_bb);
1729 stmt = gimple_build_omp_return (true);
1730 gimple_set_location (stmt, loc);
1731 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1732
1733 /* After the above dom info is hosed. Re-compute it. */
1734 free_dominance_info (CDI_DOMINATORS);
1735 calculate_dominance_info (CDI_DOMINATORS);
1736
1737 return paral_bb;
1738 }
1739
1740 /* Generates code to execute the iterations of LOOP in N_THREADS
1741 threads in parallel.
1742
1743 NITER describes number of iterations of LOOP.
1744 REDUCTION_LIST describes the reductions existent in the LOOP. */
1745
1746 static void
1747 gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
1748 unsigned n_threads, struct tree_niter_desc *niter)
1749 {
1750 tree many_iterations_cond, type, nit;
1751 tree arg_struct, new_arg_struct;
1752 gimple_seq stmts;
1753 basic_block parallel_head;
1754 edge entry, exit;
1755 struct clsn_data clsn_data;
1756 unsigned prob;
1757 location_t loc;
1758 gimple cond_stmt;
1759 unsigned int m_p_thread = 2;
1760
1761 /* From
1762
1763 ---------------------------------------------------------------------
1764 loop
1765 {
1766 IV = phi (INIT, IV + STEP)
1767 BODY1;
1768 if (COND)
1769 break;
1770 BODY2;
1771 }
1772 ---------------------------------------------------------------------
1773
1774 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1775 we generate the following code:
1776
1777 ---------------------------------------------------------------------
1778
1779 if (MAY_BE_ZERO
1780 || NITER < MIN_PER_THREAD * N_THREADS)
1781 goto original;
1782
1783 BODY1;
1784 store all local loop-invariant variables used in body of the loop to DATA.
1785 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1786 load the variables from DATA.
1787 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1788 BODY2;
1789 BODY1;
1790 GIMPLE_OMP_CONTINUE;
1791 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1792 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
1793 goto end;
1794
1795 original:
1796 loop
1797 {
1798 IV = phi (INIT, IV + STEP)
1799 BODY1;
1800 if (COND)
1801 break;
1802 BODY2;
1803 }
1804
1805 end:
1806
1807 */
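
/* As a concrete illustration (hypothetical numbers), with N_THREADS == 4
   and MIN_PER_THREAD == 100 the loop

   for (i = 0; i < n; i++)
   a[i] = b[i] + c[i];

   enters the parallel version only when n >= 400; for smaller n the
   sequential copy created by loop_version below is executed instead. */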
1808
1809 /* Create two versions of the loop -- in the old one, we know that the
1810 number of iterations is large enough, and we will transform it into the
1811 loop that will be split off into loop_fn; the new one will be used for
1812 the remaining iterations. */
1813
1814 /* We should compute a better number-of-iterations value for outer loops.
1815 That is, if we have
1816
1817 for (i = 0; i < n; ++i)
1818 for (j = 0; j < m; ++j)
1819 ...
1820
1821 we should compute nit = n * m, not nit = n.
1822 Also may_be_zero handling would need to be adjusted. */
1823
1824 type = TREE_TYPE (niter->niter);
1825 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1826 NULL_TREE);
1827 if (stmts)
1828 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1829
1830 if (loop->inner)
1831 m_p_thread = 2;
1832 else
1833 m_p_thread = MIN_PER_THREAD;
1834
1835 many_iterations_cond =
1836 fold_build2 (GE_EXPR, boolean_type_node,
1837 nit, build_int_cst (type, m_p_thread * n_threads));
1838
1839 many_iterations_cond
1840 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1841 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1842 many_iterations_cond);
1843 many_iterations_cond
1844 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
1845 if (stmts)
1846 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1847 if (!is_gimple_condexpr (many_iterations_cond))
1848 {
1849 many_iterations_cond
1850 = force_gimple_operand (many_iterations_cond, &stmts,
1851 true, NULL_TREE);
1852 if (stmts)
1853 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
1854 }
1855
1856 initialize_original_copy_tables ();
1857
1858 /* We assume that the loop usually iterates a lot. */
1859 prob = 4 * REG_BR_PROB_BASE / 5;
1860 loop_version (loop, many_iterations_cond, NULL,
1861 prob, prob, REG_BR_PROB_BASE - prob, true);
1862 update_ssa (TODO_update_ssa);
1863 free_original_copy_tables ();
1864
1865 /* Base all the induction variables in LOOP on a single control one. */
1866 canonicalize_loop_ivs (loop, &nit, true);
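/* E.g. a loop stepping two IVs in lockstep,

   for (i = 0, p = a; i < n; i++, p += 4)
   ...

   is rewritten (sketch) so that a single canonical counter runs from 0 to
   NIT, and both i and p are recomputed from that counter in the body. */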
1867
1868 /* Ensure that the exit condition is the first statement in the loop. */
1869 transform_to_exit_first_loop (loop, reduction_list, nit);
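/* I.e. the loop is reshaped (sketch) from the do-while-like form

   BODY; if (COND) break;

   into

   if (COND) break; BODY;

   which is the shape the GIMPLE_OMP_FOR expansion expects. */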
1870
1871 /* Generate initializations for reductions. */
1872 if (reduction_list.elements () > 0)
1873 reduction_list.traverse <struct loop *, initialize_reductions> (loop);
1874
1875 /* Eliminate the references to local variables from the loop. */
1876 gcc_assert (single_exit (loop));
1877 entry = loop_preheader_edge (loop);
1878 exit = single_dom_exit (loop);
1879
1880 eliminate_local_variables (entry, exit);
1881 /* In the old loop, move all variables non-local to the loop into a structure
1882 and back, and create separate decls for the variables used in the loop. */
1883 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1884 &new_arg_struct, &clsn_data);
1885
1886 /* Create the parallel constructs. */
1887 loc = UNKNOWN_LOCATION;
1888 cond_stmt = last_stmt (loop->header);
1889 if (cond_stmt)
1890 loc = gimple_location (cond_stmt);
1891 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1892 new_arg_struct, n_threads, loc);
1893 if (reduction_list.elements () > 0)
1894 create_call_for_reduction (loop, reduction_list, &clsn_data);
1895
1896 scev_reset ();
1897
1898 /* Cancel the loop (it is simpler to do it here rather than to teach the
1899 expander to do it). */
1900 cancel_loop_tree (loop);
1901
1902 /* Free loop bound estimations that could contain references to
1903 removed statements. */
1904 FOR_EACH_LOOP (loop, 0)
1905 free_numbers_of_iterations_estimates_loop (loop);
1906
1907 /* Expand the parallel constructs. We do it directly here instead of running
1908 a separate expand_omp pass, since it is more efficient, and less likely to
1909 cause trouble with further analyses that cannot deal with the
1910 OMP trees. */
1911
1912 omp_expand_local (parallel_head);
1913 }
1914
1915 /* Returns true when LOOP contains vector phi nodes. */
1916
1917 static bool
1918 loop_has_vector_phi_nodes (struct loop *loop)
1919 {
1920 unsigned i;
1921 basic_block *bbs = get_loop_body_in_dom_order (loop);
1922 gimple_stmt_iterator gsi;
1923 bool res = true;
1924
1925 for (i = 0; i < loop->num_nodes; i++)
1926 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1927 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1928 goto end;
1929
1930 res = false;
1931 end:
1932 free (bbs);
1933 return res;
1934 }
1935
1936 /* Create a reduction_info struct, initialize it with REDUC_STMT
1937 and PHI, and insert it into REDUCTION_LIST. */
1938
1939 static void
1940 build_new_reduction (reduction_info_table_type reduction_list,
1941 gimple reduc_stmt, gimple phi)
1942 {
1943 reduction_info **slot;
1944 struct reduction_info *new_reduction;
1945
1946 gcc_assert (reduc_stmt);
1947
1948 if (dump_file && (dump_flags & TDF_DETAILS))
1949 {
1950 fprintf (dump_file,
1951 "Detected reduction. Reduction stmt is:\n");
1952 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1953 fprintf (dump_file, "\n");
1954 }
1955
1956 new_reduction = XCNEW (struct reduction_info);
1957
1958 new_reduction->reduc_stmt = reduc_stmt;
1959 new_reduction->reduc_phi = phi;
1960 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
1961 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1962 slot = reduction_list.find_slot (new_reduction, INSERT);
1963 *slot = new_reduction;
1964 }
1965
1966 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1967
1968 int
1969 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
1970 {
1971 struct reduction_info *const red = *slot;
1972 gimple_set_uid (red->reduc_phi, red->reduc_version);
1973 return 1;
1974 }
1975
1976 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1977
1978 static void
1979 gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
1980 {
1981 gimple_stmt_iterator gsi;
1982 loop_vec_info simple_loop_info;
1983
1984 simple_loop_info = vect_analyze_loop_form (loop);
1985
1986 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1987 {
1988 gimple phi = gsi_stmt (gsi);
1989 affine_iv iv;
1990 tree res = PHI_RESULT (phi);
1991 bool double_reduc;
1992
1993 if (virtual_operand_p (res))
1994 continue;
1995
1996 if (!simple_iv (loop, loop, res, &iv, true)
1997 && simple_loop_info)
1998 {
1999 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
2000 phi, true,
2001 &double_reduc);
2002 if (reduc_stmt && !double_reduc)
2003 build_new_reduction (reduction_list, reduc_stmt, phi);
2004 }
2005 }
2006 destroy_loop_vec_info (simple_loop_info, true);
2007
2008 /* Because gimple_uid is used by the vectorizer between vect_analyze_loop_form
2009 and destroy_loop_vec_info, we may set the gimple_uid of reduc_phi stmts
2010 only now. */
2011 reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
2012 }
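
/* For illustration, a reduction recognized above typically looks like
   this in GIMPLE (simplified sketch):

   # sum_1 = PHI <init_5 (preheader), sum_2 (latch)>
   ...
   sum_2 = sum_1 + val_3;

   vect_force_simple_reduction returns the statement defining sum_2, and
   build_new_reduction records it keyed by the SSA version of the PHI
   result. */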
2013
2014 /* Try to initialize NITER for the code generation part. */
2015
2016 static bool
2017 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2018 {
2019 edge exit = single_dom_exit (loop);
2020
2021 gcc_assert (exit);
2022
2023 /* We need to know the number of iterations, and values defined inside
2024 the loop must not be used outside of it, unless they are invariants
2025 of the loop. */
2026 if (!number_of_iterations_exit (loop, exit, niter, false))
2027 {
2028 if (dump_file && (dump_flags & TDF_DETAILS))
2029 fprintf (dump_file, " FAILED: number of iterations not known\n");
2030 return false;
2031 }
2032
2033 return true;
2034 }
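
/* A sketch of what this distinguishes:

   for (i = 0; i != n; i++)

   yields NITER == n, whereas a loop whose exit condition depends on
   data read inside the loop makes number_of_iterations_exit fail, so
   the candidate is rejected. */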
2035
2036 /* Try to initialize REDUCTION_LIST for the code generation part.
2037 REDUCTION_LIST describes the reductions. */
2038
2039 static bool
2040 try_create_reduction_list (loop_p loop,
2041 reduction_info_table_type reduction_list)
2042 {
2043 edge exit = single_dom_exit (loop);
2044 gimple_stmt_iterator gsi;
2045
2046 gcc_assert (exit);
2047
2048 gather_scalar_reductions (loop, reduction_list);
2049
2050
2051 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2052 {
2053 gimple phi = gsi_stmt (gsi);
2054 struct reduction_info *red;
2055 imm_use_iterator imm_iter;
2056 use_operand_p use_p;
2057 gimple reduc_phi;
2058 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2059
2060 if (!virtual_operand_p (val))
2061 {
2062 if (dump_file && (dump_flags & TDF_DETAILS))
2063 {
2064 fprintf (dump_file, "phi is ");
2065 print_gimple_stmt (dump_file, phi, 0, 0);
2066 fprintf (dump_file, "arg of phi to exit: value ");
2067 print_generic_expr (dump_file, val, 0);
2068 fprintf (dump_file, " used outside loop\n");
2069 fprintf (dump_file,
2070 " checking if it a part of reduction pattern: \n");
2071 }
2072 if (reduction_list.elements () == 0)
2073 {
2074 if (dump_file && (dump_flags & TDF_DETAILS))
2075 fprintf (dump_file,
2076 " FAILED: it is not a part of reduction.\n");
2077 return false;
2078 }
2079 reduc_phi = NULL;
2080 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2081 {
2082 if (!gimple_debug_bind_p (USE_STMT (use_p))
2083 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2084 {
2085 reduc_phi = USE_STMT (use_p);
2086 break;
2087 }
2088 }
2089 red = reduction_phi (reduction_list, reduc_phi);
2090 if (red == NULL)
2091 {
2092 if (dump_file && (dump_flags & TDF_DETAILS))
2093 fprintf (dump_file,
2094 " FAILED: it is not a part of reduction.\n");
2095 return false;
2096 }
2097 if (dump_file && (dump_flags & TDF_DETAILS))
2098 {
2099 fprintf (dump_file, "reduction phi is ");
2100 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2101 fprintf (dump_file, "reduction stmt is ");
2102 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2103 }
2104 }
2105 }
2106
2107 /* The iterations of the loop may communicate only through basic induction
2108 variables (bivs) whose iteration space can be distributed efficiently. */
2109 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2110 {
2111 gimple phi = gsi_stmt (gsi);
2112 tree def = PHI_RESULT (phi);
2113 affine_iv iv;
2114
2115 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2116 {
2117 struct reduction_info *red;
2118
2119 red = reduction_phi (reduction_list, phi);
2120 if (red == NULL)
2121 {
2122 if (dump_file && (dump_flags & TDF_DETAILS))
2123 fprintf (dump_file,
2124 " FAILED: scalar dependency between iterations\n");
2125 return false;
2126 }
2127 }
2128 }
2129
2130
2131 return true;
2132 }
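
/* A sketch of the scalar dependency check above: in

   for (i = 0; i < n; i++)
   x = foo (x);

   the header PHI for x is neither a simple affine IV nor a recorded
   reduction, so the loop is rejected, while a plain accumulation such as
   x += a[i] passes because gather_scalar_reductions recorded it. */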
2133
2134 /* Detect parallel loops and generate parallel code using libgomp
2135 primitives. Returns true if some loop was parallelized, false
2136 otherwise. */
2137
2138 bool
2139 parallelize_loops (void)
2140 {
2141 unsigned n_threads = flag_tree_parallelize_loops;
2142 bool changed = false;
2143 struct loop *loop;
2144 struct tree_niter_desc niter_desc;
2145 reduction_info_table_type reduction_list;
2146 struct obstack parloop_obstack;
2147 HOST_WIDE_INT estimated;
2148 LOC loop_loc;
2149
2150 /* Do not parallelize loops in the functions created by parallelization. */
2151 if (parallelized_function_p (cfun->decl))
2152 return false;
2153 if (cfun->has_nonlocal_label)
2154 return false;
2155
2156 gcc_obstack_init (&parloop_obstack);
2157 reduction_list.create (10);
2158 init_stmt_vec_info_vec ();
2159
2160 FOR_EACH_LOOP (loop, 0)
2161 {
2162 reduction_list.empty ();
2163 if (dump_file && (dump_flags & TDF_DETAILS))
2164 {
2165 fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
2166 if (loop->inner)
2167 fprintf (dump_file, "loop %d is not innermost\n", loop->num);
2168 else
2169 fprintf (dump_file, "loop %d is innermost\n", loop->num);
2170 }
2171
2172 /* If autopar was run as part of the graphite pass, reuse the dependency
2173 information it computed and recorded in loop->can_be_parallel. */
2174 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2175 {
2176 if (dump_file && (dump_flags & TDF_DETAILS))
2177 fprintf (dump_file, "loop is not parallel according to graphite\n");
2178 continue;
2179 }
2180
2181 if (!single_dom_exit (loop))
2182 {
2183
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 fprintf (dump_file, "loop is !single_dom_exit\n");
2186
2187 continue;
2188 }
2189
2190 if (/* And of course, the loop must be parallelizable. */
2191 !can_duplicate_loop_p (loop)
2192 || loop_has_blocks_with_irreducible_flag (loop)
2193 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2194 /* FIXME: the check for vector phi nodes could be removed. */
2195 || loop_has_vector_phi_nodes (loop))
2196 continue;
2197
2198 estimated = estimated_stmt_executions_int (loop);
2199 if (estimated == -1)
2200 estimated = max_stmt_executions_int (loop);
2201 /* FIXME: Bypass this check as graphite doesn't update the
2202 count and frequency correctly yet. */
2203 if (!flag_loop_parallelize_all
2204 && ((estimated != -1
2205 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2206 /* Do not bother with loops in cold areas. */
2207 || optimize_loop_nest_for_size_p (loop)))
2208 continue;
2209
2210 if (!try_get_loop_niter (loop, &niter_desc))
2211 continue;
2212
2213 if (!try_create_reduction_list (loop, reduction_list))
2214 continue;
2215
2216 if (!flag_loop_parallelize_all
2217 && !loop_parallel_p (loop, &parloop_obstack))
2218 continue;
2219
2220 changed = true;
2221 if (dump_file && (dump_flags & TDF_DETAILS))
2222 {
2223 if (loop->inner)
2224 fprintf (dump_file, "parallelizing outer loop %d\n", loop->header->index);
2225 else
2226 fprintf (dump_file, "parallelizing inner loop %d\n", loop->header->index);
2227 loop_loc = find_loop_location (loop);
2228 if (loop_loc != UNKNOWN_LOC)
2229 fprintf (dump_file, "\nloop at %s:%d: ",
2230 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
2231 }
2232 gen_parallel_loop (loop, reduction_list,
2233 n_threads, &niter_desc);
2234 }
2235
2236 free_stmt_vec_info_vec ();
2237 reduction_list.dispose ();
2238 obstack_free (&parloop_obstack, NULL);
2239
2240 /* Parallelization will cause new function calls to be inserted through
2241 which local variables will escape. Reset the points-to solution
2242 for ESCAPED. */
2243 if (changed)
2244 pt_solution_reset (&cfun->gimple_df->escaped);
2245
2246 return changed;
2247 }
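
/* For reference, the pass is typically exercised as

   gcc -O2 -ftree-parallelize-loops=4 test.c

   which sets flag_tree_parallelize_loops (and hence n_threads above) to 4;
   the emitted code then calls into libgomp at run time. */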
2248
2249 /* Gate for the parallelization pass: run only when at least two threads are requested. */
2250
2251 static bool
2252 gate_tree_parallelize_loops (void)
2253 {
2254 return flag_tree_parallelize_loops > 1;
2255 }
2256
2257 static unsigned
2258 tree_parallelize_loops (void)
2259 {
2260 if (number_of_loops (cfun) <= 1)
2261 return 0;
2262
2263 if (parallelize_loops ())
2264 return TODO_cleanup_cfg | TODO_rebuild_alias;
2265 return 0;
2266 }
2267
2268 namespace {
2269
2270 const pass_data pass_data_parallelize_loops =
2271 {
2272 GIMPLE_PASS, /* type */
2273 "parloops", /* name */
2274 OPTGROUP_LOOP, /* optinfo_flags */
2275 true, /* has_gate */
2276 true, /* has_execute */
2277 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2278 ( PROP_cfg | PROP_ssa ), /* properties_required */
2279 0, /* properties_provided */
2280 0, /* properties_destroyed */
2281 0, /* todo_flags_start */
2282 TODO_verify_flow, /* todo_flags_finish */
2283 };
2284
2285 class pass_parallelize_loops : public gimple_opt_pass
2286 {
2287 public:
2288 pass_parallelize_loops (gcc::context *ctxt)
2289 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2290 {}
2291
2292 /* opt_pass methods: */
2293 bool gate () { return gate_tree_parallelize_loops (); }
2294 unsigned int execute () { return tree_parallelize_loops (); }
2295
2296 }; // class pass_parallelize_loops
2297
2298 } // anon namespace
2299
2300 gimple_opt_pass *
2301 make_pass_parallelize_loops (gcc::context *ctxt)
2302 {
2303 return new pass_parallelize_loops (ctxt);
2304 }
2305
2306
2307 #include "gt-tree-parloops.h"