/* Loop autoparallelization.
   Copyright (C) 2006-2016 Free Software Foundation, Inc.
   Contributed by Sebastian Pop <pop@cri.ensmp.fr>
   Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "ssa.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "fold-const.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-walk.h"
#include "stor-layout.h"
#include "tree-nested.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "tree-into-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "langhooks.h"
#include "tree-vectorizer.h"
#include "tree-hasher.h"
#include "tree-parloops.h"
#include "omp-low.h"
#include "tree-ssa.h"
#include "params.h"
#include "params-enum.h"

/* This pass tries to distribute iterations of loops into several threads.
   The implementation is straightforward -- for each loop we test whether its
   iterations are independent, and if it is the case (and some additional
   conditions regarding profitability and correctness are satisfied), we
   add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
   machinery do its job.

   Most of the complexity is in bringing the code into shape expected
   by the omp expanders:
   -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
      variable and that the exit test is at the start of the loop body
   -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
      variables by accesses through pointers, and breaking up ssa chains
      by storing the values incoming to the parallelized loop to a structure
      passed to the new function as an argument (something similar is done
      in omp gimplification, unfortunately only a small part of the code
      can be shared).

   TODO:
   -- if there are several parallelizable loops in a function, it may be
      possible to generate the threads just once (using synchronization to
      ensure that cross-loop dependences are obeyed).
   -- handling of common reduction patterns for outer loops.

   More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC  */
/*
  Reduction handling:
  currently we use vect_force_simple_reduction() to detect reduction patterns.
  The code transformation will be introduced by an example.


parloop
{
  int sum=1;

  for (i = 0; i < N; i++)
   {
    x[i] = i + 3;
    sum+=x[i];
   }
}

gimple-like code:
header_bb:

  # sum_29 = PHI <sum_11(5), 1(3)>
  # i_28 = PHI <i_12(5), 0(3)>
  D.1795_8 = i_28 + 3;
  x[i_28] = D.1795_8;
  sum_11 = D.1795_8 + sum_29;
  i_12 = i_28 + 1;
  if (N_6(D) > i_12)
    goto header_bb;


exit_bb:

  # sum_21 = PHI <sum_11(4)>
  printf (&"%d"[0], sum_21);


after reduction transformation (only relevant parts):

parloop
{

....


  # Storing the initial value given by the user.  #

  .paral_data_store.32.sum.27 = 1;

  #pragma omp parallel num_threads(4)

  #pragma omp for schedule(static)

  # The neutral element corresponding to the particular
  reduction's operation, e.g. 0 for PLUS_EXPR,
  1 for MULT_EXPR, etc. replaces the user's initial value.  #

  # sum.27_29 = PHI <sum.27_11, 0>

  sum.27_11 = D.1827_8 + sum.27_29;

  GIMPLE_OMP_CONTINUE

  # Adding this reduction phi is done at create_phi_for_local_result() #
  # sum.27_56 = PHI <sum.27_11, 0>
  GIMPLE_OMP_RETURN

  # Creating the atomic operation is done at
  create_call_for_reduction_1() #

  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);

  GIMPLE_OMP_RETURN

  # collecting the result after the join of the threads is done at
  create_loads_for_reductions().
  The value computed by the threads is loaded from the
  shared struct.  #


  .paral_data_load.33_52 = &.paral_data_store.32;
  sum_37 = .paral_data_load.33_52->sum.27;
  sum_43 = D.1795_41 + sum_37;

  exit bb:
  # sum_21 = PHI <sum_43, sum_26>
  printf (&"%d"[0], sum_21);

  ...

}

*/

/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;           /* Reduction statement.  */
  gimple *reduc_phi;            /* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* Code for the reduction operation.  */
  unsigned reduc_version;       /* SSA_NAME_VERSION of original reduc_phi
                                   result.  */
  gphi *keep_res;               /* The PHI_RESULT of this phi is the resulting
                                   value of the reduction variable when exiting
                                   the loop.  */
  tree initial_value;           /* The initial value of the reduction var
                                   before entering the loop.  */
  tree field;                   /* The name of the field in the parloop data
                                   structure intended for reduction.  */
  tree init;                    /* Reduction initialization value.  */
  gphi *new_phi;                /* (helper field) Newly created phi node whose
                                   result will be passed to the atomic
                                   operation.  Represents the local result each
                                   thread computed for the reduction
                                   operation.  */
};

/* Reduction info hashtable helpers.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;

/* Return the reduction info for PHI from REDUCTION_LIST, or NULL if PHI
   does not describe a registered reduction.  */

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  if (gimple_uid (phi) == (unsigned int)-1
      || gimple_uid (phi) == 0)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);
  gcc_assert (red == NULL || red->reduc_phi == phi);

  return red;
}

/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;     /* The version of the name to copy.  */
  tree new_name;        /* The new name used in the copy.  */
  tree field;           /* The field of the structure used to pass the
                           value.  */
};

/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;

/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)

/* Allocate a new transformation matrix.  */

static lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
                         struct obstack * lambda_obstack)
{
  lambda_trans_matrix ret;

  ret = (lambda_trans_matrix)
    obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
  LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
  LTM_ROWSIZE (ret) = rowsize;
  LTM_COLSIZE (ret) = colsize;
  LTM_DENOMINATOR (ret) = 1;
  return ret;
}

/* Multiply a vector VEC by a matrix MAT.
   MAT is an M*N matrix, and VEC is a vector with length N.  The result
   is stored in DEST which must be a vector of length M.  */

static void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
                           lambda_vector vec, lambda_vector dest)
{
  int i, j;

  lambda_vector_clear (dest, m);
  for (i = 0; i < m; i++)
    for (j = 0; j < n; j++)
      dest[i] += matrix[i][j] * vec[j];
}
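
/* A worked instance of the formula above (an illustrative sketch, not
   used by the pass): with the 2x2 matrix

     M = { { 1, 1 },
           { 0, 1 } }

   and VEC = { 2, 3 }, the result is
   DEST = { 1*2 + 1*3, 0*2 + 1*3 } = { 5, 3 }.  */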

/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DEPENDENCE_RELATIONS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
                          int nb_loops,
                          vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
              && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
         dependence, nor about read-read (aka. input) dependences:
         these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
          || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
        continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
        return false;

      /* If the dependence could not be captured by a distance vector,
         conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
        return false;

      /* Compute trans.dist_vect.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
        {
          lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
                                     DDR_DIST_VECT (ddr, j), distres);

          if (!lambda_vector_lexico_pos (distres, nb_loops))
            return false;
        }
    }
  return true;
}
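
/* Example: this file only ever checks the 1x1 loop-reversal matrix
   T = [-1] (see loop_parallel_p below).  A loop-carried dependence with
   distance vector (1), as created by e.g. a[i] = a[i-1] + 1, transforms
   to T.d = (-1), which is not lexicographically positive, so the
   reversal (and hence parallelization) is rejected; a dependence-free
   loop passes trivially.  */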

/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
        fprintf (dump_file, "loop is innermost\n");
      else
        fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
                                           &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
             "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
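
/* For instance, the iterations of

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   are independent and the loop is accepted, whereas

     for (i = 1; i < n; i++)
       a[i] = a[i-1] + b[i];

   carries a dependence of distance 1 across iterations and is
   rejected.  */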

/* Return true when LOOP contains basic blocks marked with the
   BB_IRREDUCIBLE_LOOP flag.  */

static inline bool
loop_has_blocks_with_irreducible_flag (struct loop *loop)
{
  unsigned i;
  basic_block *bbs = get_loop_body_in_dom_order (loop);
  bool res = true;

  for (i = 0; i < loop->num_nodes; i++)
    if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
      goto end;

  res = false;
 end:
  free (bbs);
  return res;
}

/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  */

static tree
take_address_of (tree obj, tree type, edge entry,
                 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      if (gsi == NULL)
        return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
        = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
        name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
        name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
                               &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
                                   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
        gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
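
/* A sketch of the effect for a local variable 'x' (the names below are
   illustrative): the base decl is canonicalized to a MEM_REF, a single

     x.addr_1 = &x;

   assignment is emitted on ENTRY and shared by all accesses based on
   'x', and the address of OBJ is rebuilt in terms of x.addr_1 and
   converted to TYPE if necessary.  */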

/* Return the result of the reduction statement STMT: the phi result for
   a phi node, the assignment lhs otherwise.  */

static tree
reduc_stmt_res (gimple *stmt)
{
  return (gimple_code (stmt) == GIMPLE_PHI
          ? gimple_phi_result (stmt)
          : gimple_assign_lhs (stmt));
}

/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
                                reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken into consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
           (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
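
/* E.g. for the 'sum' reduction in the example at the top of the file,
   the preheader argument 1 of the header phi is replaced by the
   PLUS_EXPR neutral element 0, and the user's initial value 1 is
   remembered in reduc->initial_value so that it can be folded back in
   after the threads join.  */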

struct elv_data
{
  struct walk_stmt_info info;
  edge entry;
  int_tree_htab_type *decl_address;
  gimple_stmt_iterator *gsi;
  bool changed;
  bool reset;
};

/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
        return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
                              dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
        {
          dta->reset = true;
          return NULL_TREE;
        }

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
         -- as a gimple operand, when the address taken is a function invariant
         -- as gimple rhs, when the resulting address is not a function
            invariant
         We do not need to do anything special in the latter case (the base of
         the memory reference whose address is taken may be replaced in the
         DECL_P case).  The former case is more complicated, as we need to
         ensure that the new address is still a gimple operand.  Thus, it
         is not sufficient to replace just the base of the memory reference --
         we need to move the whole computation of the address out of the
         loop.  */
      if (!is_gimple_val (t))
        return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
        return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
                              dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
        {
          dta->reset = true;
          return NULL_TREE;
        }
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}

/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
                                int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
                 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
        {
          gimple_debug_bind_reset_value (stmt);
          dta.changed = true;
        }
    }
  else if (gimple_clobber_p (stmt))
    {
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}

/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
      region (and temporary variable is created to hold the address if
      necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
      references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
        if (is_gimple_debug (gsi_stmt (gsi)))
          {
            if (gimple_debug_bind_p (gsi_stmt (gsi)))
              has_debug_stmt = true;
          }
        else
          eliminate_local_variables_stmt (entry, &gsi, &decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
        for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
          if (gimple_debug_bind_p (gsi_stmt (gsi)))
            eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
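
/* For a local variable 'x' used in the region, the rewrite looks
   roughly like this (with illustrative SSA names):

     x_addr = &x;        <-- inserted on the ENTRY edge

     ... = x;            becomes    ... = *x_addr;
     p = &x;             becomes    p = x_addr;

   so that the region no longer refers to the local decl directly.  */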

/* Returns true if expression EXPR is not defined between ENTRY and
   EXIT, i.e. if all its operands are defined outside of the region.  */

static bool
expr_invariant_in_region_p (edge entry, edge exit, tree expr)
{
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;
  basic_block def_bb;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
          && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
        return false;

      return true;
    }

  return false;
}

/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES; if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
                               int_tree_htab_type *decl_copies,
                               bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
                                           copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
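
/* A sketch: if sum_5, based on decl 'sum', is defined outside the
   region and used inside it, a duplicate sum_7 based on a fresh decl is
   created and recorded, so that every later use of sum_5 in the region
   maps to the same sum_7; the field member is filled in later by
   add_field_for_name.  */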

/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
                               name_to_copy_table_type *name_copies,
                               int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
                                            false);
      gcc_assert (copy == name);
    }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
        continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
                                            copy_name_p);
      SET_USE (use, copy);
    }
}

/* Finds the ssa names used in the debug stmt STMT and replaces them
   with their duplicates from NAME_COPIES; the variable the stmt
   describes is replaced with its duplicate from DECL_COPIES.  Returns
   true if the debug statement cannot be adjusted and should be removed
   instead.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
                                name_to_copy_table_type *name_copies,
                                int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
        continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
        {
          gimple_debug_bind_reset_value (stmt);
          update_stmt (stmt);
          break;
        }

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}

/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = reduc_stmt_res (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
                           SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  red->field = field;

  return 1;
}

/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  tree field = build_decl (UNKNOWN_LOCATION,
                           FIELD_DECL, SSA_NAME_IDENTIFIER (name),
                           TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}

/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};

/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
                   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
                   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
                                   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}

/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
                           reduction_info_table_type *reduction_list,
                           struct clsn_data *ld_st_data)
{
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}

/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
                        NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
        remove_phi_node (&gsi, false);
        return 1;
      }
  gcc_unreachable ();
}

/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
                                  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple *stmt;

  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}

/* Callback for htab_traverse.  Store the initial value of the reduction
   (as remembered by initialize_reductions) into the reduction field of
   the shared data structure, so that it can be combined with the
   threads' results after the join.  The reduction is specified in SLOT.
   The store information is passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}

/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
                                  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
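
/* For a duplicated name whose field is 'x' this generates, in effect
   (using the variable names created by separate_decls_in_region below):

     .paral_data_store.x = x_1;       <-- in STORE_BB
     x_dup = .paral_data_load->x;     <-- in LOAD_BB

   where x_1 is the original name and x_dup its duplicate used inside
   the region.  */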

/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
                          reduction_info_table_type *reduction_list,
                          tree *arg_struct, tree *new_arg_struct,
                          struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
                                           &name_copies, &decl_copies);

          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            {
              gimple *stmt = gsi_stmt (gsi);

              if (is_gimple_debug (stmt))
                has_debug_stmt = true;
              else
                separate_decls_in_region_stmt (entry, exit, stmt,
                                               &name_copies, &decl_copies);
            }
        }
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
            {
              gimple *stmt = gsi_stmt (gsi);

              if (is_gimple_debug (stmt))
                {
                  if (separate_decls_in_region_debug (stmt, &name_copies,
                                                      &decl_copies))
                    {
                      gsi_remove (&gsi, true);
                      continue;
                    }
                }

              gsi_next (&gsi);
            }
        }

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
                              TYPE_DECL, create_tmp_var_name (".paral_data"),
                              type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
        {
          /* Create the fields for reductions.  */
          reduction_list->traverse <tree, add_field_for_reduction> (type);
        }
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
        .traverse <struct clsn_data *, create_loads_and_stores_for_name>
        (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
        {
          reduction_list
            ->traverse <struct clsn_data *, create_stores_for_reduction>
            (ld_st_data);
          clsn_data.load = make_ssa_name (nvar);
          clsn_data.load_bb = exit->dest;
          clsn_data.store = ld_st_data->store;
          create_final_loads_for_reduction (reduction_list, &clsn_data);
        }
    }
}

/* Returns true if FNDECL was created to run in parallel.  */

bool
parallelized_function_p (tree fndecl)
{
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}

/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
                  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
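
/* Conceptually, the created function has the prototype

     static void foo.$loopfn.N (void *.paral_data_param);

   matching the build_function_type_list call above (a sketch: the exact
   name is produced by ASM_FORMAT_PRIVATE_NAME and is
   target-dependent).  */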
1473
1474 /* Replace uses of NAME by VAL in block BB. */
1475
1476 static void
1477 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1478 {
1479 gimple *use_stmt;
1480 imm_use_iterator imm_iter;
1481
1482 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1483 {
1484 if (gimple_bb (use_stmt) != bb)
1485 continue;
1486
1487 use_operand_p use_p;
1488 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1489 SET_USE (use_p, val);
1490 }
1491 }
1492
1493 /* Do transformation from:
1494
1495 <bb preheader>:
1496 ...
1497 goto <bb header>
1498
1499 <bb header>:
1500 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1501 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1502 ...
1503 use (ivtmp_a)
1504 ...
1505 sum_b = sum_a + sum_update
1506 ...
1507 if (ivtmp_a < n)
1508 goto <bb latch>;
1509 else
1510 goto <bb exit>;
1511
1512 <bb latch>:
1513 ivtmp_b = ivtmp_a + 1;
1514 goto <bb header>
1515
1516 <bb exit>:
1517 sum_z = PHI <sum_b (cond[1]), ...>
1518
1519 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1520 that's <bb header>.
1521
1522 to:
1523
1524 <bb preheader>:
1525 ...
1526 goto <bb newheader>
1527
1528 <bb header>:
1529 ivtmp_a = PHI <ivtmp_c (latch)>
1530 sum_a = PHI <sum_c (latch)>
1531 ...
1532 use (ivtmp_a)
1533 ...
1534 sum_b = sum_a + sum_update
1535 ...
1536 goto <bb latch>;
1537
1538 <bb newheader>:
1539 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1540 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1541 if (ivtmp_c < n + 1)
1542 goto <bb header>;
1543 else
1544 goto <bb newexit>;
1545
1546 <bb latch>:
1547 ivtmp_b = ivtmp_a + 1;
1548 goto <bb newheader>
1549
1550 <bb newexit>:
1551 sum_y = PHI <sum_c (newheader)>
1552
1553 <bb exit>:
1554 sum_z = PHI <sum_y (newexit), ...>
1555
1556
1557 In unified diff format:
1558
1559 <bb preheader>:
1560 ...
1561 - goto <bb header>
1562 + goto <bb newheader>
1563
1564 <bb header>:
1565 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1566 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1567 + ivtmp_a = PHI <ivtmp_c (latch)>
1568 + sum_a = PHI <sum_c (latch)>
1569 ...
1570 use (ivtmp_a)
1571 ...
1572 sum_b = sum_a + sum_update
1573 ...
1574 - if (ivtmp_a < n)
1575 - goto <bb latch>;
1576 + goto <bb latch>;
1577 +
1578 + <bb newheader>:
1579 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1580 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1581 + if (ivtmp_c < n + 1)
1582 + goto <bb header>;
1583 else
1584 goto <bb exit>;
1585
1586 <bb latch>:
1587 ivtmp_b = ivtmp_a + 1;
1588 - goto <bb header>
1589 + goto <bb newheader>
1590
1591 + <bb newexit>:
1592 + sum_y = PHI <sum_c (newheader)>
1593
1594 <bb exit>:
1595 - sum_z = PHI <sum_b (cond[1]), ...>
1596 + sum_z = PHI <sum_y (newexit), ...>
1597
1598 Note: the example does not show any virtual phis, but these are handled more
1599 or less as reductions.
1600
1601
1602 Moves the exit condition of LOOP to the beginning of its header.
1603 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1604 bound. */
1605
1606 static void
1607 transform_to_exit_first_loop_alt (struct loop *loop,
1608 reduction_info_table_type *reduction_list,
1609 tree bound)
1610 {
1611 basic_block header = loop->header;
1612 basic_block latch = loop->latch;
1613 edge exit = single_dom_exit (loop);
1614 basic_block exit_block = exit->dest;
1615 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1616 tree control = gimple_cond_lhs (cond_stmt);
1617 edge e;
1618
1619 /* Rewriting virtuals into loop-closed ssa normal form makes this
1620 transformation simpler. It also ensures that the virtuals are in
1621 loop-closed ssa normal from after the transformation, which is required by
1622 create_parallel_loop. */
1623 rewrite_virtuals_into_loop_closed_ssa (loop);
1624
1625 /* Create the new_header block. */
1626 basic_block new_header = split_block_before_cond_jump (exit->src);
1627 edge edge_at_split = single_pred_edge (new_header);
1628
1629 /* Redirect entry edge to new_header. */
1630 edge entry = loop_preheader_edge (loop);
1631 e = redirect_edge_and_branch (entry, new_header);
1632 gcc_assert (e == entry);
1633
1634 /* Redirect post_inc_edge to new_header. */
1635 edge post_inc_edge = single_succ_edge (latch);
1636 e = redirect_edge_and_branch (post_inc_edge, new_header);
1637 gcc_assert (e == post_inc_edge);
1638
1639 /* Redirect post_cond_edge to header. */
1640 edge post_cond_edge = single_pred_edge (latch);
1641 e = redirect_edge_and_branch (post_cond_edge, header);
1642 gcc_assert (e == post_cond_edge);
1643
1644 /* Redirect edge_at_split to latch. */
1645 e = redirect_edge_and_branch (edge_at_split, latch);
1646 gcc_assert (e == edge_at_split);
1647
1648 /* Set the new loop bound. */
1649 gimple_cond_set_rhs (cond_stmt, bound);
1650 update_stmt (cond_stmt);
1651
1652 /* Repair the ssa. */
1653 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1654 edge_var_map *vm;
1655 gphi_iterator gsi;
1656 int i;
1657 for (gsi = gsi_start_phis (header), i = 0;
1658 !gsi_end_p (gsi) && v->iterate (i, &vm);
1659 gsi_next (&gsi), i++)
1660 {
1661 gphi *phi = gsi.phi ();
1662 tree res_a = PHI_RESULT (phi);
1663
1664 /* Create new phi. */
1665 tree res_c = copy_ssa_name (res_a, phi);
1666 gphi *nphi = create_phi_node (res_c, new_header);
1667
1668 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1669 replace_uses_in_bb_by (res_a, res_c, new_header);
1670
1671 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1672 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1673
1674 /* Replace sum_b with sum_c in exit phi. */
1675 tree res_b = redirect_edge_var_map_def (vm);
1676 replace_uses_in_bb_by (res_b, res_c, exit_block);
1677
1678 struct reduction_info *red = reduction_phi (reduction_list, phi);
1679 gcc_assert (virtual_operand_p (res_a)
1680 || res_a == control
1681 || red != NULL);
1682
1683 if (red)
1684 {
1685 /* Register the new reduction phi. */
1686 red->reduc_phi = nphi;
1687 gimple_set_uid (red->reduc_phi, red->reduc_version);
1688 }
1689 }
1690 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
1691
1692 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1693 flush_pending_stmts (entry);
1694
1695 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1696 flush_pending_stmts (post_inc_edge);
1697
1698
1699 basic_block new_exit_block = NULL;
1700 if (!single_pred_p (exit->dest))
1701 {
1702 /* Create a new empty exit block, inbetween the new loop header and the
1703 old exit block. The function separate_decls_in_region needs this block
1704 to insert code that is active on loop exit, but not any other path. */
1705 new_exit_block = split_edge (exit);
1706 }
1707
1708 /* Insert and register the reduction exit phis. */
1709 for (gphi_iterator gsi = gsi_start_phis (exit_block);
1710 !gsi_end_p (gsi);
1711 gsi_next (&gsi))
1712 {
1713 gphi *phi = gsi.phi ();
1714 gphi *nphi = NULL;
1715 tree res_z = PHI_RESULT (phi);
1716 tree res_c;
1717
1718 if (new_exit_block != NULL)
1719 {
1720 /* Now that we have a new exit block, duplicate the phi of the old
1721 exit block in the new exit block to preserve loop-closed ssa. */
1722 edge succ_new_exit_block = single_succ_edge (new_exit_block);
1723 edge pred_new_exit_block = single_pred_edge (new_exit_block);
1724 tree res_y = copy_ssa_name (res_z, phi);
1725 nphi = create_phi_node (res_y, new_exit_block);
1726 res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1727 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1728 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1729 }
1730 else
1731 res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1732
1733 if (virtual_operand_p (res_z))
1734 continue;
1735
1736 gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
1737 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1738 if (red != NULL)
1739 red->keep_res = (nphi != NULL
1740 ? nphi
1741 : phi);
1742 }
1743
1744 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1745 then we're still using some fields, so only bother about fields that are
1746 still used: header and latch.
1747 The loop has a new header bb, so we update it. The latch bb stays the
1748 same. */
1749 loop->header = new_header;
1750
1751 /* Recalculate dominance info. */
1752 free_dominance_info (CDI_DOMINATORS);
1753 calculate_dominance_info (CDI_DOMINATORS);
1754
1755 checking_verify_ssa (true, true);
1756 }
1757
1758 /* Tries to moves the exit condition of LOOP to the beginning of its header
1759 without duplication of the loop body. NIT is the number of iterations of the
1760 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1761 transformation is successful. */
1762
1763 static bool
1764 try_transform_to_exit_first_loop_alt (struct loop *loop,
1765 reduction_info_table_type *reduction_list,
1766 tree nit)
1767 {
1768 /* Check whether the latch contains a single statement. */
1769 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1770 return false;
1771
1772 /* Check whether the latch contains no phis. */
1773 if (phi_nodes (loop->latch) != NULL)
1774 return false;
1775
1776 /* Check whether the latch contains the loop iv increment. */
1777 edge back = single_succ_edge (loop->latch);
1778 edge exit = single_dom_exit (loop);
1779 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1780 tree control = gimple_cond_lhs (cond_stmt);
1781 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1782 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1783 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1784 return false;
1785
1786 /* Check whether there's no code between the loop condition and the latch. */
1787 if (!single_pred_p (loop->latch)
1788 || single_pred (loop->latch) != exit->src)
1789 return false;
1790
1791 tree alt_bound = NULL_TREE;
1792 tree nit_type = TREE_TYPE (nit);
1793
1794 /* Figure out whether nit + 1 overflows. */
1795 if (TREE_CODE (nit) == INTEGER_CST)
1796 {
1797 if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
1798 {
1799 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1800 nit, build_one_cst (nit_type));
1801
1802 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
1803 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1804 return true;
1805 }
1806 else
1807 {
1808 /* Todo: Figure out if we can trigger this, if it's worth to handle
1809 optimally, and if we can handle it optimally. */
1810 return false;
1811 }
1812 }
1813
1814 gcc_assert (TREE_CODE (nit) == SSA_NAME);
1815
1816 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1817 iv with base 0 and step 1 that is incremented in the latch, like this:
1818
1819 <bb header>:
1820 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1821 ...
1822 if (iv_1 < nit)
1823 goto <bb latch>;
1824 else
1825 goto <bb exit>;
1826
1827 <bb latch>:
1828 iv_2 = iv_1 + 1;
1829 goto <bb header>;
1830
1831 The range of iv_1 is [0, nit]. The latch edge is taken for
1832 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
1833 number of latch executions is equal to nit.
1834
1835 The function max_loop_iterations gives us the maximum number of latch
1836 executions, so it gives us the maximum value of nit. */
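  /* For instance (hypothetical numbers): for a 32-bit unsigned nit, if
     max_loop_iterations proves at most 99 latch executions, then
     nit <= 99 < 0xffffffff, so nit + 1 cannot wrap and the transform
     below is safe.  */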
1837 widest_int nit_max;
1838 if (!max_loop_iterations (loop, &nit_max))
1839 return false;
1840
1841 /* Check that nit + 1 cannot overflow. */
1842 widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
1843 if (!wi::lts_p (nit_max, type_max))
1844 return false;
1845
1846 gimple *def = SSA_NAME_DEF_STMT (nit);
1847
1848 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
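  /* For example (invented SSA names): if the bound was computed as
     nit_5 = n_4 + 4294967295, i.e. n_4 - 1 in a 32-bit unsigned type,
     then one of the operands matches integer_minus_onep and we can
     reuse n_4 as nit + 1 without emitting a new addition.  */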
1849 if (def
1850 && is_gimple_assign (def)
1851 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1852 {
1853 tree op1 = gimple_assign_rhs1 (def);
1854 tree op2 = gimple_assign_rhs2 (def);
1855 if (integer_minus_onep (op1))
1856 alt_bound = op2;
1857 else if (integer_minus_onep (op2))
1858 alt_bound = op1;
1859 }
1860
1861 /* If not found, insert nit + 1. */
1862 if (alt_bound == NULL_TREE)
1863 {
1864 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
1865 build_int_cst_type (nit_type, 1));
1866
1867 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1868
1869 alt_bound
1870 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
1871 GSI_CONTINUE_LINKING);
1872 }
1873
1874 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1875 return true;
1876 }
1877
1878 /* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1879 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1880 LOOP. */
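/* An illustrative sketch only (see the picture in gen_parallel_loop for
   the authoritative description): from

     while (true) { BODY1; if (COND) break; BODY2; }

   we produce roughly

     BODY1;
     while (true) { if (COND) break; BODY2; BODY1; }

   i.e. the exit test moves to the header and the part of the iteration
   executed before the test (BODY1) is duplicated.  */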
1881
1882 static void
1883 transform_to_exit_first_loop (struct loop *loop,
1884 reduction_info_table_type *reduction_list,
1885 tree nit)
1886 {
1887 basic_block *bbs, *nbbs, ex_bb, orig_header;
1888 unsigned n;
1889 bool ok;
1890 edge exit = single_dom_exit (loop), hpred;
1891 tree control, control_name, res, t;
1892 gphi *phi, *nphi;
1893 gassign *stmt;
1894 gcond *cond_stmt, *cond_nit;
1895 tree nit_1;
1896
1897 split_block_after_labels (loop->header);
1898 orig_header = single_succ (loop->header);
1899 hpred = single_succ_edge (loop->header);
1900
1901 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1902 control = gimple_cond_lhs (cond_stmt);
1903 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
1904
1905 /* Make sure that we have phi nodes on exit for all loop header phis
1906 (create_parallel_loop requires that). */
1907 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1908 !gsi_end_p (gsi);
1909 gsi_next (&gsi))
1910 {
1911 phi = gsi.phi ();
1912 res = PHI_RESULT (phi);
1913 t = copy_ssa_name (res, phi);
1914 SET_PHI_RESULT (phi, t);
1915 nphi = create_phi_node (res, orig_header);
1916 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
1917
1918 if (res == control)
1919 {
1920 gimple_cond_set_lhs (cond_stmt, t);
1921 update_stmt (cond_stmt);
1922 control = t;
1923 }
1924 }
1925
1926 bbs = get_loop_body_in_dom_order (loop);
1927
1928 for (n = 0; bbs[n] != exit->src; n++)
1929 continue;
1930 nbbs = XNEWVEC (basic_block, n);
1931 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1932 bbs + 1, n, nbbs);
1933 gcc_assert (ok);
1934 free (bbs);
1935 ex_bb = nbbs[0];
1936 free (nbbs);
1937
1938 /* Other than reductions, the only gimple reg that should be copied
1939 out of the loop is the control variable. */
1940 exit = single_dom_exit (loop);
1941 control_name = NULL_TREE;
1942 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1943 !gsi_end_p (gsi); )
1944 {
1945 phi = gsi.phi ();
1946 res = PHI_RESULT (phi);
1947 if (virtual_operand_p (res))
1948 {
1949 gsi_next (&gsi);
1950 continue;
1951 }
1952
1953 /* Check whether it is part of a reduction. If it is,
1954 record the phi in the reduction's keep_res field. The
1955 PHI_RESULT of this phi is the resulting value of the reduction
1956 variable when exiting the loop. */
1957
1958 if (reduction_list->elements () > 0)
1959 {
1960 struct reduction_info *red;
1961
1962 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1963 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1964 if (red)
1965 {
1966 red->keep_res = phi;
1967 gsi_next (&gsi);
1968 continue;
1969 }
1970 }
1971 gcc_assert (control_name == NULL_TREE
1972 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
1973 control_name = res;
1974 remove_phi_node (&gsi, false);
1975 }
1976 gcc_assert (control_name != NULL_TREE);
1977
1978 /* Initialize the control variable to the number of iterations
1979 according to the rhs of the exit condition. */
1980 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1981 cond_nit = as_a <gcond *> (last_stmt (exit->src));
1982 nit_1 = gimple_cond_rhs (cond_nit);
1983 nit_1 = force_gimple_operand_gsi (&gsi,
1984 fold_convert (TREE_TYPE (control_name), nit_1),
1985 false, NULL_TREE, false, GSI_SAME_STMT);
1986 stmt = gimple_build_assign (control_name, nit_1);
1987 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1988 }
1989
1990 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1991 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
1992 NEW_DATA is the variable that should be initialized from the argument
1993 of LOOP_FN. N_THREADS is the requested number of threads. */
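/* In OpenMP terms, what is emitted below corresponds roughly to

     #pragma omp parallel num_threads (N_THREADS)   [outlined: LOOP_FN (DATA)]
       #pragma omp for schedule (...)
         for (iv = cvar_init; iv cond final; iv += 1)
           BODY;

   expressed directly as GIMPLE_OMP_PARALLEL / GIMPLE_OMP_FOR /
   GIMPLE_OMP_CONTINUE / GIMPLE_OMP_RETURN statements.  This is a sketch
   only; the exact cfg surgery follows.  */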
1994
1995 static void
1996 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1997 tree new_data, unsigned n_threads, location_t loc)
1998 {
1999 gimple_stmt_iterator gsi;
2000 basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
2001 tree t, param;
2002 gomp_parallel *omp_par_stmt;
2003 gimple *omp_return_stmt1, *omp_return_stmt2;
2004 gimple *phi;
2005 gcond *cond_stmt;
2006 gomp_for *for_stmt;
2007 gomp_continue *omp_cont_stmt;
2008 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2009 edge exit, nexit, guard, end, e;
2010
2011 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2012 bb = loop_preheader_edge (loop)->src;
2013 paral_bb = single_pred (bb);
2014 gsi = gsi_last_bb (paral_bb);
2015
2016 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2017 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2018 = build_int_cst (integer_type_node, n_threads);
2019 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2020 gimple_set_location (omp_par_stmt, loc);
2021
2022 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2023
2024 /* Initialize NEW_DATA. */
2025 if (data)
2026 {
2027 gassign *assign_stmt;
2028
2029 gsi = gsi_after_labels (bb);
2030
2031 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2032 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2033 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2034
2035 assign_stmt = gimple_build_assign (new_data,
2036 fold_convert (TREE_TYPE (new_data), param));
2037 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2038 }
2039
2040 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2041 bb = split_loop_exit_edge (single_dom_exit (loop));
2042 gsi = gsi_last_bb (bb);
2043 omp_return_stmt1 = gimple_build_omp_return (false);
2044 gimple_set_location (omp_return_stmt1, loc);
2045 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2046
2047 /* Extract data for GIMPLE_OMP_FOR. */
2048 gcc_assert (loop->header == single_dom_exit (loop)->src);
2049 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2050
2051 cvar = gimple_cond_lhs (cond_stmt);
2052 cvar_base = SSA_NAME_VAR (cvar);
2053 phi = SSA_NAME_DEF_STMT (cvar);
2054 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2055 initvar = copy_ssa_name (cvar);
2056 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2057 initvar);
2058 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2059
2060 gsi = gsi_last_nondebug_bb (loop->latch);
2061 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2062 gsi_remove (&gsi, true);
2063
2064 /* Prepare cfg. */
2065 for_bb = split_edge (loop_preheader_edge (loop));
2066 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2067 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2068 gcc_assert (exit == single_dom_exit (loop));
2069
2070 guard = make_edge (for_bb, ex_bb, 0);
2071 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2072 loop->latch = split_edge (single_succ_edge (loop->latch));
2073 single_pred_edge (loop->latch)->flags = 0;
2074 end = make_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2075 rescan_loop_exit (end, true, false);
2076
2077 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2078 !gsi_end_p (gpi); gsi_next (&gpi))
2079 {
2080 source_location locus;
2081 gphi *phi = gpi.phi ();
2082 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2083 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2084
2085 /* If the exit phi is not connected to a header phi in the same loop, this
2086 value is not modified in the loop, and we're done with this phi. */
2087 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2088 && gimple_bb (def_stmt) == loop->header))
2089 continue;
2090
2091 gphi *stmt = as_a <gphi *> (def_stmt);
2092 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2093 locus = gimple_phi_arg_location_from_edge (stmt,
2094 loop_preheader_edge (loop));
2095 add_phi_arg (phi, def, guard, locus);
2096
2097 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2098 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2099 add_phi_arg (phi, def, end, locus);
2100 }
2101 e = redirect_edge_and_branch (exit, nexit->dest);
2102 PENDING_STMT (e) = NULL;
2103
2104 /* Emit GIMPLE_OMP_FOR. */
2105 gimple_cond_set_lhs (cond_stmt, cvar_base);
2106 type = TREE_TYPE (cvar);
2107 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2108 int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
2109 enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type
2110 = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
2111 switch (schedule_type)
2112 {
2113 case PARAM_PARLOOPS_SCHEDULE_KIND_static:
2114 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2115 break;
2116 case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
2117 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2118 break;
2119 case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
2120 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2121 break;
2122 case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
2123 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2124 chunk_size = 0;
2125 break;
2126 case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
2127 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2128 chunk_size = 0;
2129 break;
2130 default:
2131 gcc_unreachable ();
2132 }
2133 if (chunk_size != 0)
2134 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2135 = build_int_cst (integer_type_node, chunk_size);
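  /* For example, compiling with
       --param parloops-schedule=dynamic --param parloops-chunk-size=4
     makes the clause built above equivalent to schedule(dynamic, 4),
     while the auto and runtime kinds reset the chunk size to zero and
     so get no chunk expression.  */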
2136
2137 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
2138 gimple_set_location (for_stmt, loc);
2139 gimple_omp_for_set_index (for_stmt, 0, initvar);
2140 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2141 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2142 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2143 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2144 cvar_base,
2145 build_int_cst (type, 1)));
2146
2147 gsi = gsi_last_bb (for_bb);
2148 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
2149 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2150
2151 /* Emit GIMPLE_OMP_CONTINUE. */
2152 continue_bb = single_pred (loop->latch);
2153 gsi = gsi_last_bb (continue_bb);
2154 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2155 gimple_set_location (omp_cont_stmt, loc);
2156 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2157 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2158
2159 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2160 gsi = gsi_last_bb (ex_bb);
2161 omp_return_stmt2 = gimple_build_omp_return (true);
2162 gimple_set_location (omp_return_stmt2, loc);
2163 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2164
2165 /* After the above, dom info is hosed. Re-compute it. */
2166 free_dominance_info (CDI_DOMINATORS);
2167 calculate_dominance_info (CDI_DOMINATORS);
2168 }
2169
2170 /* Generates code to execute the iterations of LOOP in N_THREADS
2171 threads in parallel.
2172
2173 NITER describes the number of iterations of LOOP.
2174 REDUCTION_LIST describes the reductions in LOOP. */
2175
2176 static void
2177 gen_parallel_loop (struct loop *loop,
2178 reduction_info_table_type *reduction_list,
2179 unsigned n_threads, struct tree_niter_desc *niter)
2180 {
2181 tree many_iterations_cond, type, nit;
2182 tree arg_struct, new_arg_struct;
2183 gimple_seq stmts;
2184 edge entry, exit;
2185 struct clsn_data clsn_data;
2186 unsigned prob;
2187 location_t loc;
2188 gimple *cond_stmt;
2189 unsigned int m_p_thread = 2;
2190
2191 /* From
2192
2193 ---------------------------------------------------------------------
2194 loop
2195 {
2196 IV = phi (INIT, IV + STEP)
2197 BODY1;
2198 if (COND)
2199 break;
2200 BODY2;
2201 }
2202 ---------------------------------------------------------------------
2203
2204 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2205 we generate the following code:
2206
2207 ---------------------------------------------------------------------
2208
2209 if (MAY_BE_ZERO
2210 || NITER < MIN_PER_THREAD * N_THREADS)
2211 goto original;
2212
2213 BODY1;
2214 store all local loop-invariant variables used in the body of the loop to DATA.
2215 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2216 load the variables from DATA.
2217 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2218 BODY2;
2219 BODY1;
2220 GIMPLE_OMP_CONTINUE;
2221 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2222 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2223 goto end;
2224
2225 original:
2226 loop
2227 {
2228 IV = phi (INIT, IV + STEP)
2229 BODY1;
2230 if (COND)
2231 break;
2232 BODY2;
2233 }
2234
2235 end:
2236
2237 */
2238
2239 /* Create two versions of the loop -- in the old one, we know that the
2240 number of iterations is large enough, and we will transform it into the
2241 loop that will be split off into loop_fn; the new one will be used for
2242 the remaining iterations. */
2243
2244 /* We should compute a better number-of-iterations value for outer loops.
2245 That is, if we have
2246
2247 for (i = 0; i < n; ++i)
2248 for (j = 0; j < m; ++j)
2249 ...
2250
2251 we should compute nit = n * m, not nit = n.
2252 Also may_be_zero handling would need to be adjusted. */
2253
2254 type = TREE_TYPE (niter->niter);
2255 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2256 NULL_TREE);
2257 if (stmts)
2258 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2259
2260 if (loop->inner)
2261 m_p_thread = 2;
2262 else
2263 m_p_thread = MIN_PER_THREAD;
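  /* E.g. with n_threads == 4 and assuming MIN_PER_THREAD is 100, an
     innermost loop is only versioned for parallel execution when
     nit >= 400.  */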
2264
2265 many_iterations_cond =
2266 fold_build2 (GE_EXPR, boolean_type_node,
2267 nit, build_int_cst (type, m_p_thread * n_threads));
2268
2269 many_iterations_cond
2270 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2271 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2272 many_iterations_cond);
2273 many_iterations_cond
2274 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2275 if (stmts)
2276 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2277 if (!is_gimple_condexpr (many_iterations_cond))
2278 {
2279 many_iterations_cond
2280 = force_gimple_operand (many_iterations_cond, &stmts,
2281 true, NULL_TREE);
2282 if (stmts)
2283 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2284 }
2285
2286 initialize_original_copy_tables ();
2287
2288 /* We assume that the loop usually iterates a lot. */
2289 prob = 4 * REG_BR_PROB_BASE / 5;
2290 loop_version (loop, many_iterations_cond, NULL,
2291 prob, prob, REG_BR_PROB_BASE - prob, true);
2292 update_ssa (TODO_update_ssa);
2293 free_original_copy_tables ();
2294
2295 /* Base all the induction variables in LOOP on a single control one. */
2296 canonicalize_loop_ivs (loop, &nit, true);
2297
2298 /* Ensure that the exit condition is the first statement in the loop.
2299 The common case is that the latch of the loop is empty (apart from the
2300 increment) and immediately follows the loop exit test. Attempt to move the
2301 entry of the loop directly before the exit check and increase the number of
2302 iterations of the loop by one. */
2303 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2304 {
2305 if (dump_file
2306 && (dump_flags & TDF_DETAILS))
2307 fprintf (dump_file,
2308 "alternative exit-first loop transform succeeded"
2309 " for loop %d\n", loop->num);
2310 }
2311 else
2312 {
2313 /* Fall back on the method that handles more cases, but duplicates the
2314 loop body: move the exit condition of LOOP to the beginning of its
2315 header, and duplicate the part of the last iteration that gets disabled
2316 to the exit of the loop. */
2317 transform_to_exit_first_loop (loop, reduction_list, nit);
2318 }
2319
2320 /* Generate initializations for reductions. */
2321 if (reduction_list->elements () > 0)
2322 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2323
2324 /* Eliminate the references to local variables from the loop. */
2325 gcc_assert (single_exit (loop));
2326 entry = loop_preheader_edge (loop);
2327 exit = single_dom_exit (loop);
2328
2329 eliminate_local_variables (entry, exit);
2330 /* In the old loop, move all variables non-local to the loop to a structure
2331 and back, and create separate decls for the variables used in the loop. */
2332 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2333 &new_arg_struct, &clsn_data);
2334
2335 /* Create the parallel constructs. */
2336 loc = UNKNOWN_LOCATION;
2337 cond_stmt = last_stmt (loop->header);
2338 if (cond_stmt)
2339 loc = gimple_location (cond_stmt);
2340 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2341 new_arg_struct, n_threads, loc);
2342 if (reduction_list->elements () > 0)
2343 create_call_for_reduction (loop, reduction_list, &clsn_data);
2344
2345 scev_reset ();
2346
2347 /* Free loop bound estimations that could contain references to
2348 removed statements. */
2349 FOR_EACH_LOOP (loop, 0)
2350 free_numbers_of_iterations_estimates_loop (loop);
2351 }
2352
2353 /* Returns true when LOOP contains vector phi nodes. */
2354
2355 static bool
2356 loop_has_vector_phi_nodes (struct loop *loop)
2357 {
2358 unsigned i;
2359 basic_block *bbs = get_loop_body_in_dom_order (loop);
2360 gphi_iterator gsi;
2361 bool res = true;
2362
2363 for (i = 0; i < loop->num_nodes; i++)
2364 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2365 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2366 goto end;
2367
2368 res = false;
2369 end:
2370 free (bbs);
2371 return res;
2372 }
2373
2374 /* Create a reduction_info struct, initialize it with REDUC_STMT
2375 and PHI, and insert it into REDUCTION_LIST. */
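/* For instance, for a simple sum (invented names):

     # sum_1 = PHI <0.0 (preheader), sum_2 (latch)>
     sum_2 = sum_1 + a_9;

   PHI is the header phi, REDUC_STMT is the addition, and the recorded
   reduction_code is PLUS_EXPR.  */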
2376
2377 static void
2378 build_new_reduction (reduction_info_table_type *reduction_list,
2379 gimple *reduc_stmt, gphi *phi)
2380 {
2381 reduction_info **slot;
2382 struct reduction_info *new_reduction;
2383 enum tree_code reduction_code;
2384
2385 gcc_assert (reduc_stmt);
2386
2387 if (dump_file && (dump_flags & TDF_DETAILS))
2388 {
2389 fprintf (dump_file,
2390 "Detected reduction. reduction stmt is:\n");
2391 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2392 fprintf (dump_file, "\n");
2393 }
2394
2395 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
2396 {
2397 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
2398 gimple *def1 = SSA_NAME_DEF_STMT (op1);
2399 reduction_code = gimple_assign_rhs_code (def1);
2400 }
2402 else
2403 reduction_code = gimple_assign_rhs_code (reduc_stmt);
2404
2405 new_reduction = XCNEW (struct reduction_info);
2406
2407 new_reduction->reduc_stmt = reduc_stmt;
2408 new_reduction->reduc_phi = phi;
2409 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2410 new_reduction->reduction_code = reduction_code;
2411 slot = reduction_list->find_slot (new_reduction, INSERT);
2412 *slot = new_reduction;
2413 }
2414
2415 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2416
2417 int
2418 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2419 {
2420 struct reduction_info *const red = *slot;
2421 gimple_set_uid (red->reduc_phi, red->reduc_version);
2422 return 1;
2423 }
2424
2425 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
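/* A double reduction, which the inner-loop handling below deals with,
   looks like this in source form (illustrative C):

     for (i = 0; i < n; i++)
       {
         s = 0;
         for (j = 0; j < m; j++)
           s += a[i][j];
         sum += s;
       }

   where the outer header phi for sum is fed only through the inner
   loop's reduction.  */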
2426
2427 static void
2428 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2429 {
2430 gphi_iterator gsi;
2431 loop_vec_info simple_loop_info;
2432 loop_vec_info simple_inner_loop_info = NULL;
2433 bool allow_double_reduc = true;
2434
2435 if (!stmt_vec_info_vec.exists ())
2436 init_stmt_vec_info_vec ();
2437
2438 simple_loop_info = vect_analyze_loop_form (loop);
2439 if (simple_loop_info == NULL)
2440 goto gather_done;
2441
2442 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2443 {
2444 gphi *phi = gsi.phi ();
2445 affine_iv iv;
2446 tree res = PHI_RESULT (phi);
2447 bool double_reduc;
2448
2449 if (virtual_operand_p (res))
2450 continue;
2451
2452 if (simple_iv (loop, loop, res, &iv, true))
2453 continue;
2454
2455 gimple *reduc_stmt
2456 = vect_force_simple_reduction (simple_loop_info, phi, true,
2457 &double_reduc, true);
2458 if (!reduc_stmt)
2459 continue;
2460
2461 if (double_reduc)
2462 {
2463 if (!allow_double_reduc
2464 || loop->inner->inner != NULL)
2465 continue;
2466
2467 if (!simple_inner_loop_info)
2468 {
2469 simple_inner_loop_info = vect_analyze_loop_form (loop->inner);
2470 if (!simple_inner_loop_info)
2471 {
2472 allow_double_reduc = false;
2473 continue;
2474 }
2475 }
2476
2477 use_operand_p use_p;
2478 gimple *inner_stmt;
2479 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
2480 gcc_assert (single_use_p);
2481 if (gimple_code (inner_stmt) != GIMPLE_PHI)
2482 continue;
2483 gphi *inner_phi = as_a <gphi *> (inner_stmt);
2484 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
2485 &iv, true))
2486 continue;
2487
2488 gimple *inner_reduc_stmt
2489 = vect_force_simple_reduction (simple_inner_loop_info, inner_phi,
2490 true, &double_reduc, true);
2491 gcc_assert (!double_reduc);
2492 if (inner_reduc_stmt == NULL)
2493 continue;
2494 }
2495
2496 build_new_reduction (reduction_list, reduc_stmt, phi);
2497 }
2498 destroy_loop_vec_info (simple_loop_info, true);
2499 destroy_loop_vec_info (simple_inner_loop_info, true);
2500
2501 gather_done:
2502 /* Release the claim on gimple_uid. */
2503 free_stmt_vec_info_vec ();
2504
2505 if (reduction_list->elements () == 0)
2506 return;
2507
2508 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2509 and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only
2510 now. */
2511 basic_block bb;
2512 FOR_EACH_BB_FN (bb, cfun)
2513 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2514 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
2515 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2516 }
2517
2518 /* Try to initialize NITER for the code generation part. */
2519
2520 static bool
2521 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2522 {
2523 edge exit = single_dom_exit (loop);
2524
2525 gcc_assert (exit);
2526
2527 /* We need to know # of iterations, and there should be no uses of values
2528 defined inside the loop outside of it, unless the values are invariants of
2529 the loop. */
2530 if (!number_of_iterations_exit (loop, exit, niter, false))
2531 {
2532 if (dump_file && (dump_flags & TDF_DETAILS))
2533 fprintf (dump_file, " FAILED: number of iterations not known\n");
2534 return false;
2535 }
2536
2537 return true;
2538 }
2539
2540 /* Try to initialize REDUCTION_LIST for the code generation part.
2541 REDUCTION_LIST describes the reductions. */
2542
2543 static bool
2544 try_create_reduction_list (loop_p loop,
2545 reduction_info_table_type *reduction_list)
2546 {
2547 edge exit = single_dom_exit (loop);
2548 gphi_iterator gsi;
2549
2550 gcc_assert (exit);
2551
2552 /* Try to get rid of exit phis. */
2553 final_value_replacement_loop (loop);
2554
2555 gather_scalar_reductions (loop, reduction_list);
2556
2558 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2559 {
2560 gphi *phi = gsi.phi ();
2561 struct reduction_info *red;
2562 imm_use_iterator imm_iter;
2563 use_operand_p use_p;
2564 gimple *reduc_phi;
2565 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2566
2567 if (!virtual_operand_p (val))
2568 {
2569 if (dump_file && (dump_flags & TDF_DETAILS))
2570 {
2571 fprintf (dump_file, "phi is ");
2572 print_gimple_stmt (dump_file, phi, 0, 0);
2573 fprintf (dump_file, "arg of phi to exit: value ");
2574 print_generic_expr (dump_file, val, 0);
2575 fprintf (dump_file, " used outside loop\n");
2576 fprintf (dump_file,
2577 " checking if it is part of reduction pattern:\n");
2578 }
2579 if (reduction_list->elements () == 0)
2580 {
2581 if (dump_file && (dump_flags & TDF_DETAILS))
2582 fprintf (dump_file,
2583 " FAILED: it is not a part of reduction.\n");
2584 return false;
2585 }
2586 reduc_phi = NULL;
2587 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2588 {
2589 if (!gimple_debug_bind_p (USE_STMT (use_p))
2590 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2591 {
2592 reduc_phi = USE_STMT (use_p);
2593 break;
2594 }
2595 }
2596 red = reduction_phi (reduction_list, reduc_phi);
2597 if (red == NULL)
2598 {
2599 if (dump_file && (dump_flags & TDF_DETAILS))
2600 fprintf (dump_file,
2601 " FAILED: it is not a part of reduction.\n");
2602 return false;
2603 }
2604 if (red->keep_res != NULL)
2605 {
2606 if (dump_file && (dump_flags & TDF_DETAILS))
2607 fprintf (dump_file,
2608 " FAILED: reduction has multiple exit phis.\n");
2609 return false;
2610 }
2611 red->keep_res = phi;
2612 if (dump_file && (dump_flags & TDF_DETAILS))
2613 {
2614 fprintf (dump_file, "reduction phi is ");
2615 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2616 fprintf (dump_file, "reduction stmt is ");
2617 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2618 }
2619 }
2620 }
2621
2622 /* The iterations of the loop may communicate only through bivs whose
2623 iteration space can be distributed efficiently. */
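  /* For example, a pointer-chasing recurrence like p = p->next (in C
     terms) yields a header phi that is neither a simple affine iv nor
     a detected reduction, so it takes the FAILED path below.  */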
2624 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2625 {
2626 gphi *phi = gsi.phi ();
2627 tree def = PHI_RESULT (phi);
2628 affine_iv iv;
2629
2630 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2631 {
2632 struct reduction_info *red;
2633
2634 red = reduction_phi (reduction_list, phi);
2635 if (red == NULL)
2636 {
2637 if (dump_file && (dump_flags & TDF_DETAILS))
2638 fprintf (dump_file,
2639 " FAILED: scalar dependency between iterations\n");
2640 return false;
2641 }
2642 }
2643 }
2644
2646 return true;
2647 }
2648
2649 /* Return true if LOOP contains phis with ADDR_EXPR in args. */
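/* E.g. a phi such as p_1 = PHI <&a (bb3), &b (bb4)> (invented names).  */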
2650
2651 static bool
2652 loop_has_phi_with_address_arg (struct loop *loop)
2653 {
2654 basic_block *bbs = get_loop_body (loop);
2655 bool res = false;
2656
2657 unsigned i, j;
2658 gphi_iterator gsi;
2659 for (i = 0; i < loop->num_nodes; i++)
2660 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2661 {
2662 gphi *phi = gsi.phi ();
2663 for (j = 0; j < gimple_phi_num_args (phi); j++)
2664 {
2665 tree arg = gimple_phi_arg_def (phi, j);
2666 if (TREE_CODE (arg) == ADDR_EXPR)
2667 {
2668 /* This should be handled by eliminate_local_variables, but that
2669 function currently ignores phis. */
2670 res = true;
2671 goto end;
2672 }
2673 }
2674 }
2675 end:
2676 free (bbs);
2677 return res;
2678 }
2679
2680 /* Detect parallel loops and generate parallel code using libgomp
2681 primitives. Returns true if some loop was parallelized, false
2682 otherwise. */
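/* This is the driver behind -ftree-parallelize-loops=N; for example

     gcc -O2 -ftree-parallelize-loops=4 file.c

   sets flag_tree_parallelize_loops to 4, which is used as n_threads
   below.  */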
2683
2684 static bool
2685 parallelize_loops (void)
2686 {
2687 unsigned n_threads = flag_tree_parallelize_loops;
2688 bool changed = false;
2689 struct loop *loop;
2690 struct loop *skip_loop = NULL;
2691 struct tree_niter_desc niter_desc;
2692 struct obstack parloop_obstack;
2693 HOST_WIDE_INT estimated;
2694 source_location loop_loc;
2695
2696 /* Do not parallelize loops in the functions created by parallelization. */
2697 if (parallelized_function_p (cfun->decl))
2698 return false;
2699 if (cfun->has_nonlocal_label)
2700 return false;
2701
2702 gcc_obstack_init (&parloop_obstack);
2703 reduction_info_table_type reduction_list (10);
2704
2705 FOR_EACH_LOOP (loop, 0)
2706 {
2707 if (loop == skip_loop)
2708 {
2709 if (dump_file && (dump_flags & TDF_DETAILS))
2710 fprintf (dump_file,
2711 "Skipping loop %d as inner loop of parallelized loop\n",
2712 loop->num);
2713
2714 skip_loop = loop->inner;
2715 continue;
2716 }
2717 else
2718 skip_loop = NULL;
2719
2720 reduction_list.empty ();
2721 if (dump_file && (dump_flags & TDF_DETAILS))
2722 {
2723 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2724 if (loop->inner)
2725 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2726 else
2727 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2728 }
2729
2730 /* If we use autopar in the graphite pass, we use its marked dependency
2731 checking results (loop->can_be_parallel). */
2732 if (flag_loop_parallelize_all && !loop->can_be_parallel)
2733 {
2734 if (dump_file && (dump_flags & TDF_DETAILS))
2735 fprintf (dump_file, "loop is not parallel according to graphite\n");
2736 continue;
2737 }
2738
2739 if (!single_dom_exit (loop))
2740 {
2742 if (dump_file && (dump_flags & TDF_DETAILS))
2743 fprintf (dump_file, "loop is !single_dom_exit\n");
2744
2745 continue;
2746 }
2747
2748 if (/* And of course, the loop must be parallelizable. */
2749 !can_duplicate_loop_p (loop)
2750 || loop_has_blocks_with_irreducible_flag (loop)
2751 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
2752 /* FIXME: the check for vector phi nodes could be removed. */
2753 || loop_has_vector_phi_nodes (loop))
2754 continue;
2755
2756 estimated = estimated_stmt_executions_int (loop);
2757 if (estimated == -1)
2758 estimated = max_stmt_executions_int (loop);
2759 /* FIXME: Bypass this check as graphite doesn't update the
2760 count and frequency correctly now. */
2761 if (!flag_loop_parallelize_all
2762 && ((estimated != -1
2763 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
2764 /* Do not bother with loops in cold areas. */
2765 || optimize_loop_nest_for_size_p (loop)))
2766 continue;
2767
2768 if (!try_get_loop_niter (loop, &niter_desc))
2769 continue;
2770
2771 if (!try_create_reduction_list (loop, &reduction_list))
2772 continue;
2773
2774 if (loop_has_phi_with_address_arg (loop))
2775 continue;
2776
2777 if (!flag_loop_parallelize_all
2778 && !loop_parallel_p (loop, &parloop_obstack))
2779 continue;
2780
2781 changed = true;
2782 skip_loop = loop->inner;
2783 if (dump_file && (dump_flags & TDF_DETAILS))
2784 {
2785 if (loop->inner)
2786 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
2787 else
2788 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2789 loop_loc = find_loop_location (loop);
2790 if (loop_loc != UNKNOWN_LOCATION)
2791 fprintf (dump_file, "\nloop at %s:%d: ",
2792 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
2793 }
2794 gen_parallel_loop (loop, &reduction_list,
2795 n_threads, &niter_desc);
2796 }
2797
2798 obstack_free (&parloop_obstack, NULL);
2799
2800 /* Parallelization will cause new function calls to be inserted through
2801 which local variables will escape. Reset the points-to solution
2802 for ESCAPED. */
2803 if (changed)
2804 pt_solution_reset (&cfun->gimple_df->escaped);
2805
2806 return changed;
2807 }
2808
2809 /* Parallelization. */
2810
2811 namespace {
2812
2813 const pass_data pass_data_parallelize_loops =
2814 {
2815 GIMPLE_PASS, /* type */
2816 "parloops", /* name */
2817 OPTGROUP_LOOP, /* optinfo_flags */
2818 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2819 ( PROP_cfg | PROP_ssa ), /* properties_required */
2820 0, /* properties_provided */
2821 0, /* properties_destroyed */
2822 0, /* todo_flags_start */
2823 0, /* todo_flags_finish */
2824 };
2825
2826 class pass_parallelize_loops : public gimple_opt_pass
2827 {
2828 public:
2829 pass_parallelize_loops (gcc::context *ctxt)
2830 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2831 {}
2832
2833 /* opt_pass methods: */
2834 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
2835 virtual unsigned int execute (function *);
2836
2837 }; // class pass_parallelize_loops
2838
2839 unsigned
2840 pass_parallelize_loops::execute (function *fun)
2841 {
2842 if (number_of_loops (fun) <= 1)
2843 return 0;
2844
2845 tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
2846 if (nthreads == NULL_TREE)
2847 return 0;
2848
2849 if (parallelize_loops ())
2850 {
2851 fun->curr_properties &= ~(PROP_gimple_eomp);
2852
2853 checking_verify_loop_structure ();
2854
2855 return TODO_update_ssa;
2856 }
2857
2858 return 0;
2859 }
2860
2861 } // anon namespace
2862
2863 gimple_opt_pass *
2864 make_pass_parallelize_loops (gcc::context *ctxt)
2865 {
2866 return new pass_parallelize_loops (ctxt);
2867 }