]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-parloops.c
Use "oacc kernels" attribute for OpenACC kernels
[thirdparty/gcc.git] / gcc / tree-parloops.c
1 /* Loop autoparallelization.
2 Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "tree.h"
27 #include "gimple.h"
28 #include "cfghooks.h"
29 #include "tree-pass.h"
30 #include "ssa.h"
31 #include "cgraph.h"
32 #include "gimple-pretty-print.h"
33 #include "fold-const.h"
34 #include "gimplify.h"
35 #include "gimple-iterator.h"
36 #include "gimplify-me.h"
37 #include "gimple-walk.h"
38 #include "stor-layout.h"
39 #include "tree-nested.h"
40 #include "tree-cfg.h"
41 #include "tree-ssa-loop-ivopts.h"
42 #include "tree-ssa-loop-manip.h"
43 #include "tree-ssa-loop-niter.h"
44 #include "tree-ssa-loop.h"
45 #include "tree-into-ssa.h"
46 #include "cfgloop.h"
47 #include "tree-scalar-evolution.h"
48 #include "langhooks.h"
49 #include "tree-vectorizer.h"
50 #include "tree-hasher.h"
51 #include "tree-parloops.h"
52 #include "omp-general.h"
53 #include "omp-low.h"
54 #include "tree-ssa.h"
55 #include "params.h"
56 #include "params-enum.h"
57 #include "tree-ssa-alias.h"
58 #include "tree-eh.h"
59 #include "gomp-constants.h"
60 #include "tree-dfa.h"
61
62 /* This pass tries to distribute iterations of loops into several threads.
63 The implementation is straightforward -- for each loop we test whether its
64 iterations are independent, and if it is the case (and some additional
65 conditions regarding profitability and correctness are satisfied), we
66 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67 machinery do its job.
68
69    Most of the complexity is in bringing the code into shape expected
70 by the omp expanders:
71 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
72 variable and that the exit test is at the start of the loop body
73 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
74 variables by accesses through pointers, and breaking up ssa chains
75 by storing the values incoming to the parallelized loop to a structure
76 passed to the new function as an argument (something similar is done
77 in omp gimplification, unfortunately only a small part of the code
78 can be shared).
79
80 TODO:
81 -- if there are several parallelizable loops in a function, it may be
82 possible to generate the threads just once (using synchronization to
83 ensure that cross-loop dependences are obeyed).
84 -- handling of common reduction patterns for outer loops.
85
86 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
87 /*
88 Reduction handling:
89 currently we use vect_force_simple_reduction() to detect reduction patterns.
90 The code transformation will be introduced by an example.
91
92
93 parloop
94 {
95 int sum=1;
96
97 for (i = 0; i < N; i++)
98 {
99 x[i] = i + 3;
100 sum+=x[i];
101 }
102 }
103
104 gimple-like code:
105 header_bb:
106
107 # sum_29 = PHI <sum_11(5), 1(3)>
108 # i_28 = PHI <i_12(5), 0(3)>
109 D.1795_8 = i_28 + 3;
110 x[i_28] = D.1795_8;
111 sum_11 = D.1795_8 + sum_29;
112 i_12 = i_28 + 1;
113 if (N_6(D) > i_12)
114 goto header_bb;
115
116
117 exit_bb:
118
119 # sum_21 = PHI <sum_11(4)>
120 printf (&"%d"[0], sum_21);
121
122
123 after reduction transformation (only relevant parts):
124
125 parloop
126 {
127
128 ....
129
130
131 # Storing the initial value given by the user. #
132
133 .paral_data_store.32.sum.27 = 1;
134
135 #pragma omp parallel num_threads(4)
136
137 #pragma omp for schedule(static)
138
139 # The neutral element corresponding to the particular
140 reduction's operation, e.g. 0 for PLUS_EXPR,
141 1 for MULT_EXPR, etc. replaces the user's initial value. #
142
143 # sum.27_29 = PHI <sum.27_11, 0>
144
145 sum.27_11 = D.1827_8 + sum.27_29;
146
147 GIMPLE_OMP_CONTINUE
148
149 # Adding this reduction phi is done at create_phi_for_local_result() #
150 # sum.27_56 = PHI <sum.27_11, 0>
151 GIMPLE_OMP_RETURN
152
153 # Creating the atomic operation is done at
154 create_call_for_reduction_1() #
155
156 #pragma omp atomic_load
157 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
158 D.1840_60 = sum.27_56 + D.1839_59;
159 #pragma omp atomic_store (D.1840_60);
160
161 GIMPLE_OMP_RETURN
162
163 # collecting the result after the join of the threads is done at
164 create_loads_for_reductions().
165 The value computed by the threads is loaded from the
166 shared struct. #
167
168
169 .paral_data_load.33_52 = &.paral_data_store.32;
170 sum_37 = .paral_data_load.33_52->sum.27;
171 sum_43 = D.1795_41 + sum_37;
172
173 exit bb:
174 # sum_21 = PHI <sum_43, sum_26>
175 printf (&"%d"[0], sum_21);
176
177 ...
178
179 }
180
181 */
182
/* Minimal number of iterations of a loop that should be executed in each
   thread.  */
#define MIN_PER_THREAD 100

/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;		/* Reduction statement.  */
  gimple *reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* Code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* The name of the field in the parloop data structure intended for reduction.  */
  tree reduc_addr;		/* The address of the reduction variable for
				   openacc reductions.  */
  tree init;			/* Reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
208
209 /* Reduction info hashtable helpers. */
210
211 struct reduction_hasher : free_ptr_hash <reduction_info>
212 {
213 static inline hashval_t hash (const reduction_info *);
214 static inline bool equal (const reduction_info *, const reduction_info *);
215 };
216
217 /* Equality and hash functions for hashtab code. */
218
219 inline bool
220 reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
221 {
222 return (a->reduc_phi == b->reduc_phi);
223 }
224
225 inline hashval_t
226 reduction_hasher::hash (const reduction_info *a)
227 {
228 return a->reduc_version;
229 }
230
231 typedef hash_table<reduction_hasher> reduction_info_table_type;
232
233
234 static struct reduction_info *
235 reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
236 {
237 struct reduction_info tmpred, *red;
238
239 if (reduction_list->elements () == 0 || phi == NULL)
240 return NULL;
241
242 if (gimple_uid (phi) == (unsigned int)-1
243 || gimple_uid (phi) == 0)
244 return NULL;
245
246 tmpred.reduc_phi = phi;
247 tmpred.reduc_version = gimple_uid (phi);
248 red = reduction_list->find (&tmpred);
249 gcc_assert (red == NULL || red->reduc_phi == phi);
250
251 return red;
252 }
253
/* Element of the hashtable of ssa names to copy into the parallelized
   region; keyed by SSA_NAME_VERSION.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
263
264 /* Name copies hashtable helpers. */
265
266 struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
267 {
268 static inline hashval_t hash (const name_to_copy_elt *);
269 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
270 };
271
272 /* Equality and hash functions for hashtab code. */
273
274 inline bool
275 name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
276 {
277 return a->version == b->version;
278 }
279
280 inline hashval_t
281 name_to_copy_hasher::hash (const name_to_copy_elt *a)
282 {
283 return (hashval_t) a->version;
284 }
285
286 typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
287
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;		/* The integer entries of the matrix.  */
  int rowsize;			/* Number of rows.  */
  int colsize;			/* Number of columns.  */
  int denominator;		/* Common denominator of all entries.  */
} *lambda_trans_matrix;

/* Accessors for the fields of a lambda_trans_matrix.  */
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
302
303 /* Allocate a new transformation matrix. */
304
305 static lambda_trans_matrix
306 lambda_trans_matrix_new (int colsize, int rowsize,
307 struct obstack * lambda_obstack)
308 {
309 lambda_trans_matrix ret;
310
311 ret = (lambda_trans_matrix)
312 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
313 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
314 LTM_ROWSIZE (ret) = rowsize;
315 LTM_COLSIZE (ret) = colsize;
316 LTM_DENOMINATOR (ret) = 1;
317 return ret;
318 }
319
320 /* Multiply a vector VEC by a matrix MAT.
321 MAT is an M*N matrix, and VEC is a vector with length N. The result
322 is stored in DEST which must be a vector of length M. */
323
324 static void
325 lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
326 lambda_vector vec, lambda_vector dest)
327 {
328 int i, j;
329
330 lambda_vector_clear (dest, m);
331 for (i = 0; i < m; i++)
332 for (j = 0; j < n; j++)
333 dest[i] += matrix[i][j] * vec[j];
334 }
335
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read dependences: these data accesses
	 can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect and require every transformed distance
	 vector to remain lexicographically positive.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
407
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent on each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  /* The 1x1 matrix [-1] represents loop reversal; if the reversed loop
     is still legal, no dependence crosses iterations.  */
  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
464
465 /* Return true when LOOP contains basic blocks marked with the
466 BB_IRREDUCIBLE_LOOP flag. */
467
468 static inline bool
469 loop_has_blocks_with_irreducible_flag (struct loop *loop)
470 {
471 unsigned i;
472 basic_block *bbs = get_loop_body_in_dom_order (loop);
473 bool res = true;
474
475 for (i = 0; i < loop->num_nodes; i++)
476 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
477 goto end;
478
479 res = false;
480 end:
481 free (bbs);
482 return res;
483 }
484
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  If GSI is NULL no statements may be emitted: the
   address is returned only when its base address is already cached in
   DECL_ADDRESS, otherwise NULL is returned.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Strip the handled components to find the base of the access.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      /* Without an insertion point we cannot materialize the address.  */
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      /* Emit the address computation on the region entry edge so it is
	 available throughout the region.  */
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Convert to the requested pointer type if needed.  */
  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
559
560 static tree
561 reduc_stmt_res (gimple *stmt)
562 {
563 return (gimple_code (stmt) == GIMPLE_PHI
564 ? gimple_phi_result (stmt)
565 : gimple_assign_lhs (stmt));
566 }
567
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  Always returns 1 so the traversal
   continues.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction with the neutral element of the operation
     (e.g. 0 for PLUS_EXPR, 1 for MULT_EXPR).  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
				reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in reduc->initial_value; it is folded back
     into the result after the parallel computation is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
610
/* Data passed to the tree walk done by eliminate_local_variables_1.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Walker state; kept as the first member so
				   the callback can cast the info pointer
				   back to elv_data.  */
  edge entry;			/* Entry edge of the SESE region.  */
  int_tree_htab_type *decl_address;  /* Cache of canonical decl addresses.  */
  gimple_stmt_iterator *gsi;	/* Insertion point for new statements, or
				   NULL when walking a debug bind value.  */
  bool changed;			/* Set when the walked expression was
				   rewritten.  */
  bool reset;			/* Set when a debug bind value must be
				   reset because no address was available.  */
};
620
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      /* Only automatic (non-external) variables need the rewrite.  */
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* Walking a debug bind value: no address was cached and we may
	     not emit statements, so ask the caller to reset the bind.  */
	  dta->reset = true;
	  return NULL_TREE;
	}

      /* Replace the decl with an indirect reference through its
	 canonical address.  */
      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* See the DECL_P case above.  */
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
697
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      /* No statements may be inserted for a debug bind value; walk with
	 GSI == NULL and reset the value when an address is missing.  */
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      /* Drop clobbers of local variables: replace them with a nop and
	 unlink the virtual def.  */
      unlink_stmt_vdef (stmt);
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
743
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: process non-debug statements, filling DECL_ADDRESS with
     the canonical addresses of all referenced decls.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      {
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (is_gimple_debug (gsi_stmt (gsi)))
	    {
	      if (gimple_debug_bind_p (gsi_stmt (gsi)))
		has_debug_stmt = true;
	    }
	  else
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
      }

  /* Second pass: rewrite debug binds, reusing the cached addresses (a
     debug bind may not have new statements inserted for it).  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
789
790 /* Returns true if expression EXPR is not defined between ENTRY and
791 EXIT, i.e. if all its operands are defined outside of the region. */
792
793 static bool
794 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
795 {
796 basic_block entry_bb = entry->src;
797 basic_block exit_bb = exit->dest;
798 basic_block def_bb;
799
800 if (is_gimple_min_invariant (expr))
801 return true;
802
803 if (TREE_CODE (expr) == SSA_NAME)
804 {
805 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
806 if (def_bb
807 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
808 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
809 return false;
810
811 return true;
812 }
813
814 return false;
815 }
816
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only insert a slot when we intend to create a copy.  */
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* NAME was defined inside the region; keep it, only its base decl
	 is replaced below.  */
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
890
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  /* Defs are local to the region, so they are never duplicated; only
     their base decl is replaced, hence the returned name is unchanged.  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  /* Uses whose definition lies outside the region get a duplicate; uses
     defined inside only have their base decl replaced.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
930
/* Rewrites the debug bind or debug source bind STMT in terms of the
   copies created by separate_decls_in_region_name: the bound variable
   is replaced with its copy from DECL_COPIES and every ssa name used in
   the bind value is replaced with its copy from NAME_COPIES.  If some
   used ssa name has no copy, the bind value is reset instead.  Returns
   true when STMT was left untouched (it is not a debug bind or source
   bind, it binds a DEBUG_EXPR_DECL or LABEL_DECL, or its variable has
   no recorded copy), false when it was rewritten.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  /* Rebind the statement to the decl's copy.  */
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* A used name was not copied; the value cannot be expressed in
	     the new function, so drop it.  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
989
990 /* Callback for htab_traverse. Adds a field corresponding to the reduction
991 specified in SLOT. The type is passed in DATA. */
992
993 int
994 add_field_for_reduction (reduction_info **slot, tree type)
995 {
996
997 struct reduction_info *const red = *slot;
998 tree var = reduc_stmt_res (red->reduc_stmt);
999 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1000 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
1001
1002 insert_field_into_struct (type, field);
1003
1004 red->field = field;
1005
1006 return 1;
1007 }
1008
1009 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
1010 described in SLOT. The type is passed in DATA. */
1011
1012 int
1013 add_field_for_name (name_to_copy_elt **slot, tree type)
1014 {
1015 struct name_to_copy_elt *const elt = *slot;
1016 tree name = ssa_name (elt->version);
1017 tree field = build_decl (UNKNOWN_LOCATION,
1018 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1019 TREE_TYPE (name));
1020
1021 insert_field_into_struct (type, field);
1022 elt->field = field;
1023
1024 return 1;
1025 }
1026
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  Always returns 1 so the traversal
   continues.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  /* The neutral init value flows in from outside the loop, the computed
     reduction result from the loop itself.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}
1069
/* Bookkeeping for the load/store generation callbacks: the decls used to
   access the shared ".paral_data" structure, and the basic blocks where
   the stores (before the parallel region) and the loads (inside it) are
   emitted.  */

struct clsn_data
{
  /* Variable holding the shared data, and the pointer through which it
     is read back.  */
  tree store;
  tree load;

  /* Blocks into which the stores and loads are emitted.  */
  basic_block store_bb;
  basic_block load_bb;
};
1078
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  /* Compute the address the atomic update will operate on.  */
  if (reduc->reduc_addr == NULL_TREE)
    {
      /* Usual case: the reduction lives in its field of the shared
	 ".paral_data" structure.  */
      load_struct = build_simple_mem_ref (clsn_data->load);
      t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

      addr = build_addr (t);
    }
  else
    {
      /* Set the address for the atomic store.  */
      addr = reduc->reduc_addr;

      /* Remove the non-atomic store '*addr = sum'.  */
      tree res = PHI_RESULT (reduc->keep_res);
      use_operand_p use_p;
      gimple *stmt;
      bool single_use_p = single_imm_use (res, &use_p, &stmt);
      gcc_assert (single_use_p);
      /* Splice the store out of the virtual use-def chain before
	 deleting it.  */
      replace_uses_by (gimple_vdef (stmt),
		       gimple_vuse (stmt));
      gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
      gsi_remove (&gsi, true);
    }

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  /* Emit the atomic load of *ADDR into a fresh SSA name.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's local result.  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* Store the combined value back atomically.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
1150
1151 /* Create the atomic operation at the join point of the threads.
1152 REDUCTION_LIST describes the reductions in the LOOP.
1153 LD_ST_DATA describes the shared data structure where
1154 shared data is stored in and loaded from. */
1155 static void
1156 create_call_for_reduction (struct loop *loop,
1157 reduction_info_table_type *reduction_list,
1158 struct clsn_data *ld_st_data)
1159 {
1160 reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
1161 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
1162 basic_block continue_bb = single_pred (loop->latch);
1163 ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
1164 reduction_list
1165 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
1166 }
1167
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  /* Build load->field and assign it to the SSA name that the code after
     the loop uses (the result of the exit phi).  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The exit phi is now superseded by the load; locate and remove it.
     It must be found, hence the gcc_unreachable below.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
1206
1207 /* Load the reduction result that was stored in LD_ST_DATA.
1208 REDUCTION_LIST describes the list of reductions that the
1209 loads should be generated for. */
1210 static void
1211 create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
1212 struct clsn_data *ld_st_data)
1213 {
1214 gimple_stmt_iterator gsi;
1215 tree t;
1216 gimple *stmt;
1217
1218 gsi = gsi_after_labels (ld_st_data->load_bb);
1219 t = build_fold_addr_expr (ld_st_data->store);
1220 stmt = gimple_build_assign (ld_st_data->load, t);
1221
1222 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1223
1224 reduction_list
1225 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
1226
1227 }
1228
1229 /* Callback for htab_traverse. Store the neutral value for the
1230 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1231 1 for MULT_EXPR, etc. into the reduction field.
1232 The reduction is specified in SLOT. The store information is
1233 passed in DATA. */
1234
1235 int
1236 create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
1237 {
1238 struct reduction_info *const red = *slot;
1239 tree t;
1240 gimple *stmt;
1241 gimple_stmt_iterator gsi;
1242 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
1243
1244 gsi = gsi_last_bb (clsn_data->store_bb);
1245 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1246 stmt = gimple_build_assign (t, red->initial_value);
1247 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1248
1249 return 1;
1250 }
1251
1252 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1253 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1254 specified in SLOT. */
1255
1256 int
1257 create_loads_and_stores_for_name (name_to_copy_elt **slot,
1258 struct clsn_data *clsn_data)
1259 {
1260 struct name_to_copy_elt *const elt = *slot;
1261 tree t;
1262 gimple *stmt;
1263 gimple_stmt_iterator gsi;
1264 tree type = TREE_TYPE (elt->new_name);
1265 tree load_struct;
1266
1267 gsi = gsi_last_bb (clsn_data->store_bb);
1268 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1269 stmt = gimple_build_assign (t, ssa_name (elt->version));
1270 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1271
1272 gsi = gsi_last_bb (clsn_data->load_bb);
1273 load_struct = build_simple_mem_ref (clsn_data->load);
1274 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1275 stmt = gimple_build_assign (elt->new_name, t);
1276 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1277
1278 return 1;
1279 }
1280
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  /* BB0 (before the region) gets the stores, BB1 (at the region entry)
     gets the loads.  */
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: collect the ssa names and decls used in the region and
     create their duplicates.  Debug stmts are deferred (see below).  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  /* Returns true when the debug bind could not be
		     rewritten and must be dropped.  */
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
		  (ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Initial reduction values are stored before the region; the
	     final values are loaded back after the join point.  */
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
1440
1441 /* Returns true if FN was created to run in parallel. */
1442
1443 bool
1444 parallelized_function_p (tree fndecl)
1445 {
1446 cgraph_node *node = cgraph_node::get (fndecl);
1447 gcc_assert (node != NULL);
1448 return node->parallelized_function;
1449 }
1450
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  LOC is the source location used for the new
   FUNCTION_DECL; the function takes a single pointer argument (the
   shared data structure) and returns void.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  /* Derive a unique, assembler-safe name from the current function.  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  /* Build the decl: a local, artificial, uninlinable function.  */
  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);
  BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;

  /* The new function returns void.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* Its single parameter is the pointer to the shared data.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1503
1504 /* Replace uses of NAME by VAL in block BB. */
1505
1506 static void
1507 replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1508 {
1509 gimple *use_stmt;
1510 imm_use_iterator imm_iter;
1511
1512 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1513 {
1514 if (gimple_bb (use_stmt) != bb)
1515 continue;
1516
1517 use_operand_p use_p;
1518 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1519 SET_USE (use_p, val);
1520 }
1521 }
1522
1523 /* Do transformation from:
1524
1525 <bb preheader>:
1526 ...
1527 goto <bb header>
1528
1529 <bb header>:
1530 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1531 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1532 ...
1533 use (ivtmp_a)
1534 ...
1535 sum_b = sum_a + sum_update
1536 ...
1537 if (ivtmp_a < n)
1538 goto <bb latch>;
1539 else
1540 goto <bb exit>;
1541
1542 <bb latch>:
1543 ivtmp_b = ivtmp_a + 1;
1544 goto <bb header>
1545
1546 <bb exit>:
1547 sum_z = PHI <sum_b (cond[1]), ...>
1548
1549 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1550 that's <bb header>.
1551
1552 to:
1553
1554 <bb preheader>:
1555 ...
1556 goto <bb newheader>
1557
1558 <bb header>:
1559 ivtmp_a = PHI <ivtmp_c (latch)>
1560 sum_a = PHI <sum_c (latch)>
1561 ...
1562 use (ivtmp_a)
1563 ...
1564 sum_b = sum_a + sum_update
1565 ...
1566 goto <bb latch>;
1567
1568 <bb newheader>:
1569 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1570 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1571 if (ivtmp_c < n + 1)
1572 goto <bb header>;
1573 else
1574 goto <bb newexit>;
1575
1576 <bb latch>:
1577 ivtmp_b = ivtmp_a + 1;
1578 goto <bb newheader>
1579
1580 <bb newexit>:
1581 sum_y = PHI <sum_c (newheader)>
1582
1583 <bb exit>:
1584 sum_z = PHI <sum_y (newexit), ...>
1585
1586
1587 In unified diff format:
1588
1589 <bb preheader>:
1590 ...
1591 - goto <bb header>
1592 + goto <bb newheader>
1593
1594 <bb header>:
1595 - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1596 - sum_a = PHI <sum_init (preheader), sum_b (latch)>
1597 + ivtmp_a = PHI <ivtmp_c (latch)>
1598 + sum_a = PHI <sum_c (latch)>
1599 ...
1600 use (ivtmp_a)
1601 ...
1602 sum_b = sum_a + sum_update
1603 ...
1604 - if (ivtmp_a < n)
1605 - goto <bb latch>;
1606 + goto <bb latch>;
1607 +
1608 + <bb newheader>:
1609 + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1610 + sum_c = PHI <sum_init (preheader), sum_b (latch)>
1611 + if (ivtmp_c < n + 1)
1612 + goto <bb header>;
1613 else
1614 goto <bb exit>;
1615
1616 <bb latch>:
1617 ivtmp_b = ivtmp_a + 1;
1618 - goto <bb header>
1619 + goto <bb newheader>
1620
1621 + <bb newexit>:
1622 + sum_y = PHI <sum_c (newheader)>
1623
1624 <bb exit>:
1625 - sum_z = PHI <sum_b (cond[1]), ...>
1626 + sum_z = PHI <sum_y (newexit), ...>
1627
1628 Note: the example does not show any virtual phis, but these are handled more
1629 or less as reductions.
1630
1631
1632 Moves the exit condition of LOOP to the beginning of its header.
1633 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1634 bound. */
1635
static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal from after the transformation, which is required by
     create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  Walk the header phis and the pending edge var map of
     POST_INC_EDGE in lockstep; they were created in the same order.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      /* Every header phi must be the control iv, a virtual operand, or a
	 known reduction.  */
      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);


  basic_block new_exit_block = NULL;
  if (!single_pred_p (exit->dest))
    {
      /* Create a new empty exit block, inbetween the new loop header and the
	 old exit block.  The function separate_decls_in_region needs this block
	 to insert code that is active on loop exit, but not any other path.  */
      new_exit_block = split_edge (exit);
    }

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      gphi *nphi = NULL;
      tree res_z = PHI_RESULT (phi);
      tree res_c;

      if (new_exit_block != NULL)
	{
	  /* Now that we have a new exit block, duplicate the phi of the old
	     exit block in the new exit block to preserve loop-closed ssa.  */
	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
	  tree res_y = copy_ssa_name (res_z, phi);
	  nphi = create_phi_node (res_y, new_exit_block);
	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
	}
      else
	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (virtual_operand_p (res_z))
	continue;

      /* Record which exit phi carries each reduction's final value.  */
      gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = (nphi != NULL
			 ? nphi
			 : phi);
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  checking_verify_ssa (true, true);
}
1787
/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of the
   loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
   transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains no phis.  */
  if (phi_nodes (loop->latch) != NULL)
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
	{
	  /* NIT + 1 folds to a constant; use it as the new bound.  */
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
  if (nit_max >= type_max)
    return false;

  gimple *def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      /* Emit the computation in the preheader's predecessor, i.e. outside
	 the loop.  */
      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
1907
/* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
   number of iterations of the loop.  REDUCTION_LIST describes the reductions in
   LOOP.  This is the fallback (body-duplicating) variant of
   try_transform_to_exit_first_loop_alt.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      /* Rename the header phi result to T and re-create RES as a phi in
	 the old header block, so RES survives the loop exit.  */
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition testing the renamed control iv.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  /* Duplicate the loop tail (everything dominated by the exit source)
     so the exit test ends up at the top.  */
  bbs = get_loop_body_in_dom_order (loop);

  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				  fold_convert (TREE_TYPE (control_name), nit_1),
				  false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
2019
2020 /* Create the parallel constructs for LOOP as described in gen_parallel_loop.
2021 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
2022 NEW_DATA is the variable that should be initialized from the argument
2023 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2024 that number is to be determined later. */
2025
2026 static void
2027 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
2028 tree new_data, unsigned n_threads, location_t loc,
2029 bool oacc_kernels_p)
2030 {
2031 gimple_stmt_iterator gsi;
2032 basic_block for_bb, ex_bb, continue_bb;
2033 tree t, param;
2034 gomp_parallel *omp_par_stmt;
2035 gimple *omp_return_stmt1, *omp_return_stmt2;
2036 gimple *phi;
2037 gcond *cond_stmt;
2038 gomp_for *for_stmt;
2039 gomp_continue *omp_cont_stmt;
2040 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
2041 edge exit, nexit, guard, end, e;
2042
2043 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
/* For an OpenACC kernels region no GIMPLE_OMP_PARALLEL is emitted at all;
   instead the requested parallelism is recorded as a "num_gangs" clause in
   the containing function's OpenACC attributes.  */
2044 if (oacc_kernels_p)
2045 {
2046 gcc_checking_assert (lookup_attribute ("oacc kernels",
2047 DECL_ATTRIBUTES (cfun->decl)));
2048
2049 tree clause = build_omp_clause (loc, OMP_CLAUSE_NUM_GANGS);
2050 OMP_CLAUSE_NUM_GANGS_EXPR (clause)
2051 = build_int_cst (integer_type_node, n_threads);
2052 oacc_set_fn_attrib (cfun->decl, clause, NULL);
2053 }
2054 else
2055 {
2056 basic_block bb = loop_preheader_edge (loop)->src;
2057 basic_block paral_bb = single_pred (bb);
2058 gsi = gsi_last_bb (paral_bb);
2059
2060 gcc_checking_assert (n_threads != 0);
2061 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2062 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2063 = build_int_cst (integer_type_node, n_threads);
2064 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2065 gimple_set_location (omp_par_stmt, loc);
2066
2067 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
2068
2069 /* Initialize NEW_DATA. */
2070 if (data)
2071 {
2072 gassign *assign_stmt;
2073
2074 gsi = gsi_after_labels (bb);
2075
/* param = &DATA; NEW_DATA = (TYPE) param — the body was rewritten to read
   its inputs through NEW_DATA by separate_decls_in_region.  */
2076 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2077 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2078 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2079
2080 assign_stmt = gimple_build_assign (new_data,
2081 fold_convert (TREE_TYPE (new_data), param));
2082 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2083 }
2084
2085 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2086 bb = split_loop_exit_edge (single_dom_exit (loop));
2087 gsi = gsi_last_bb (bb);
2088 omp_return_stmt1 = gimple_build_omp_return (false);
2089 gimple_set_location (omp_return_stmt1, loc);
2090 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2091 }
2092
2093 /* Extract data for GIMPLE_OMP_FOR. */
/* The loop has been transformed to exit-first form: the exit condition is
   the last statement of the header and tests the canonical IV.  Its phi
   gives us the initial value (preheader arg) and the incremented value
   (latch arg).  */
2094 gcc_assert (loop->header == single_dom_exit (loop)->src);
2095 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
2096
2097 cvar = gimple_cond_lhs (cond_stmt);
2098 cvar_base = SSA_NAME_VAR (cvar);
2099 phi = SSA_NAME_DEF_STMT (cvar);
2100 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
2101 initvar = copy_ssa_name (cvar);
2102 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2103 initvar);
2104 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2105
/* Remove the IV increment; GIMPLE_OMP_CONTINUE (emitted below) takes over
   the definition of CVAR_NEXT.  */
2106 gsi = gsi_last_nondebug_bb (loop->latch);
2107 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2108 gsi_remove (&gsi, true);
2109
2110 /* Prepare cfg. */
2111 for_bb = split_edge (loop_preheader_edge (loop));
2112 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2113 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2114 gcc_assert (exit == single_dom_exit (loop));
2115
/* GUARD lets the GIMPLE_OMP_FOR skip the body entirely; END is the new
   fallthru edge from the (split) latch predecessor to the exit block.  */
2116 guard = make_edge (for_bb, ex_bb, 0);
2117 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2118 loop->latch = split_edge (single_succ_edge (loop->latch));
2119 single_pred_edge (loop->latch)->flags = 0;
2120 end = make_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2121 rescan_loop_exit (end, true, false);
2122
/* Fix up the phis in EX_BB for the two new incoming edges GUARD and END.  */
2123 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2124 !gsi_end_p (gpi); gsi_next (&gpi))
2125 {
2126 source_location locus;
2127 gphi *phi = gpi.phi ();
2128 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2129 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
2130
2131 /* If the exit phi is not connected to a header phi in the same loop, this
2132 value is not modified in the loop, and we're done with this phi. */
2133 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2134 && gimple_bb (def_stmt) == loop->header))
2135 {
2136 locus = gimple_phi_arg_location_from_edge (phi, exit);
2137 add_phi_arg (phi, def, guard, locus);
2138 add_phi_arg (phi, def, end, locus);
2139 continue;
2140 }
2141
/* Otherwise: on the skip edge the value comes from before the loop, on the
   fallthru edge it comes from the last iteration.  */
2142 gphi *stmt = as_a <gphi *> (def_stmt);
2143 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
2144 locus = gimple_phi_arg_location_from_edge (stmt,
2145 loop_preheader_edge (loop));
2146 add_phi_arg (phi, def, guard, locus);
2147
2148 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2149 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
2150 add_phi_arg (phi, def, end, locus);
2151 }
2152 e = redirect_edge_and_branch (exit, nexit->dest);
2153 PENDING_STMT (e) = NULL;
2154
2155 /* Emit GIMPLE_OMP_FOR. */
2156 if (oacc_kernels_p)
2157 /* In combination with the NUM_GANGS on the parallel. */
2158 t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2159 else
2160 {
/* Translate the --param parloops-schedule / parloops-chunk-size settings
   into an OMP_CLAUSE_SCHEDULE clause.  Chunk size only applies to the
   static/dynamic/guided kinds.  */
2161 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2162 int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
2163 enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type \
2164 = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
2165 switch (schedule_type)
2166 {
2167 case PARAM_PARLOOPS_SCHEDULE_KIND_static:
2168 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2169 break;
2170 case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
2171 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2172 break;
2173 case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
2174 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2175 break;
2176 case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
2177 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2178 chunk_size = 0;
2179 break;
2180 case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
2181 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2182 chunk_size = 0;
2183 break;
2184 default:
2185 gcc_unreachable ();
2186 }
2187 if (chunk_size != 0)
2188 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2189 = build_int_cst (integer_type_node, chunk_size);
2190 }
2191
2192 for_stmt = gimple_build_omp_for (NULL,
2193 (oacc_kernels_p
2194 ? GF_OMP_FOR_KIND_OACC_LOOP
2195 : GF_OMP_FOR_KIND_FOR),
2196 t, 1, NULL);
2197
2198 gimple_cond_set_lhs (cond_stmt, cvar_base);
2199 type = TREE_TYPE (cvar);
2200 gimple_set_location (for_stmt, loc);
2201 gimple_omp_for_set_index (for_stmt, 0, initvar);
2202 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2203 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2204 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2205 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2206 cvar_base,
2207 build_int_cst (type, 1)));
2208
2209 gsi = gsi_last_bb (for_bb);
2210 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
/* INITVAR is now defined by the GIMPLE_OMP_FOR rather than by any assign.  */
2211 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2212
2213 /* Emit GIMPLE_OMP_CONTINUE. */
2214 continue_bb = single_pred (loop->latch);
2215 gsi = gsi_last_bb (continue_bb);
2216 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2217 gimple_set_location (omp_cont_stmt, loc);
2218 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2219 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
2220
2221 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2222 gsi = gsi_last_bb (ex_bb);
2223 omp_return_stmt2 = gimple_build_omp_return (true);
2224 gimple_set_location (omp_return_stmt2, loc);
2225 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
2226
2227 /* After the above dom info is hosed. Re-compute it. */
2228 free_dominance_info (CDI_DOMINATORS);
2229 calculate_dominance_info (CDI_DOMINATORS);
2230 }
2231
2232 /* Generates code to execute the iterations of LOOP in N_THREADS
2233 threads in parallel, which can be 0 if that number is to be determined
2234 later.
2235
2236 NITER describes number of iterations of LOOP.
2237 REDUCTION_LIST describes the reductions existent in the LOOP. */
2238
2239 static void
2240 gen_parallel_loop (struct loop *loop,
2241 reduction_info_table_type *reduction_list,
2242 unsigned n_threads, struct tree_niter_desc *niter,
2243 bool oacc_kernels_p)
2244 {
2245 tree many_iterations_cond, type, nit;
2246 tree arg_struct, new_arg_struct;
2247 gimple_seq stmts;
2248 edge entry, exit;
2249 struct clsn_data clsn_data;
2250 unsigned prob;
2251 location_t loc;
2252 gimple *cond_stmt;
/* Minimum iterations per thread for the parallel path to be taken; loops
   with an inner loop use a lower threshold than innermost loops.  */
2253 unsigned int m_p_thread=2;
2254
2255 /* From
2256
2257 ---------------------------------------------------------------------
2258 loop
2259 {
2260 IV = phi (INIT, IV + STEP)
2261 BODY1;
2262 if (COND)
2263 break;
2264 BODY2;
2265 }
2266 ---------------------------------------------------------------------
2267
2268 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2269 we generate the following code:
2270
2271 ---------------------------------------------------------------------
2272
2273 if (MAY_BE_ZERO
2274 || NITER < MIN_PER_THREAD * N_THREADS)
2275 goto original;
2276
2277 BODY1;
2278 store all local loop-invariant variables used in body of the loop to DATA.
2279 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
2280 load the variables from DATA.
2281 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
2282 BODY2;
2283 BODY1;
2284 GIMPLE_OMP_CONTINUE;
2285 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2286 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
2287 goto end;
2288
2289 original:
2290 loop
2291 {
2292 IV = phi (INIT, IV + STEP)
2293 BODY1;
2294 if (COND)
2295 break;
2296 BODY2;
2297 }
2298
2299 end:
2300
2301 */
2302
2303 /* Create two versions of the loop -- in the old one, we know that the
2304 number of iterations is large enough, and we will transform it into the
2305 loop that will be split to loop_fn, the new one will be used for the
2306 remaining iterations. */
2307
2308 /* We should compute a better number-of-iterations value for outer loops.
2309 That is, if we have
2310
2311 for (i = 0; i < n; ++i)
2312 for (j = 0; j < m; ++j)
2313 ...
2314
2315 we should compute nit = n * m, not nit = n.
2316 Also may_be_zero handling would need to be adjusted. */
2317
2318 type = TREE_TYPE (niter->niter);
2319 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2320 NULL_TREE);
2321 if (stmts)
2322 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2323
/* For OpenACC kernels no runtime versioning check is emitted: the loop is
   parallelized unconditionally.  */
2324 if (!oacc_kernels_p)
2325 {
2326 if (loop->inner)
2327 m_p_thread=2;
2328 else
2329 m_p_thread=MIN_PER_THREAD;
2330
/* Guard: !MAY_BE_ZERO && NITER >= m_p_thread * N_THREADS.  */
2331 gcc_checking_assert (n_threads != 0);
2332 many_iterations_cond =
2333 fold_build2 (GE_EXPR, boolean_type_node,
2334 nit, build_int_cst (type, m_p_thread * n_threads));
2335
2336 many_iterations_cond
2337 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2338 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2339 many_iterations_cond);
2340 many_iterations_cond
2341 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
2342 if (stmts)
2343 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
2344 if (!is_gimple_condexpr (many_iterations_cond))
2345 {
2346 many_iterations_cond
2347 = force_gimple_operand (many_iterations_cond, &stmts,
2348 true, NULL_TREE);
2349 if (stmts)
2350 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
2351 stmts);
2352 }
2353
2354 initialize_original_copy_tables ();
2355
2356 /* We assume that the loop usually iterates a lot. */
2357 prob = 4 * REG_BR_PROB_BASE / 5;
2358 loop_version (loop, many_iterations_cond, NULL,
2359 prob, REG_BR_PROB_BASE - prob,
2360 prob, REG_BR_PROB_BASE - prob, true);
2361 update_ssa (TODO_update_ssa);
2362 free_original_copy_tables ();
2363 }
2364
2365 /* Base all the induction variables in LOOP on a single control one. */
2366 canonicalize_loop_ivs (loop, &nit, true);
2367
2368 /* Ensure that the exit condition is the first statement in the loop.
2369 The common case is that latch of the loop is empty (apart from the
2370 increment) and immediately follows the loop exit test. Attempt to move the
2371 entry of the loop directly before the exit check and increase the number of
2372 iterations of the loop by one. */
2373 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2374 {
2375 if (dump_file
2376 && (dump_flags & TDF_DETAILS))
2377 fprintf (dump_file,
2378 "alternative exit-first loop transform succeeded"
2379 " for loop %d\n", loop->num);
2380 }
2381 else
2382 {
/* NOTE(review): the fallback transform duplicates part of the loop body,
   which for an OpenACC kernels region apparently is only safe when executed
   by a single gang — hence n_threads is forced to 1 here; confirm.  */
2383 if (oacc_kernels_p)
2384 n_threads = 1;
2385
2386 /* Fall back on the method that handles more cases, but duplicates the
2387 loop body: move the exit condition of LOOP to the beginning of its
2388 header, and duplicate the part of the last iteration that gets disabled
2389 to the exit of the loop. */
2390 transform_to_exit_first_loop (loop, reduction_list, nit);
2391 }
2392
2393 /* Generate initializations for reductions. */
2394 if (reduction_list->elements () > 0)
2395 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
2396
2397 /* Eliminate the references to local variables from the loop. */
2398 gcc_assert (single_exit (loop));
2399 entry = loop_preheader_edge (loop);
2400 exit = single_dom_exit (loop);
2401
2402 /* This rewrites the body in terms of new variables. This has already
2403 been done for oacc_kernels_p in pass_lower_omp/lower_omp (). */
2404 if (!oacc_kernels_p)
2405 {
2406 eliminate_local_variables (entry, exit);
2407 /* In the old loop, move all variables non-local to the loop to a
2408 structure and back, and create separate decls for the variables used in
2409 loop. */
2410 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2411 &new_arg_struct, &clsn_data);
2412 }
2413 else
2414 {
/* No marshalling structure is needed; only the reduction bookkeeping in
   CLSN_DATA is filled in.  */
2415 arg_struct = NULL_TREE;
2416 new_arg_struct = NULL_TREE;
2417 clsn_data.load = NULL_TREE;
2418 clsn_data.load_bb = exit->dest;
2419 clsn_data.store = NULL_TREE;
2420 clsn_data.store_bb = NULL;
2421 }
2422
2423 /* Create the parallel constructs. */
2424 loc = UNKNOWN_LOCATION;
2425 cond_stmt = last_stmt (loop->header);
2426 if (cond_stmt)
2427 loc = gimple_location (cond_stmt);
2428 create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
2429 n_threads, loc, oacc_kernels_p);
2430 if (reduction_list->elements () > 0)
2431 create_call_for_reduction (loop, reduction_list, &clsn_data);
2432
2433 scev_reset ();
2434
2435 /* Free loop bound estimations that could contain references to
2436 removed statements. */
/* Note: LOOP is reused here as the iterator over all loops in the
   function.  */
2437 FOR_EACH_LOOP (loop, 0)
2438 free_numbers_of_iterations_estimates_loop (loop);
2439 }
2440
2441 /* Returns true when LOOP contains vector phi nodes. */
2442
2443 static bool
2444 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
2445 {
2446 unsigned i;
2447 basic_block *bbs = get_loop_body_in_dom_order (loop);
2448 gphi_iterator gsi;
2449 bool res = true;
2450
2451 for (i = 0; i < loop->num_nodes; i++)
2452 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2453 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
2454 goto end;
2455
2456 res = false;
2457 end:
2458 free (bbs);
2459 return res;
2460 }
2461
2462 /* Create a reduction_info struct, initialize it with REDUC_STMT
2463 and PHI, insert it to the REDUCTION_LIST. */
2464
2465 static void
2466 build_new_reduction (reduction_info_table_type *reduction_list,
2467 gimple *reduc_stmt, gphi *phi)
2468 {
2469 reduction_info **slot;
2470 struct reduction_info *new_reduction;
2471 enum tree_code reduction_code;
2472
2473 gcc_assert (reduc_stmt);
2474
2475 if (dump_file && (dump_flags & TDF_DETAILS))
2476 {
2477 fprintf (dump_file,
2478 "Detected reduction. reduction stmt is:\n");
2479 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2480 fprintf (dump_file, "\n");
2481 }
2482
/* For a double reduction REDUC_STMT is a phi; take the reduction operation
   from the statement defining its first argument.  NOTE(review): this
   assumes that defining statement is a GIMPLE_ASSIGN (gimple_assign_rhs_code
   is called unchecked) — confirm callers guarantee this.  */
2483 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
2484 {
2485 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
2486 gimple *def1 = SSA_NAME_DEF_STMT (op1);
2487 reduction_code = gimple_assign_rhs_code (def1);
2488 }
2489
2490 else
2491 reduction_code = gimple_assign_rhs_code (reduc_stmt);
2492
2493 new_reduction = XCNEW (struct reduction_info);
2494
2495 new_reduction->reduc_stmt = reduc_stmt;
2496 new_reduction->reduc_phi = phi;
/* reduc_version — the SSA version of the reduction phi's result — is
   presumably the hash key used by reduction_phi (); see the table's hasher.  */
2497 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
2498 new_reduction->reduction_code = reduction_code;
2499 slot = reduction_list->find_slot (new_reduction, INSERT);
2500 *slot = new_reduction;
2501 }
2502
2503 /* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2504
2505 int
2506 set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
2507 {
2508 struct reduction_info *const red = *slot;
2509 gimple_set_uid (red->reduc_phi, red->reduc_version);
2510 return 1;
2511 }
2512
2513 /* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2514
2515 static void
2516 gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
2517 {
2518 gphi_iterator gsi;
2519 loop_vec_info simple_loop_info;
2520 auto_vec<gphi *, 4> double_reduc_phis;
2521 auto_vec<gimple *, 4> double_reduc_stmts;
2522
/* Reuse the vectorizer's reduction detection machinery; it needs the
   stmt_vec_info side table to exist.  */
2523 if (!stmt_vec_info_vec.exists ())
2524 init_stmt_vec_info_vec ();
2525
2526 simple_loop_info = vect_analyze_loop_form (loop);
2527 if (simple_loop_info == NULL)
2528 goto gather_done;
2529
2530 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2531 {
2532 gphi *phi = gsi.phi ();
2533 affine_iv iv;
2534 tree res = PHI_RESULT (phi);
2535 bool double_reduc;
2536
2537 if (virtual_operand_p (res))
2538 continue;
2539
/* Plain induction variables are not reductions.  */
2540 if (simple_iv (loop, loop, res, &iv, true))
2541 continue;
2542
2543 gimple *reduc_stmt
2544 = vect_force_simple_reduction (simple_loop_info, phi, true,
2545 &double_reduc, true);
2546 if (!reduc_stmt)
2547 continue;
2548
/* A double reduction is detected on the outer phi; defer it until the
   inner loop is analyzed below.  Nests deeper than two levels are not
   handled.  */
2549 if (double_reduc)
2550 {
2551 if (loop->inner->inner != NULL)
2552 continue;
2553
2554 double_reduc_phis.safe_push (phi);
2555 double_reduc_stmts.safe_push (reduc_stmt);
2556 continue;
2557 }
2558
2559 build_new_reduction (reduction_list, reduc_stmt, phi);
2560 }
2561 destroy_loop_vec_info (simple_loop_info, true);
2562
/* Second pass: validate each deferred double reduction against the inner
   loop and record it using the outer phi and the outer reduction stmt.  */
2563 if (!double_reduc_phis.is_empty ())
2564 {
2565 simple_loop_info = vect_analyze_loop_form (loop->inner);
2566 if (simple_loop_info)
2567 {
2568 gphi *phi;
2569 unsigned int i;
2570
2571 FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
2572 {
2573 affine_iv iv;
2574 tree res = PHI_RESULT (phi);
2575 bool double_reduc;
2576
/* The outer phi result must feed exactly one inner-loop header phi.  */
2577 use_operand_p use_p;
2578 gimple *inner_stmt;
2579 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
2580 gcc_assert (single_use_p);
2581 if (gimple_code (inner_stmt) != GIMPLE_PHI)
2582 continue;
2583 gphi *inner_phi = as_a <gphi *> (inner_stmt);
2584 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
2585 &iv, true))
2586 continue;
2587
2588 gimple *inner_reduc_stmt
2589 = vect_force_simple_reduction (simple_loop_info, inner_phi,
2590 true, &double_reduc, true);
2591 gcc_assert (!double_reduc);
2592 if (inner_reduc_stmt == NULL)
2593 continue;
2594
2595 build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
2596 }
2597 destroy_loop_vec_info (simple_loop_info, true);
2598 }
2599 }
2600
2601 gather_done:
2602 /* Release the claim on gimple_uid. */
2603 free_stmt_vec_info_vec ();
2604
2605 if (reduction_list->elements () == 0)
2606 return;
2607
2608 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2609 and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only
2610 now. */
/* Reset all phi uids first, then stamp each reduction phi with its
   reduc_version so reduction_phi () can find it.  */
2611 basic_block bb;
2612 FOR_EACH_BB_FN (bb, cfun)
2613 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2614 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
2615 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
2616 }
2617
2618 /* Try to initialize NITER for code generation part. */
2619
2620 static bool
2621 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2622 {
2623 edge exit = single_dom_exit (loop);
2624
2625 gcc_assert (exit);
2626
2627 /* We need to know # of iterations, and there should be no uses of values
2628 defined inside loop outside of it, unless the values are invariants of
2629 the loop. */
2630 if (!number_of_iterations_exit (loop, exit, niter, false))
2631 {
2632 if (dump_file && (dump_flags & TDF_DETAILS))
2633 fprintf (dump_file, " FAILED: number of iterations not known\n");
2634 return false;
2635 }
2636
2637 return true;
2638 }
2639
2640 /* Return the default def of the first function argument. */
2641
2642 static tree
2643 get_omp_data_i_param (void)
2644 {
2645 tree decl = DECL_ARGUMENTS (cfun->decl);
2646 gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
2647 return ssa_default_def (cfun, decl);
2648 }
2649
2650 /* For PHI in loop header of LOOP, look for pattern:
2651
2652 <bb preheader>
2653 .omp_data_i = &.omp_data_arr;
2654 addr = .omp_data_i->sum;
2655 sum_a = *addr;
2656
2657 <bb header>:
2658 sum_b = PHI <sum_a (preheader), sum_c (latch)>
2659
2660 and return addr. Otherwise, return NULL_TREE. */
2661
2662 static tree
2663 find_reduc_addr (struct loop *loop, gphi *phi)
2664 {
2665 edge e = loop_preheader_edge (loop);
2666 tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
2667 gimple *stmt = SSA_NAME_DEF_STMT (arg);
2668 if (!gimple_assign_single_p (stmt))
2669 return NULL_TREE;
2670 tree memref = gimple_assign_rhs1 (stmt);
2671 if (TREE_CODE (memref) != MEM_REF)
2672 return NULL_TREE;
2673 tree addr = TREE_OPERAND (memref, 0);
2674
2675 gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
2676 if (!gimple_assign_single_p (stmt2))
2677 return NULL_TREE;
2678 tree compref = gimple_assign_rhs1 (stmt2);
2679 if (TREE_CODE (compref) != COMPONENT_REF)
2680 return NULL_TREE;
2681 tree addr2 = TREE_OPERAND (compref, 0);
2682 if (TREE_CODE (addr2) != MEM_REF)
2683 return NULL_TREE;
2684 addr2 = TREE_OPERAND (addr2, 0);
2685 if (TREE_CODE (addr2) != SSA_NAME
2686 || addr2 != get_omp_data_i_param ())
2687 return NULL_TREE;
2688
2689 return addr;
2690 }
2691
2692 /* Try to initialize REDUCTION_LIST for code generation part.
2693 REDUCTION_LIST describes the reductions. */
2694
2695 static bool
2696 try_create_reduction_list (loop_p loop,
2697 reduction_info_table_type *reduction_list,
2698 bool oacc_kernels_p)
2699 {
2700 edge exit = single_dom_exit (loop);
2701 gphi_iterator gsi;
2702
2703 gcc_assert (exit);
2704
2705 /* Try to get rid of exit phis. */
2706 final_value_replacement_loop (loop);
2707
2708 gather_scalar_reductions (loop, reduction_list);
2709
2710
/* Every non-virtual value live across the loop exit must be the result of a
   recognized reduction; record the exit phi of each reduction in keep_res.  */
2711 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2712 {
2713 gphi *phi = gsi.phi ();
2714 struct reduction_info *red;
2715 imm_use_iterator imm_iter;
2716 use_operand_p use_p;
2717 gimple *reduc_phi;
2718 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2719
2720 if (!virtual_operand_p (val))
2721 {
2722 if (dump_file && (dump_flags & TDF_DETAILS))
2723 {
2724 fprintf (dump_file, "phi is ");
2725 print_gimple_stmt (dump_file, phi, 0, 0);
2726 fprintf (dump_file, "arg of phi to exit: value ");
2727 print_generic_expr (dump_file, val, 0);
2728 fprintf (dump_file, " used outside loop\n");
2729 fprintf (dump_file,
2730 " checking if it is part of reduction pattern:\n");
2731 }
2732 if (reduction_list->elements () == 0)
2733 {
2734 if (dump_file && (dump_flags & TDF_DETAILS))
2735 fprintf (dump_file,
2736 " FAILED: it is not a part of reduction.\n");
2737 return false;
2738 }
/* Find the in-loop (non-debug) use of VAL; for a reduction that is the
   reduction phi itself.  */
2739 reduc_phi = NULL;
2740 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2741 {
2742 if (!gimple_debug_bind_p (USE_STMT (use_p))
2743 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
2744 {
2745 reduc_phi = USE_STMT (use_p);
2746 break;
2747 }
2748 }
2749 red = reduction_phi (reduction_list, reduc_phi);
2750 if (red == NULL)
2751 {
2752 if (dump_file && (dump_flags & TDF_DETAILS))
2753 fprintf (dump_file,
2754 " FAILED: it is not a part of reduction.\n");
2755 return false;
2756 }
/* Each reduction may feed only one exit phi.  */
2757 if (red->keep_res != NULL)
2758 {
2759 if (dump_file && (dump_flags & TDF_DETAILS))
2760 fprintf (dump_file,
2761 " FAILED: reduction has multiple exit phis.\n");
2762 return false;
2763 }
2764 red->keep_res = phi;
2765 if (dump_file && (dump_flags & TDF_DETAILS))
2766 {
2767 fprintf (dump_file, "reduction phi is ");
2768 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2769 fprintf (dump_file, "reduction stmt is ");
2770 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2771 }
2772 }
2773 }
2774
2775 /* The iterations of the loop may communicate only through bivs whose
2776 iteration space can be distributed efficiently. */
2777 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2778 {
2779 gphi *phi = gsi.phi ();
2780 tree def = PHI_RESULT (phi);
2781 affine_iv iv;
2782
/* Every non-IV header phi must be a known reduction, otherwise there is a
   genuine cross-iteration scalar dependence.  */
2783 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
2784 {
2785 struct reduction_info *red;
2786
2787 red = reduction_phi (reduction_list, phi);
2788 if (red == NULL)
2789 {
2790 if (dump_file && (dump_flags & TDF_DETAILS))
2791 fprintf (dump_file,
2792 " FAILED: scalar dependency between iterations\n");
2793 return false;
2794 }
2795 }
2796 }
2797
/* For OpenACC kernels additionally locate, for each reduction, the address
   its initial value is loaded from, so the result can be stored back.  */
2798 if (oacc_kernels_p)
2799 {
2800 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
2801 gsi_next (&gsi))
2802 {
2803 gphi *phi = gsi.phi ();
2804 tree def = PHI_RESULT (phi);
2805 affine_iv iv;
2806
2807 if (!virtual_operand_p (def)
2808 && !simple_iv (loop, loop, def, &iv, true))
2809 {
2810 tree addr = find_reduc_addr (loop, phi);
2811 if (addr == NULL_TREE)
2812 return false;
2813 struct reduction_info *red = reduction_phi (reduction_list, phi);
2814 red->reduc_addr = addr;
2815 }
2816 }
2817 }
2818
2819 return true;
2820 }
2821
2822 /* Return true if LOOP contains phis with ADDR_EXPR in args. */
2823
2824 static bool
2825 loop_has_phi_with_address_arg (struct loop *loop)
2826 {
2827 basic_block *bbs = get_loop_body (loop);
2828 bool res = false;
2829
2830 unsigned i, j;
2831 gphi_iterator gsi;
2832 for (i = 0; i < loop->num_nodes; i++)
2833 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2834 {
2835 gphi *phi = gsi.phi ();
2836 for (j = 0; j < gimple_phi_num_args (phi); j++)
2837 {
2838 tree arg = gimple_phi_arg_def (phi, j);
2839 if (TREE_CODE (arg) == ADDR_EXPR)
2840 {
2841 /* This should be handled by eliminate_local_variables, but that
2842 function currently ignores phis. */
2843 res = true;
2844 goto end;
2845 }
2846 }
2847 }
2848 end:
2849 free (bbs);
2850
2851 return res;
2852 }
2853
2854 /* Return true if memory ref REF (corresponding to the stmt at GSI in
2855 REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
2856 or the statements in REGIONS_BB[I + n]. REF_IS_STORE indicates if REF is a
2857 store. Ignore conflicts with SKIP_STMT. */
2858
2859 static bool
2860 ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
2861 bool ref_is_store, vec<basic_block> region_bbs,
2862 unsigned int i, gimple *skip_stmt)
2863 {
2864 basic_block bb = region_bbs[i];
/* Start scanning at the statement after the one REF belongs to.  */
2865 gsi_next (&gsi);
2866
/* Walk the remainder of the current block, then all subsequent blocks of
   the region, looking for a memory conflict with REF.  */
2867 while (true)
2868 {
2869 for (; !gsi_end_p (gsi);
2870 gsi_next (&gsi))
2871 {
2872 gimple *stmt = gsi_stmt (gsi);
2873 if (stmt == skip_stmt)
2874 {
2875 if (dump_file)
2876 {
2877 fprintf (dump_file, "skipping reduction store: ");
2878 print_gimple_stmt (dump_file, stmt, 0, 0);
2879 }
2880 continue;
2881 }
2882
/* Statements with no virtual operands cannot touch memory.  */
2883 if (!gimple_vdef (stmt)
2884 && !gimple_vuse (stmt))
2885 continue;
2886
2887 if (gimple_code (stmt) == GIMPLE_RETURN)
2888 continue;
2889
/* A store conflicts with any later read of REF; a load conflicts with any
   later clobber of REF.  */
2890 if (ref_is_store)
2891 {
2892 if (ref_maybe_used_by_stmt_p (stmt, ref))
2893 {
2894 if (dump_file)
2895 {
2896 fprintf (dump_file, "Stmt ");
2897 print_gimple_stmt (dump_file, stmt, 0, 0);
2898 }
2899 return true;
2900 }
2901 }
2902 else
2903 {
2904 if (stmt_may_clobber_ref_p_1 (stmt, ref))
2905 {
2906 if (dump_file)
2907 {
2908 fprintf (dump_file, "Stmt ");
2909 print_gimple_stmt (dump_file, stmt, 0, 0);
2910 }
2911 return true;
2912 }
2913 }
2914 }
/* Current block exhausted; move to the next block of the region.  */
2915 i++;
2916 if (i == region_bbs.length ())
2917 break;
2918 bb = region_bbs[i];
2919 gsi = gsi_start_bb (bb);
2920 }
2921
2922 return false;
2923 }
2924
2925 /* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
2926 in parallel with REGION_BBS containing the loop. Return the stores of
2927 reduction results in REDUCTION_STORES. */
2928
2929 static bool
2930 oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
2931 reduction_info_table_type *reduction_list,
2932 bitmap reduction_stores)
2933 {
2934 tree omp_data_i = get_omp_data_i_param ();
2935
2936 unsigned i;
2937 basic_block bb;
/* Examine only the blocks of the region that are outside the loop.  */
2938 FOR_EACH_VEC_ELT (region_bbs, i, bb)
2939 {
2940 if (bitmap_bit_p (in_loop_bbs, bb->index))
2941 continue;
2942
2943 gimple_stmt_iterator gsi;
2944 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2945 gsi_next (&gsi))
2946 {
2947 gimple *stmt = gsi_stmt (gsi);
2948 gimple *skip_stmt = NULL;
2949
2950 if (is_gimple_debug (stmt)
2951 || gimple_code (stmt) == GIMPLE_COND)
2952 continue;
2953
/* Classify the statement: parameter load, (reduction) load, store, or
   harmless; anything else makes the region unsuitable.  */
2954 ao_ref ref;
2955 bool ref_is_store = false;
2956 if (gimple_assign_load_p (stmt))
2957 {
/* Loads through *.omp_data_i are the region's own parameter accesses.  */
2958 tree rhs = gimple_assign_rhs1 (stmt);
2959 tree base = get_base_address (rhs);
2960 if (TREE_CODE (base) == MEM_REF
2961 && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
2962 continue;
2963
/* Detect the load of a reduction result: the loaded value feeds (only) the
   reduction's keep_res phi, whose result is (only) stored back to memory.
   That store is recorded in REDUCTION_STORES and skipped in the conflict
   walk below.  */
2964 tree lhs = gimple_assign_lhs (stmt);
2965 if (TREE_CODE (lhs) == SSA_NAME
2966 && has_single_use (lhs))
2967 {
2968 use_operand_p use_p;
2969 gimple *use_stmt;
2970 single_imm_use (lhs, &use_p, &use_stmt);
2971 if (gimple_code (use_stmt) == GIMPLE_PHI)
2972 {
2973 struct reduction_info *red;
2974 red = reduction_phi (reduction_list, use_stmt);
2975 tree val = PHI_RESULT (red->keep_res);
2976 if (has_single_use (val))
2977 {
2978 single_imm_use (val, &use_p, &use_stmt);
2979 if (gimple_store_p (use_stmt))
2980 {
2981 unsigned int id
2982 = SSA_NAME_VERSION (gimple_vdef (use_stmt));
2983 bitmap_set_bit (reduction_stores, id);
2984 skip_stmt = use_stmt;
2985 if (dump_file)
2986 {
2987 fprintf (dump_file, "found reduction load: ");
2988 print_gimple_stmt (dump_file, stmt, 0, 0);
2989 }
2990 }
2991 }
2992 }
2993 }
2994
2995 ao_ref_init (&ref, rhs);
2996 }
2997 else if (gimple_store_p (stmt))
2998 {
2999 ao_ref_init (&ref, gimple_assign_lhs (stmt));
3000 ref_is_store = true;
3001 }
3002 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3003 continue;
3004 else if (!gimple_has_side_effects (stmt)
3005 && !gimple_could_trap_p (stmt)
3006 && !stmt_could_throw_p (stmt)
3007 && !gimple_vdef (stmt)
3008 && !gimple_vuse (stmt))
3009 continue;
3010 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
3011 continue;
3012 else if (gimple_code (stmt) == GIMPLE_RETURN)
3013 continue;
3014 else
3015 {
3016 if (dump_file)
3017 {
3018 fprintf (dump_file, "Unhandled stmt in entry/exit: ");
3019 print_gimple_stmt (dump_file, stmt, 0, 0);
3020 }
3021 return false;
3022 }
3023
/* The classified memory reference must not conflict with any later
   statement in the out-of-loop part of the region.  */
3024 if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3025 i, skip_stmt))
3026 {
3027 if (dump_file)
3028 {
3029 fprintf (dump_file, "conflicts with entry/exit stmt: ");
3030 print_gimple_stmt (dump_file, stmt, 0, 0);
3031 }
3032 return false;
3033 }
3034 }
3035 }
3036
3037 return true;
3038 }
3039
3040 /* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3041 gang_pos == 0, except when the stores are REDUCTION_STORES. Return true
3042 if any changes were made. */
3043
3044 static bool
3045 oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3046 bitmap reduction_stores)
3047 {
/* SSA name holding the gang position; created lazily on the first store
   that needs guarding.  */
3048 tree gang_pos = NULL_TREE;
3049 bool changed = false;
3050
3051 unsigned i;
3052 basic_block bb;
3053 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3054 {
3055 if (bitmap_bit_p (in_loop_bbs, bb->index))
3056 continue;
3057
3058 gimple_stmt_iterator gsi;
3059 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3060 {
3061 gimple *stmt = gsi_stmt (gsi);
3062
3063 if (!gimple_store_p (stmt))
3064 {
3065 /* Update gsi to point to next stmt. */
3066 gsi_next (&gsi);
3067 continue;
3068 }
3069
/* Reduction result stores are performed by every gang on its own partial
   result and must not be neutered.  */
3070 if (bitmap_bit_p (reduction_stores,
3071 SSA_NAME_VERSION (gimple_vdef (stmt))))
3072 {
3073 if (dump_file)
3074 {
3075 fprintf (dump_file,
3076 "skipped reduction store for single-gang"
3077 " neutering: ");
3078 print_gimple_stmt (dump_file, stmt, 0, 0);
3079 }
3080
3081 /* Update gsi to point to next stmt. */
3082 gsi_next (&gsi);
3083 continue;
3084 }
3085
3086 changed = true;
3087
/* Lazily materialize gang_pos = GOACC_DIM_POS (GOMP_DIM_GANG) once, at the
   start of the function.  */
3088 if (gang_pos == NULL_TREE)
3089 {
3090 tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3091 gcall *gang_single
3092 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3093 gang_pos = make_ssa_name (integer_type_node);
3094 gimple_call_set_lhs (gang_single, gang_pos);
3095 gimple_stmt_iterator start
3096 = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3097 tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3098 gimple_set_vuse (gang_single, vuse);
3099 gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3100 }
3101
3102 if (dump_file)
3103 {
3104 fprintf (dump_file,
3105 "found store that needs single-gang neutering: ");
3106 print_gimple_stmt (dump_file, stmt, 0, 0);
3107 }
3108
3109 {
3110 /* Split block before store. */
3111 gimple_stmt_iterator gsi2 = gsi;
3112 gsi_prev (&gsi2);
3113 edge e;
3114 if (gsi_end_p (gsi2))
3115 {
3116 e = split_block_after_labels (bb);
3117 gsi2 = gsi_last_bb (bb);
3118 }
3119 else
3120 e = split_block (bb, gsi_stmt (gsi2));
3121 basic_block bb2 = e->dest;
3122
3123 /* Split block after store. */
3124 gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3125 edge e2 = split_block (bb2, gsi_stmt (gsi3));
3126 basic_block bb3 = e2->dest;
3127
/* Guard the isolated store with "if (gang_pos == 0)": only gang 0 performs
   it; all other gangs take the bypass edge.  */
3128 gimple *cond
3129 = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3130 NULL_TREE, NULL_TREE);
3131 gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3132
3133 edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
3134 e->flags = EDGE_TRUE_VALUE;
3135
/* Merge the store's virtual def with a phi in the join block so the memory
   state is well-defined on both paths.  */
3136 tree vdef = gimple_vdef (stmt);
3137 tree vuse = gimple_vuse (stmt);
3138
3139 tree phi_res = copy_ssa_name (vdef);
3140 gphi *new_phi = create_phi_node (phi_res, bb3);
3141 replace_uses_by (vdef, phi_res);
3142 add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3143 add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3144
3145 /* Update gsi to point to next stmt. */
3146 bb = bb3;
3147 gsi = gsi_start_bb (bb);
3148 }
3149 }
3150 }
3151
3152 return changed;
3153 }
3154
3155 /* Return true if the statements before and after the LOOP can be executed in
3156 parallel with the function containing the loop. Resolve conflicting stores
3157 outside LOOP by guarding them such that only a single gang executes them. */
3158
3159 static bool
3160 oacc_entry_exit_ok (struct loop *loop,
3161 reduction_info_table_type *reduction_list)
3162 {
3163 basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3164 vec<basic_block> region_bbs
3165 = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3166
3167 bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3168 bitmap_clear (in_loop_bbs);
3169 for (unsigned int i = 0; i < loop->num_nodes; i++)
3170 bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3171
3172 bitmap reduction_stores = BITMAP_ALLOC (NULL);
3173 bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3174 reduction_stores);
3175
3176 if (res)
3177 {
3178 bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3179 reduction_stores);
3180 if (changed)
3181 {
3182 free_dominance_info (CDI_DOMINATORS);
3183 calculate_dominance_info (CDI_DOMINATORS);
3184 }
3185 }
3186
3187 region_bbs.release ();
3188 free (loop_bbs);
3189
3190 BITMAP_FREE (in_loop_bbs);
3191 BITMAP_FREE (reduction_stores);
3192
3193 return res;
3194 }
3195
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.

   With OACC_KERNELS_P, only loops inside OpenACC kernels regions are
   considered, and the thread count is left for the OpenACC machinery to
   determine later; otherwise the candidate set and thread count come from
   -ftree-parallelize-loops.  */

static bool
parallelize_loops (bool oacc_kernels_p)
{
  unsigned n_threads;
  bool changed = false;
  struct loop *loop;
  /* When a loop is parallelized, its inner loops must not be processed
     again; SKIP_LOOP carries that exclusion to the next iterations of the
     FOR_EACH_LOOP walk (which visits loops in tree order).  */
  struct loop *skip_loop = NULL;
  struct tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  source_location loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (!oacc_kernels_p
      && parallelized_function_p (cfun->decl))
    return false;

  /* Do not parallelize loops in offloaded functions.  */
  if (!oacc_kernels_p
      && oacc_get_fn_attrib (cfun->decl) != NULL)
     return false;

  /* Nonlocal labels make the control flow unanalyzable for outlining.  */
  if (cfun->has_nonlocal_label)
    return false;

  /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
     the argument to -ftree-parallelize-loops.  */
  if (oacc_kernels_p)
    n_threads = 0;
  else
    n_threads = flag_tree_parallelize_loops;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);

  calculate_dominance_info (CDI_DOMINATORS);

  FOR_EACH_LOOP (loop, 0)
    {
      if (loop == skip_loop)
	{
	  if (!loop->in_oacc_kernels_region
	      && dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Skipping loop %d as inner loop of parallelized loop\n",
		     loop->num);

	  /* Propagate the skip to this loop's own inner loops.  */
	  skip_loop = loop->inner;
	  continue;
	}
      else
	skip_loop = NULL;

      /* The reduction table is reused across candidates.  */
      reduction_list.empty ();

      if (oacc_kernels_p)
	{
	  if (!loop->in_oacc_kernels_region)
	    continue;

	  /* Don't try to parallelize inner loops in an oacc kernels region.  */
	  if (loop->inner)
	    skip_loop = loop->inner;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "Trying loop %d with header bb %d in oacc kernels"
		     " region\n", loop->num, loop->header->index);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	if (loop->inner)
	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	else
	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
      }

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is not parallel according to graphite\n");
	  continue;
	}

      if (!single_dom_exit (loop))
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "loop is !single_dom_exit\n");

	  continue;
	}

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* Profitability: skip loops too small to amortize the cost of
	 spawning N_THREADS threads, unless graphite or OpenACC decides.  */
      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = likely_max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && !oacc_kernels_p
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
	continue;

      if (loop_has_phi_with_address_arg (loop))
	continue;

      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      if (oacc_kernels_p
	&& !oacc_entry_exit_ok (loop, &reduction_list))
	{
	  if (dump_file)
	    fprintf (dump_file, "entry/exit not ok: FAILED\n");
	  continue;
	}

      changed = true;
      /* Parallelizing this loop subsumes its inner loops.  */
      skip_loop = loop->inner;

      loop_loc = find_loop_location (loop);
      if (loop->inner)
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
			 "parallelizing outer loop %d\n", loop->num);
      else
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
			 "parallelizing inner loop %d\n", loop->num);

      gen_parallel_loop (loop, &reduction_list,
			 n_threads, &niter_desc, oacc_kernels_p);
    }

  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
3364
3365 /* Parallelization. */
3366
3367 namespace {
3368
/* Pass descriptor for the loop auto-parallelization pass ("parloops"):
   a GIMPLE pass that requires CFG and SSA form and provides/destroys no
   additional properties.  */
const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
3381
3382 class pass_parallelize_loops : public gimple_opt_pass
3383 {
3384 public:
3385 pass_parallelize_loops (gcc::context *ctxt)
3386 : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
3387 oacc_kernels_p (false)
3388 {}
3389
3390 /* opt_pass methods: */
3391 virtual bool gate (function *)
3392 {
3393 if (oacc_kernels_p)
3394 return flag_openacc;
3395 else
3396 return flag_tree_parallelize_loops > 1;
3397 }
3398 virtual unsigned int execute (function *);
3399 opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
3400 void set_pass_param (unsigned int n, bool param)
3401 {
3402 gcc_assert (n == 0);
3403 oacc_kernels_p = param;
3404 }
3405
3406 private:
3407 bool oacc_kernels_p;
3408 }; // class pass_parallelize_loops
3409
3410 unsigned
3411 pass_parallelize_loops::execute (function *fun)
3412 {
3413 tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3414 if (nthreads == NULL_TREE)
3415 return 0;
3416
3417 bool in_loop_pipeline = scev_initialized_p ();
3418 if (!in_loop_pipeline)
3419 loop_optimizer_init (LOOPS_NORMAL
3420 | LOOPS_HAVE_RECORDED_EXITS);
3421
3422 if (number_of_loops (fun) <= 1)
3423 return 0;
3424
3425 if (!in_loop_pipeline)
3426 {
3427 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3428 scev_initialize ();
3429 }
3430
3431 unsigned int todo = 0;
3432 if (parallelize_loops (oacc_kernels_p))
3433 {
3434 fun->curr_properties &= ~(PROP_gimple_eomp);
3435
3436 checking_verify_loop_structure ();
3437
3438 todo |= TODO_update_ssa;
3439 }
3440
3441 if (!in_loop_pipeline)
3442 {
3443 scev_finalize ();
3444 loop_optimizer_finalize ();
3445 }
3446
3447 return todo;
3448 }
3449
3450 } // anon namespace
3451
3452 gimple_opt_pass *
3453 make_pass_parallelize_loops (gcc::context *ctxt)
3454 {
3455 return new pass_parallelize_loops (ctxt);
3456 }