/* Source: gcc/tree-parloops.c (from the thirdparty/gcc.git mirror at
   git.ipfire.org); web-viewer navigation residue removed.  */
1 /* Loop autoparallelization.
2 Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
4 Zdenek Dvorak <dvorakz@suse.cz>.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "tree.h"
27 #include "rtl.h"
28 #include "tree-flow.h"
29 #include "cfgloop.h"
30 #include "ggc.h"
31 #include "tree-data-ref.h"
32 #include "diagnostic.h"
33 #include "tree-pass.h"
34 #include "tree-scalar-evolution.h"
35 #include "hashtab.h"
36 #include "langhooks.h"
37 #include "tree-vectorizer.h"
38
39 /* This pass tries to distribute iterations of loops into several threads.
40 The implementation is straightforward -- for each loop we test whether its
41 iterations are independent, and if it is the case (and some additional
42 conditions regarding profitability and correctness are satisfied), we
43 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
44 machinery do its job.
45
   Most of the complexity is in bringing the code into shape expected
   by the omp expanders:
48 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
49 variable and that the exit test is at the start of the loop body
50 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
51 variables by accesses through pointers, and breaking up ssa chains
52 by storing the values incoming to the parallelized loop to a structure
53 passed to the new function as an argument (something similar is done
54 in omp gimplification, unfortunately only a small part of the code
55 can be shared).
56
57 TODO:
58 -- if there are several parallelizable loops in a function, it may be
59 possible to generate the threads just once (using synchronization to
60 ensure that cross-loop dependences are obeyed).
61 -- handling of common scalar dependence patterns (accumulation, ...)
62 -- handling of non-innermost loops */
63
64 /*
65 Reduction handling:
66 currently we use vect_is_simple_reduction() to detect reduction patterns.
67 The code transformation will be introduced by an example.
68
69
70 parloop
71 {
72 int sum=1;
73
74 for (i = 0; i < N; i++)
75 {
76 x[i] = i + 3;
77 sum+=x[i];
78 }
79 }
80
81 gimple-like code:
82 header_bb:
83
84 # sum_29 = PHI <sum_11(5), 1(3)>
85 # i_28 = PHI <i_12(5), 0(3)>
86 D.1795_8 = i_28 + 3;
87 x[i_28] = D.1795_8;
88 sum_11 = D.1795_8 + sum_29;
89 i_12 = i_28 + 1;
90 if (N_6(D) > i_12)
91 goto header_bb;
92
93
94 exit_bb:
95
96 # sum_21 = PHI <sum_11(4)>
97 printf (&"%d"[0], sum_21);
98
99
100 after reduction transformation (only relevant parts):
101
102 parloop
103 {
104
105 ....
106
107
108 # Storing the initial value given by the user. #
109
110 .paral_data_store.32.sum.27 = 1;
111
112 #pragma omp parallel num_threads(4)
113
114 #pragma omp for schedule(static)
115
116 # The neutral element corresponding to the particular
117 reduction's operation, e.g. 0 for PLUS_EXPR,
118 1 for MULT_EXPR, etc. replaces the user's initial value. #
119
120 # sum.27_29 = PHI <sum.27_11, 0>
121
122 sum.27_11 = D.1827_8 + sum.27_29;
123
124 GIMPLE_OMP_CONTINUE
125
126 # Adding this reduction phi is done at create_phi_for_local_result() #
127 # sum.27_56 = PHI <sum.27_11, 0>
128 GIMPLE_OMP_RETURN
129
130 # Creating the atomic operation is done at
131 create_call_for_reduction_1() #
132
133 #pragma omp atomic_load
134 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
135 D.1840_60 = sum.27_56 + D.1839_59;
136 #pragma omp atomic_store (D.1840_60);
137
138 GIMPLE_OMP_RETURN
139
140 # collecting the result after the join of the threads is done at
141 create_loads_for_reductions().
142 The value computed by the threads is loaded from the
143 shared struct. #
144
145
146 .paral_data_load.33_52 = &.paral_data_store.32;
147 sum_37 = .paral_data_load.33_52->sum.27;
148 sum_43 = D.1795_41 + sum_37;
149
150 exit bb:
151 # sum_21 = PHI <sum_43, sum_26>
152 printf (&"%d"[0], sum_21);
153
154 ...
155
156 }
157
158 */
159
160 /* Minimal number of iterations of a loop that should be executed in each
161 thread. */
162 #define MIN_PER_THREAD 100
163
/* Element of the hashtable, representing a
   reduction in the current loop.  Keyed and hashed by REDUC_PHI
   (see reduction_info_hash / reduction_info_eq below).  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  gimple keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gimple new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
181
182 /* Equality and hash functions for hashtab code. */
183
184 static int
185 reduction_info_eq (const void *aa, const void *bb)
186 {
187 const struct reduction_info *a = (const struct reduction_info *) aa;
188 const struct reduction_info *b = (const struct reduction_info *) bb;
189
190 return (a->reduc_phi == b->reduc_phi);
191 }
192
193 static hashval_t
194 reduction_info_hash (const void *aa)
195 {
196 const struct reduction_info *a = (const struct reduction_info *) aa;
197
198 return htab_hash_pointer (a->reduc_phi);
199 }
200
201 static struct reduction_info *
202 reduction_phi (htab_t reduction_list, gimple phi)
203 {
204 struct reduction_info tmpred, *red;
205
206 if (htab_elements (reduction_list) == 0)
207 return NULL;
208
209 tmpred.reduc_phi = phi;
210 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
211
212 return red;
213 }
214
/* Element of hashtable of names to copy.  Keyed by the SSA name
   version (see name_to_copy_elt_hash / name_to_copy_elt_eq below).  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
224
225 /* Equality and hash functions for hashtab code. */
226
227 static int
228 name_to_copy_elt_eq (const void *aa, const void *bb)
229 {
230 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
231 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
232
233 return a->version == b->version;
234 }
235
236 static hashval_t
237 name_to_copy_elt_hash (const void *aa)
238 {
239 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
240
241 return (hashval_t) a->version;
242 }
243
244
245 /* Data dependency analysis. Returns true if the iterations of LOOP
246 are independent on each other (that is, if we can execute them
247 in parallel). */
248
249 static bool
250 loop_parallel_p (struct loop *loop)
251 {
252 VEC (ddr_p, heap) * dependence_relations;
253 VEC (data_reference_p, heap) *datarefs;
254 lambda_trans_matrix trans;
255 bool ret = false;
256
257 if (dump_file && (dump_flags & TDF_DETAILS))
258 fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
259
260 /* Check for problems with dependences. If the loop can be reversed,
261 the iterations are independent. */
262 datarefs = VEC_alloc (data_reference_p, heap, 10);
263 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
264 compute_data_dependences_for_loop (loop, true, &datarefs,
265 &dependence_relations);
266 if (dump_file && (dump_flags & TDF_DETAILS))
267 dump_data_dependence_relations (dump_file, dependence_relations);
268
269 trans = lambda_trans_matrix_new (1, 1);
270 LTM_MATRIX (trans)[0][0] = -1;
271
272 if (lambda_transform_legal_p (trans, 1, dependence_relations))
273 {
274 ret = true;
275 if (dump_file && (dump_flags & TDF_DETAILS))
276 fprintf (dump_file, " SUCCESS: may be parallelized\n");
277 }
278 else if (dump_file && (dump_flags & TDF_DETAILS))
279 fprintf (dump_file,
280 " FAILED: data dependencies exist across iterations\n");
281
282 free_dependence_relations (dependence_relations);
283 free_data_refs (datarefs);
284
285 return ret;
286 }
287
288 /* Return true when LOOP contains basic blocks marked with the
289 BB_IRREDUCIBLE_LOOP flag. */
290
291 static inline bool
292 loop_has_blocks_with_irreducible_flag (struct loop *loop)
293 {
294 unsigned i;
295 basic_block *bbs = get_loop_body_in_dom_order (loop);
296 bool res = true;
297
298 for (i = 0; i < loop->num_nodes; i++)
299 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
300 goto end;
301
302 res = false;
303 end:
304 free (bbs);
305 return res;
306 }
307
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  */

static tree
take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
{
  int uid;
  void **dslot;
  struct int_tree_map ielt, *nielt;
  tree *var_p, name, bvar, addr;
  gimple stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Strip handled components to find the base declaration of OBJ;
     VAR_P ends up pointing at the innermost (base) operand.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;
  uid = DECL_UID (*var_p);

  /* Look up (or create) the cached SSA name holding the address of
     the base declaration, keyed by its DECL_UID.  */
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
  if (!*dslot)
    {
      /* First time this base is needed: materialize "bvar = &base" on
	 the ENTRY edge and remember the resulting SSA name.  */
      addr = build_addr (*var_p, current_function_decl);
      bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
      add_referenced_var (bvar);
      stmt = gimple_build_assign (bvar, addr);
      name = make_ssa_name (bvar, stmt);
      gimple_assign_set_lhs (stmt, name);
      gsi_insert_on_edge_immediate (entry, stmt);

      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = name;
      *dslot = nielt;
    }
  else
    name = ((struct int_tree_map *) *dslot)->to;

  /* If OBJ had component references on top of the base, rebuild the
     full address using the cached base address, gimplifying the result
     onto the ENTRY edge.  */
  if (var_p != &obj)
    {
      *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
      name = force_gimple_operand (build_addr (obj, current_function_decl),
				   &stmts, true, NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_on_edge_immediate (entry, stmts);
    }

  /* Convert the address to the requested TYPE if necessary.  */
  if (TREE_TYPE (name) != type)
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_on_edge_immediate (entry, stmts);
    }

  return name;
}
371
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

static int
initialize_reductions (void **slot, void *data)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct loop *loop = (struct loop *) data;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");
  add_referenced_var (bvar);

  /* Build a temporary OMP_CLAUSE_REDUCTION clause only so that
     omp_reduction_init can compute the neutral element for the
     reduction operation.  */
  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  /* Remember the user's original initial value; it is folded back in
     after the threads join (see create_stores_for_reduction).  */
  reduc->initial_value = arg;
  return 1;
}
422
/* Helper data for eliminate_local_variables_1.  INFO must be the first
   member: the callback receives a pointer to INFO and casts it back to
   the enclosing elv_data.  */
struct elv_data
{
  struct walk_stmt_info info;	/* Walker bookkeeping (must come first).  */
  edge entry;			/* Edge on which address computations are placed.  */
  htab_t decl_address;		/* Cache mapping decl uids to address SSA names.  */
  bool changed;			/* Set when the walked statement was modified.  */
};
430
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  /* DATA points at the INFO member, which is first in elv_data, so this
     cast recovers the full structure.  */
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      /* Leave external declarations and non-variables alone.  */
      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      /* Replace the local variable with an indirect reference through
	 its (function-invariant) address.  */
      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
      *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      /* Hoist the whole address computation out of the region.  */
      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Only step into expression trees; other nodes carry no decl
     operands to rewrite.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
494
495 /* Moves the references to local variables in STMT out of the single
496 entry single exit region starting at ENTRY. DECL_ADDRESS contains
497 addresses of the references that had their address taken
498 already. */
499
500 static void
501 eliminate_local_variables_stmt (edge entry, gimple stmt,
502 htab_t decl_address)
503 {
504 struct elv_data dta;
505
506 memset (&dta.info, '\0', sizeof (dta.info));
507 dta.entry = entry;
508 dta.decl_address = decl_address;
509 dta.changed = false;
510
511 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
512
513 if (dta.changed)
514 update_stmt (stmt);
515 }
516
517 /* Eliminates the references to local variables from the single entry
518 single exit region between the ENTRY and EXIT edges.
519
520 This includes:
521 1) Taking address of a local variable -- these are moved out of the
522 region (and temporary variable is created to hold the address if
523 necessary).
524
525 2) Dereferencing a local variable -- these are replaced with indirect
526 references. */
527
528 static void
529 eliminate_local_variables (edge entry, edge exit)
530 {
531 basic_block bb;
532 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
533 unsigned i;
534 gimple_stmt_iterator gsi;
535 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
536 free);
537 basic_block entry_bb = entry->src;
538 basic_block exit_bb = exit->dest;
539
540 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
541
542 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
543 if (bb != entry_bb && bb != exit_bb)
544 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
545 eliminate_local_variables_stmt (entry, gsi_stmt (gsi),
546 decl_address);
547
548 htab_delete (decl_address);
549 VEC_free (basic_block, heap, body);
550 }
551
552 /* Returns true if expression EXPR is not defined between ENTRY and
553 EXIT, i.e. if all its operands are defined outside of the region. */
554
555 static bool
556 expr_invariant_in_region_p (edge entry, edge exit, tree expr)
557 {
558 basic_block entry_bb = entry->src;
559 basic_block exit_bb = exit->dest;
560 basic_block def_bb;
561
562 if (is_gimple_min_invariant (expr))
563 return true;
564
565 if (TREE_CODE (expr) == SSA_NAME)
566 {
567 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
568 if (def_bb
569 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
570 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
571 return false;
572
573 return true;
574 }
575
576 return false;
577 }
578
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name,
			       htab_t name_copies, htab_t decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt, *nielt;
  struct name_to_copy_elt elt, *nelt;
  void **slot, **dslot;

  /* Only SSA names need processing.  */
  if (TREE_CODE (name) != SSA_NAME)
    return name;

  /* Return the existing duplicate if NAME was copied before.  The slot
     is only created (INSERT) when we are allowed to make a copy.  */
  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = htab_find_slot_with_hash (name_copies, &elt, idx,
				   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return ((struct name_to_copy_elt *) *slot)->new_name;

  /* Duplicate the base variable of NAME, unless a copy was made
     already.  */
  var = SSA_NAME_VAR (name);
  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
  if (!*dslot)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      add_referenced_var (var_copy);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = uid;
      nielt->to = var_copy;
      *dslot = nielt;

      /* Ensure that when we meet this decl next time, we won't duplicate
         it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      /* The copy maps to itself, so meeting VAR_COPY later is a no-op.  */
      dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
      gcc_assert (!*dslot);
      nielt = XNEW (struct int_tree_map);
      nielt->uid = nuid;
      nielt->to = var_copy;
      *dslot = nielt;
    }
  else
    var_copy = ((struct int_tree_map *) *dslot)->to;

  if (copy_name_p)
    {
      /* Create the SSA name copy and record it under NAME's version.  */
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* With NO_INSERT a failed lookup yields a NULL slot; keep NAME but
	 still rebase it on the duplicated decl below.  */
      gcc_assert (!slot);
      copy = name;
    }

  /* Rebase the (copied or original) name on the duplicated decl.  */
  SSA_NAME_VAR (copy) = var_copy;
  return copy;
}
653
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       htab_t name_copies, htab_t decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  mark_virtual_ops_for_renaming (stmt);

  /* Defs are never replaced by a copy (copy_name_p == false), but their
     base decls are still rewritten to the duplicated variables.  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  /* Uses invariant in the region (defined outside it) are replaced with
     fresh duplicates; other uses only get their base decl replaced.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
694
695 /* Callback for htab_traverse. Adds a field corresponding to the reduction
696 specified in SLOT. The type is passed in DATA. */
697
698 static int
699 add_field_for_reduction (void **slot, void *data)
700 {
701
702 struct reduction_info *const red = (struct reduction_info *) *slot;
703 tree const type = (tree) data;
704 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
705 tree field = build_decl (gimple_location (red->reduc_stmt),
706 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
707
708 insert_field_into_struct (type, field);
709
710 red->field = field;
711
712 return 1;
713 }
714
715 /* Callback for htab_traverse. Adds a field corresponding to a ssa name
716 described in SLOT. The type is passed in DATA. */
717
718 static int
719 add_field_for_name (void **slot, void *data)
720 {
721 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
722 tree type = (tree) data;
723 tree name = ssa_name (elt->version);
724 tree var = SSA_NAME_VAR (name);
725 tree field = build_decl (DECL_SOURCE_LOCATION (var),
726 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
727
728 insert_field_into_struct (type, field);
729 elt->field = field;
730
731 return 1;
732 }
733
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).
     E is set to the non-loop predecessor edge.  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  /* Give the per-thread result its own SSA name based on the reduction
     variable.  */
  local_res
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
		     NULL);
  new_phi = create_phi_node (local_res, store_bb);
  SSA_NAME_DEF_STMT (local_res) = new_phi;
  /* Merge the initialization value (no-iterations edge E) with the
     value computed inside the loop.  */
  add_phi_arg (new_phi, reduc->init, e);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch));
  reduc->new_phi = new_phi;

  return 1;
}
777
/* Describes the shared structure used to pass values between the
   threads, and where to generate the stores into it and the loads
   from it.  */
struct clsn_data
{
  tree store;			/* The structure the values are stored into.  */
  tree load;			/* Pointer through which the values are loaded
				   (assigned the address of STORE; see
				   create_final_loads_for_reduction).  */

  basic_block store_bb;		/* Block where stores are generated.  */
  basic_block load_bb;		/* Block where loads are generated.  */
};
786
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, addr_type, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Build the address of the shared field this reduction accumulates
     into.  */
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
  addr_type = build_pointer_type (type);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Emit GIMPLE_OMP_ATOMIC_LOAD of the shared field into TMP_LOAD.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  add_referenced_var (tmp_load);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  /* Split again so the atomic load ends its block, then combine the
     loaded value with this thread's local result.  */
  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* Store the combined value back atomically.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
842
843 /* Create the atomic operation at the join point of the threads.
844 REDUCTION_LIST describes the reductions in the LOOP.
845 LD_ST_DATA describes the shared data structure where
846 shared data is stored in and loaded from. */
847 static void
848 create_call_for_reduction (struct loop *loop, htab_t reduction_list,
849 struct clsn_data *ld_st_data)
850 {
851 htab_traverse (reduction_list, create_phi_for_local_result, loop);
852 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
853 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
854 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
855 }
856
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
  tree load_struct;
  tree name;
  tree x;

  /* Build a load of the reduction's field from the shared structure.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  /* Reuse the result of the original exit phi (KEEP_RES) as the lhs, so
     existing uses of the reduction result see the loaded value.  */
  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The old exit phi is now dead -- its result is defined by STMT
     instead; find and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
894
895 /* Load the reduction result that was stored in LD_ST_DATA.
896 REDUCTION_LIST describes the list of reductions that the
897 loads should be generated for. */
898 static void
899 create_final_loads_for_reduction (htab_t reduction_list,
900 struct clsn_data *ld_st_data)
901 {
902 gimple_stmt_iterator gsi;
903 tree t;
904 gimple stmt;
905
906 gsi = gsi_after_labels (ld_st_data->load_bb);
907 t = build_fold_addr_expr (ld_st_data->store);
908 stmt = gimple_build_assign (ld_st_data->load, t);
909
910 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
911 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
912
913 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
914
915 }
916
917 /* Callback for htab_traverse. Store the neutral value for the
918 particular reduction's operation, e.g. 0 for PLUS_EXPR,
919 1 for MULT_EXPR, etc. into the reduction field.
920 The reduction is specified in SLOT. The store information is
921 passed in DATA. */
922
923 static int
924 create_stores_for_reduction (void **slot, void *data)
925 {
926 struct reduction_info *const red = (struct reduction_info *) *slot;
927 struct clsn_data *const clsn_data = (struct clsn_data *) data;
928 tree t;
929 gimple stmt;
930 gimple_stmt_iterator gsi;
931 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
932
933 gsi = gsi_last_bb (clsn_data->store_bb);
934 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
935 stmt = gimple_build_assign (t, red->initial_value);
936 mark_virtual_ops_for_renaming (stmt);
937 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
938
939 return 1;
940 }
941
942 /* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
943 store to a field of STORE in STORE_BB for the ssa name and its duplicate
944 specified in SLOT. */
945
946 static int
947 create_loads_and_stores_for_name (void **slot, void *data)
948 {
949 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
950 struct clsn_data *const clsn_data = (struct clsn_data *) data;
951 tree t;
952 gimple stmt;
953 gimple_stmt_iterator gsi;
954 tree type = TREE_TYPE (elt->new_name);
955 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
956 tree load_struct;
957
958 gsi = gsi_last_bb (clsn_data->store_bb);
959 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
960 stmt = gimple_build_assign (t, ssa_name (elt->version));
961 mark_virtual_ops_for_renaming (stmt);
962 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
963
964 gsi = gsi_last_bb (clsn_data->load_bb);
965 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
966 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
967 stmt = gimple_build_assign (elt->new_name, t);
968 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
969 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
970
971 return 1;
972 }
973
974 /* Moves all the variables used in LOOP and defined outside of it (including
975 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
976 name) to a structure created for this purpose. The code
977
978 while (1)
979 {
980 use (a);
981 use (b);
982 }
983
984 is transformed this way:
985
986 bb0:
987 old.a = a;
988 old.b = b;
989
990 bb1:
991 a' = new->a;
992 b' = new->b;
993 while (1)
994 {
995 use (a');
996 use (b');
997 }
998
999 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1000 pointer `new' is intentionally not initialized (the loop will be split to a
1001 separate function later, and `new' will be initialized from its arguments).
1002 LD_ST_DATA holds information about the shared data structure used to pass
1003 information among the threads. It is initialized here, and
1004 gen_parallel_loop will pass it to create_call_for_reduction that
1005 needs this information. REDUCTION_LIST describes the reductions
1006 in LOOP. */
1007
static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
                          tree *arg_struct, tree *new_arg_struct,
                          struct clsn_data *ld_st_data)

{
  /* BB1 is inserted on the entry edge; BB0 is its predecessor.  Per the
     comment above: BB0 receives the stores into the structure, BB1 the
     loads from it.  */
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  /* Map from ssa-name version to its copy used inside the region.  */
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
                                    name_to_copy_elt_eq, free);
  /* Map from decl uid to the separate decl used inside the region.  */
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
                                    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;

  /* The region entry is now the edge leaving the freshly split block.  */
  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* Rewrite every statement and phi node in the region to use the
     region-local copies of names and decls.  */
  for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
    {
      if (bb != entry_bb && bb != exit_bb)
        {
          for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
                                           name_copies, decl_copies);

          for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
            separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
                                           name_copies, decl_copies);
        }
    }

  VEC_free (basic_block, heap, body);

  if (htab_elements (name_copies) == 0 && reduction_list == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
         loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (BUILTINS_LOCATION,
                              TYPE_DECL, create_tmp_var_name (".paral_data"),
                              type);
      TYPE_NAME (type) = type_name;

      /* One field per copied ssa name, plus fields for reductions.  */
      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
        {
          /* Create the fields for reductions.  */
          htab_traverse (reduction_list, add_field_for_reduction,
                         type);
        }
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      add_referenced_var (*arg_struct);
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      add_referenced_var (nvar);
      /* The pointer is deliberately left without a defining statement;
         it will be initialized from the outlined function's argument.  */
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
                     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
        {
          htab_traverse (reduction_list, create_stores_for_reduction,
                         ld_st_data);
          /* A separate load of the structure pointer after the exit,
             used to fetch the final reduction values.  */
          clsn_data.load = make_ssa_name (nvar, NULL);
          clsn_data.load_bb = exit->dest;
          clsn_data.store = ld_st_data->store;
          create_final_loads_for_reduction (reduction_list, &clsn_data);
        }
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}
1104
1105 /* Bitmap containing uids of functions created by parallelization. We cannot
1106 allocate it from the default obstack, as it must live across compilation
1107 of several functions; we make it gc allocated instead. */
1108
1109 static GTY(()) bitmap parallelized_functions;
1110
1111 /* Returns true if FN was created by create_loop_fn. */
1112
1113 static bool
1114 parallelized_function_p (tree fn)
1115 {
1116 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1117 return false;
1118
1119 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1120 }
1121
1122 /* Creates and returns an empty function that will receive the body of
1123 a parallelized loop. */
1124
1125 static tree
1126 create_loop_fn (void)
1127 {
1128 char buf[100];
1129 char *tname;
1130 tree decl, type, name, t;
1131 struct function *act_cfun = cfun;
1132 static unsigned loopfn_num;
1133
1134 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1135 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1136 clean_symbol_name (tname);
1137 name = get_identifier (tname);
1138 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1139
1140 decl = build_decl (BUILTINS_LOCATION,
1141 FUNCTION_DECL, name, type);
1142 if (!parallelized_functions)
1143 parallelized_functions = BITMAP_GGC_ALLOC ();
1144 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1145
1146 TREE_STATIC (decl) = 1;
1147 TREE_USED (decl) = 1;
1148 DECL_ARTIFICIAL (decl) = 1;
1149 DECL_IGNORED_P (decl) = 0;
1150 TREE_PUBLIC (decl) = 0;
1151 DECL_UNINLINABLE (decl) = 1;
1152 DECL_EXTERNAL (decl) = 0;
1153 DECL_CONTEXT (decl) = NULL_TREE;
1154 DECL_INITIAL (decl) = make_node (BLOCK);
1155
1156 t = build_decl (BUILTINS_LOCATION,
1157 RESULT_DECL, NULL_TREE, void_type_node);
1158 DECL_ARTIFICIAL (t) = 1;
1159 DECL_IGNORED_P (t) = 1;
1160 DECL_RESULT (decl) = t;
1161
1162 t = build_decl (BUILTINS_LOCATION,
1163 PARM_DECL, get_identifier (".paral_data_param"),
1164 ptr_type_node);
1165 DECL_ARTIFICIAL (t) = 1;
1166 DECL_ARG_TYPE (t) = ptr_type_node;
1167 DECL_CONTEXT (t) = decl;
1168 TREE_USED (t) = 1;
1169 DECL_ARGUMENTS (decl) = t;
1170
1171 allocate_struct_function (decl, false);
1172
1173 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1174 it. */
1175 set_cfun (act_cfun);
1176
1177 return decl;
1178 }
1179
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit. In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt;
  gimple_stmt_iterator gsi;

  /* Split the header after its labels; the successor keeps the original
     body entry, the (emptied) header will end up holding the exit test.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The loop's exit condition; its LHS is the control variable, which is
     compared against NIT.  */
  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Rename the header phi result and re-create the old name as a new
         phi in ORIG_HEADER, fed by the renamed value.  */
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
      SET_PHI_RESULT (phi, t);

      nphi = create_phi_node (res, orig_header);
      SSA_NAME_DEF_STMT (res) = nphi;
      add_phi_arg (nphi, t, hpred);

      if (res == control)
        {
          /* Keep the exit condition phrased in terms of the header phi.  */
          gimple_cond_set_lhs (cond_stmt, t);
          update_stmt (cond_stmt);
          control = t;
        }
    }

  /* Duplicate the tail of the last iteration (the blocks dominated by the
     exit source, excluding the header) to outside the loop.  */
  bbs = get_loop_body_in_dom_order (loop);
  for (n = 0; bbs[n] != exit->src; n++)
    continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
                                   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */

  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Virtual operands are left in place.  */
      if (!is_gimple_reg (res))
        {
          gsi_next (&gsi);
          continue;
        }

      /* Check if it is a part of reduction.  If it is,
         keep the phi at the reduction's keep_res field.  The
         PHI_RESULT of this phi is the resulting value of the reduction
         variable when exiting the loop.  */

      exit = single_dom_exit (loop);

      if (htab_elements (reduction_list) > 0)
        {
          struct reduction_info *red;

          tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

          red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
          if (red)
            {
              red->keep_res = phi;
              gsi_next (&gsi);
              continue;
            }
        }
      /* Anything that is neither virtual nor a reduction must be the
         control variable; there can be only one.  */
      gcc_assert (control_name == NULL_TREE
                  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to NIT.  */
  gsi = gsi_after_labels (ex_bb);
  nit = force_gimple_operand_gsi (&gsi,
                                  fold_convert (TREE_TYPE (control_name), nit),
                                  false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
1294
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
                      tree new_data, unsigned n_threads)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param, res;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  /* num_threads(N_THREADS) clause.  */
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      /* param = &DATA; NEW_DATA = (type of NEW_DATA) param.  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
                                  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (false), GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  The loop was previously put into
     exit-first form, so the exit condition is in the header.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  /* INITVAR replaces the preheader value of the control variable; it will
     be defined by the GIMPLE_OMP_FOR statement.  */
  initvar = make_ssa_name (cvar_base, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
           initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* Remove the increment of the control variable from the latch; the
     GIMPLE_OMP_CONTINUE emitted below takes over that role.  */
  gsi = gsi_last_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  /* GUARD skips the loop entirely; END leaves it from the latch.  */
  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      /* Extend the exit phis with arguments for the two new edges, taken
         from the corresponding header phi's preheader and latch values.  */
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
      add_phi_arg (phi,
                   PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop)),
                   guard);
      add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop)),
                   end);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  /* schedule(static) clause.  */
  t = build_omp_clause (BUILTINS_LOCATION, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
                                                cvar_base,
                                                build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  gsi_insert_after (&gsi, gimple_build_omp_return (true), GSI_NEW_STMT);

  return paral_bb;
}
1416
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
                   unsigned n_threads, struct tree_niter_desc *niter)
{
  struct loop *nloop;
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;

  /* From

     ---------------------------------------------------------------------
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
         || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
         IV = phi (INIT, IV + STEP)
         BODY1;
         if (COND)
           break;
         BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* Materialize the iteration count on the preheader edge.  */
  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
                              NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* Condition guarding the parallel version:
     !MAY_BE_ZERO && NIT >= MIN_PER_THREAD * N_THREADS.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
                 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
                   invert_truthvalue (unshare_expr (niter->may_be_zero)),
                   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
        = force_gimple_operand (many_iterations_cond, &stmts,
                                true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  nloop = loop_version (loop, many_iterations_cond, NULL,
                        prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit);

  /* Ensure that the exit condition is the first statement in the loop.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
                            &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
                                        new_arg_struct, n_threads);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1570
1571 /* Returns true when LOOP contains vector phi nodes. */
1572
1573 static bool
1574 loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
1575 {
1576 unsigned i;
1577 basic_block *bbs = get_loop_body_in_dom_order (loop);
1578 gimple_stmt_iterator gsi;
1579 bool res = true;
1580
1581 for (i = 0; i < loop->num_nodes; i++)
1582 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1583 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
1584 goto end;
1585
1586 res = false;
1587 end:
1588 free (bbs);
1589 return res;
1590 }
1591
1592 /* Create a reduction_info struct, initialize it with REDUC_STMT
1593 and PHI, insert it to the REDUCTION_LIST. */
1594
1595 static void
1596 build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1597 {
1598 PTR *slot;
1599 struct reduction_info *new_reduction;
1600
1601 gcc_assert (reduc_stmt);
1602
1603 if (dump_file && (dump_flags & TDF_DETAILS))
1604 {
1605 fprintf (dump_file,
1606 "Detected reduction. reduction stmt is: \n");
1607 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1608 fprintf (dump_file, "\n");
1609 }
1610
1611 new_reduction = XCNEW (struct reduction_info);
1612
1613 new_reduction->reduc_stmt = reduc_stmt;
1614 new_reduction->reduc_phi = phi;
1615 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1616 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1617 *slot = new_reduction;
1618 }
1619
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  /* Silence vectorizer diagnostics while borrowing its analysis.  */
  vect_dump = NULL;
  /* May be NULL when the loop does not have the shape the vectorizer
     expects; the reduction check below then does nothing.  */
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      /* Only real (non-virtual) ssa names can be reductions.  */
      if (!is_gimple_reg (res))
        continue;

      /* A header phi that is not a simple induction variable is a
         reduction candidate; let the vectorizer classify it.  */
      if (!simple_iv (loop, loop, res, &iv, true)
          && simple_loop_info)
        {
           gimple reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi, true, &double_reduc);
           if (reduc_stmt)
             build_new_reduction (reduction_list, reduc_stmt, phi);
        }
    }
  /* NOTE(review): called unconditionally even when SIMPLE_LOOP_INFO is
     NULL -- assumes destroy_loop_vec_info tolerates NULL; confirm against
     its definition in the vectorizer.  */
  destroy_loop_vec_info (simple_loop_info, true);
}
1651
1652 /* Try to initialize NITER for code generation part. */
1653
1654 static bool
1655 try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
1656 {
1657 edge exit = single_dom_exit (loop);
1658
1659 gcc_assert (exit);
1660
1661 /* We need to know # of iterations, and there should be no uses of values
1662 defined inside loop outside of it, unless the values are invariants of
1663 the loop. */
1664 if (!number_of_iterations_exit (loop, exit, niter, false))
1665 {
1666 if (dump_file && (dump_flags & TDF_DETAILS))
1667 fprintf (dump_file, " FAILED: number of iterations not known\n");
1668 return false;
1669 }
1670
1671 return true;
1672 }
1673
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every value live across the exit must belong to a detected reduction;
     otherwise the loop is not parallelizable.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      /* Virtual phis are irrelevant here; only real registers matter.  */
      if (is_gimple_reg (val))
        {
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "phi is ");
              print_gimple_stmt (dump_file, phi, 0, 0);
              fprintf (dump_file, "arg of phi to exit:   value ");
              print_generic_expr (dump_file, val, 0);
              fprintf (dump_file, " used outside loop\n");
              fprintf (dump_file,
                       "  checking if it a part of reduction pattern:  \n");
            }
          /* With no reductions detected at all, any live-out value
             disqualifies the loop.  */
          if (htab_elements (reduction_list) == 0)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not a part of reduction.\n");
              return false;
            }
          /* Find the phi inside the loop that uses VAL; that is the
             reduction phi, if VAL is part of a reduction cycle.  */
          reduc_phi = NULL;
          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
            {
              if (flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
                {
                  reduc_phi = USE_STMT (use_p);
                  break;
                }
            }
          red = reduction_phi (reduction_list, reduc_phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: it is not a part of reduction.\n");
              return false;
            }
          if (dump_file && (dump_flags & TDF_DETAILS))
            {
              fprintf (dump_file, "reduction phi is  ");
              print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
              fprintf (dump_file, "reduction stmt is  ");
              print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
            }
        }
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      /* A header phi that is neither a simple iv nor a known reduction
         implies a scalar dependency carried between iterations.  */
      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
        {
          struct reduction_info *red;

          red = reduction_phi (reduction_list, phi);
          if (red == NULL)
            {
              if (dump_file && (dump_flags & TDF_DETAILS))
                fprintf (dump_file,
                         "  FAILED: scalar dependency between iterations\n");
              return false;
            }
        }
    }


  return true;
}
1769
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;

  reduction_list = htab_create (10, reduction_info_hash,
                                reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      /* Reuse the hash table across loops.  */
      htab_empty (reduction_list);

      /* FIXME: Only consider innermost loops with just one exit.  */
      if (loop->inner || !single_dom_exit (loop))
        continue;

      if (/* And of course, the loop must be parallelizable.  */
          !can_duplicate_loop_p (loop)
          || loop_has_blocks_with_irreducible_flag (loop)
          /* FIXME: the check for vector phi nodes could be removed.  */
          || loop_has_vector_phi_nodes (loop))
        continue;

      if (/* Do not bother with loops in cold areas.  */
          optimize_loop_nest_for_size_p (loop)
          /* Or loops that roll too little.  */
          || expected_loop_iterations (loop) <= n_threads)
        continue;
      if (!try_get_loop_niter (loop, &niter_desc))
        continue;

      if (!try_create_reduction_list (loop, reduction_list))
        continue;

      /* Check the data dependences last, as it is the most expensive
         test.  */
      if (!loop_parallel_p (loop))
        continue;

      changed = true;
      gen_parallel_loop (loop, reduction_list,
                         n_threads, &niter_desc);
      /* Sanity-check the IR after each transformation; these are
         internal consistency checks, not user diagnostics.  */
      verify_flow_info ();
      verify_dominators (CDI_DOMINATORS);
      verify_loop_structure ();
      verify_loop_closed_ssa ();
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solutions
     for ESCAPED and CALLUSED.  */
  if (changed)
    {
      pt_solution_reset (&cfun->gimple_df->escaped);
      pt_solution_reset (&cfun->gimple_df->callused);
    }

  return changed;
}
1844
1845 #include "gt-tree-parloops.h"