]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
xcoff.h (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P): Do not always place FP constants in the...
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
4218dc2d 2 Copyright (C) 2006, 2007, 2008 Free Software Foundation, Inc.
5f40b3cb
ZD
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
4 Zdenek Dvorak <dvorakz@suse.cz>.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 2, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING. If not, write to the Free
20Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2102110-1301, USA. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "tree.h"
28#include "rtl.h"
29#include "tree-flow.h"
30#include "cfgloop.h"
31#include "ggc.h"
32#include "tree-data-ref.h"
33#include "diagnostic.h"
34#include "tree-pass.h"
35#include "tree-scalar-evolution.h"
36#include "hashtab.h"
37#include "langhooks.h"
a509ebb5 38#include "tree-vectorizer.h"
5f40b3cb
ZD
39
40/* This pass tries to distribute iterations of loops into several threads.
41 The implementation is straightforward -- for each loop we test whether its
42 iterations are independent, and if it is the case (and some additional
43 conditions regarding profitability and correctness are satisfied), we
44 add OMP_PARALLEL and OMP_FOR codes and let omp expansion machinery do
45 its job.
46
47 The most of the complexity is in bringing the code into shape expected
48 by the omp expanders:
49 -- for OMP_FOR, ensuring that the loop has only one induction variable
50 and that the exit test is at the start of the loop body
51 -- for OMP_PARALLEL, replacing the references to local addressable
52 variables by accesses through pointers, and breaking up ssa chains
53 by storing the values incoming to the parallelized loop to a structure
54 passed to the new function as an argument (something similar is done
55 in omp gimplification, unfortunately only a small part of the code
56 can be shared).
57
58 TODO:
59 -- if there are several parallelizable loops in a function, it may be
60 possible to generate the threads just once (using synchronization to
61 ensure that cross-loop dependences are obeyed).
62 -- handling of common scalar dependence patterns (accumulation, ...)
63 -- handling of non-innermost loops */
64
a509ebb5
RL
65/*
66 Reduction handling:
67 currently we use vect_is_simple_reduction() to detect reduction patterns.
68 The code transformation will be introduced by an example.
69
0eb7e7aa 70
a509ebb5
RL
71parloop
72{
73 int sum=1;
74
0eb7e7aa 75 for (i = 0; i < N; i++)
a509ebb5
RL
76 {
77 x[i] = i + 3;
78 sum+=x[i];
79 }
80}
81
0eb7e7aa 82gimple-like code:
a509ebb5
RL
83header_bb:
84
0eb7e7aa
RL
85 # sum_29 = PHI <sum_11(5), 1(3)>
86 # i_28 = PHI <i_12(5), 0(3)>
87 D.1795_8 = i_28 + 3;
88 x[i_28] = D.1795_8;
89 sum_11 = D.1795_8 + sum_29;
90 i_12 = i_28 + 1;
91 if (N_6(D) > i_12)
92 goto header_bb;
93
a509ebb5
RL
94
95exit_bb:
96
0eb7e7aa
RL
97 # sum_21 = PHI <sum_11(4)>
98 printf (&"%d"[0], sum_21);
a509ebb5
RL
99
100
101after reduction transformation (only relevant parts):
102
103parloop
104{
105
106....
107
0eb7e7aa 108
ae0bce62 109 # Storing the the initial value given by the user. #
0eb7e7aa 110
ae0bce62 111 .paral_data_store.32.sum.27 = 1;
0eb7e7aa 112
0eb7e7aa 113 #pragma omp parallel num_threads(4)
a509ebb5 114
0eb7e7aa 115 #pragma omp for schedule(static)
ae0bce62
RL
116
117 # The neutral element corresponding to the particular
118 reduction's operation, e.g. 0 for PLUS_EXPR,
119 1 for MULT_EXPR, etc. replaces the user's initial value. #
120
121 # sum.27_29 = PHI <sum.27_11, 0>
122
0eb7e7aa 123 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 124
0eb7e7aa 125 OMP_CONTINUE
a509ebb5 126
0eb7e7aa
RL
127 # Adding this reduction phi is done at create_phi_for_local_result() #
128 # sum.27_56 = PHI <sum.27_11, 0>
129 OMP_RETURN
130
131 # Creating the atomic operation is done at
132 create_call_for_reduction_1() #
a509ebb5 133
0eb7e7aa
RL
134 #pragma omp atomic_load
135 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
136 D.1840_60 = sum.27_56 + D.1839_59;
137 #pragma omp atomic_store (D.1840_60);
a509ebb5 138
0eb7e7aa
RL
139 OMP_RETURN
140
141 # collecting the result after the join of the threads is done at
142 create_loads_for_reductions().
ae0bce62
RL
143 The value computed by the threads is loaded from the
144 shared struct. #
145
0eb7e7aa
RL
146
147 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 148 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
149 sum_43 = D.1795_41 + sum_37;
150
151 exit bb:
152 # sum_21 = PHI <sum_43, sum_26>
153 printf (&"%d"[0], sum_21);
154
155...
156
a509ebb5
RL
157}
158
159*/
160
5f40b3cb
ZD
161/* Minimal number of iterations of a loop that should be executed in each
162 thread. */
163#define MIN_PER_THREAD 100
164
a509ebb5
RL
165/* Element of the hashtable, representing a
166 reduction in the current loop. */
167struct reduction_info
168{
169 tree reduc_stmt; /* reduction statement. */
170 tree reduc_phi; /* The phi node defining the reduction. */
171 enum tree_code reduction_code; /* code for the reduction operation. */
172 tree keep_res; /* The PHI_RESULT of this phi is the resulting value
173 of the reduction variable when existing the loop. */
ae0bce62 174 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 175 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5
RL
176 tree init; /* reduction initialization value. */
177 tree new_phi; /* (helper field) Newly created phi node whose result
178 will be passed to the atomic operation. Represents
179 the local result each thread computed for the reduction
180 operation. */
181};
182
183/* Equality and hash functions for hashtab code. */
184
185static int
186reduction_info_eq (const void *aa, const void *bb)
187{
188 const struct reduction_info *a = (const struct reduction_info *) aa;
189 const struct reduction_info *b = (const struct reduction_info *) bb;
190
191 return (a->reduc_phi == b->reduc_phi);
192}
193
194static hashval_t
195reduction_info_hash (const void *aa)
196{
197 const struct reduction_info *a = (const struct reduction_info *) aa;
198
199 return htab_hash_pointer (a->reduc_phi);
200}
201
202static struct reduction_info *
203reduction_phi (htab_t reduction_list, tree phi)
204{
205 struct reduction_info tmpred, *red;
206
207 if (htab_elements (reduction_list) == 0)
208 return NULL;
209
210 tmpred.reduc_phi = phi;
211 red = htab_find (reduction_list, &tmpred);
212
213 return red;
214}
215
5f40b3cb
ZD
216/* Element of hashtable of names to copy. */
217
218struct name_to_copy_elt
219{
220 unsigned version; /* The version of the name to copy. */
221 tree new_name; /* The new name used in the copy. */
222 tree field; /* The field of the structure used to pass the
223 value. */
224};
225
226/* Equality and hash functions for hashtab code. */
227
228static int
229name_to_copy_elt_eq (const void *aa, const void *bb)
230{
a509ebb5
RL
231 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
232 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
5f40b3cb
ZD
233
234 return a->version == b->version;
235}
236
237static hashval_t
238name_to_copy_elt_hash (const void *aa)
239{
a509ebb5 240 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
5f40b3cb
ZD
241
242 return (hashval_t) a->version;
243}
244
245/* Returns true if the iterations of LOOP are independent on each other (that
246 is, if we can execute them in parallel), and if LOOP satisfies other
247 conditions that we need to be able to parallelize it. Description of number
a509ebb5
RL
248 of iterations is stored to NITER. Reduction analysis is done, if
249 reductions are found, they are inserted to the REDUCTION_LIST. */
5f40b3cb
ZD
250
251static bool
a509ebb5 252loop_parallel_p (struct loop *loop, htab_t reduction_list, struct tree_niter_desc *niter)
5f40b3cb
ZD
253{
254 edge exit = single_dom_exit (loop);
a509ebb5
RL
255 VEC (ddr_p, heap) * dependence_relations;
256 VEC (data_reference_p, heap) * datarefs;
5f40b3cb
ZD
257 lambda_trans_matrix trans;
258 bool ret = false;
259 tree phi;
a509ebb5 260 loop_vec_info simple_loop_info;
5f40b3cb
ZD
261
262 /* Only consider innermost loops with just one exit. The innermost-loop
263 restriction is not necessary, but it makes things simpler. */
264 if (loop->inner || !exit)
265 return false;
266
267 if (dump_file && (dump_flags & TDF_DETAILS))
268 fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
269
270 /* We need to know # of iterations, and there should be no uses of values
271 defined inside loop outside of it, unless the values are invariants of
272 the loop. */
273 if (!number_of_iterations_exit (loop, exit, niter, false))
274 {
275 if (dump_file && (dump_flags & TDF_DETAILS))
276 fprintf (dump_file, " FAILED: number of iterations not known\n");
277 return false;
278 }
279
a509ebb5
RL
280 simple_loop_info = vect_analyze_loop_form (loop);
281
282 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
283 {
284 tree reduc_stmt = NULL, operation;
285
286 /* ??? TODO: Change this into a generic function that
287 recognizes reductions. */
288 if (!is_gimple_reg (PHI_RESULT (phi)))
289 continue;
290 if (simple_loop_info)
291 reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi);
292
293 /* Create a reduction_info struct, initialize it and insert it to
294 the reduction list. */
295
296 if (reduc_stmt)
297 {
298 PTR *slot;
299 struct reduction_info *new_reduction;
300
301 if (dump_file && (dump_flags & TDF_DETAILS))
302 {
303 fprintf (dump_file,
304 "Detected reduction. reduction stmt is: \n");
305 print_generic_stmt (dump_file, reduc_stmt, 0);
306 fprintf (dump_file, "\n");
307 }
308
309 new_reduction = XCNEW (struct reduction_info);
310
311 new_reduction->reduc_stmt = reduc_stmt;
312 new_reduction->reduc_phi = phi;
313 operation = GIMPLE_STMT_OPERAND (reduc_stmt, 1);
314 new_reduction->reduction_code = TREE_CODE (operation);
315 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
316 *slot = new_reduction;
317 }
318 }
319
72425608
ZD
320 /* Get rid of the information created by the vectorizer functions. */
321 destroy_loop_vec_info (simple_loop_info, true);
322
5f40b3cb
ZD
323 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
324 {
a509ebb5
RL
325 struct reduction_info *red;
326 imm_use_iterator imm_iter;
327 use_operand_p use_p;
328 tree reduc_phi;
329
5f40b3cb
ZD
330 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
331
332 if (is_gimple_reg (val))
333 {
334 if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
335 {
336 fprintf (dump_file, "phi is ");
337 print_generic_expr (dump_file, phi, 0);
338 fprintf (dump_file, "arg of phi to exit: value ");
339 print_generic_expr (dump_file, val, 0);
340 fprintf (dump_file, " used outside loop\n");
341 fprintf (dump_file,
342 " checking if it a part of reduction pattern: \n");
343 }
344 if (htab_elements (reduction_list) == 0)
345 {
346 if (dump_file && (dump_flags & TDF_DETAILS))
347 fprintf (dump_file,
348 " FAILED: it is not a part of reduction.\n");
349 return false;
350 }
351 reduc_phi = NULL;
352 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
353 {
354 if (flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
355 {
356 reduc_phi = USE_STMT (use_p);
357 break;
358 }
359 }
360 red = reduction_phi (reduction_list, reduc_phi);
361 if (red == NULL)
362 {
363 if (dump_file && (dump_flags & TDF_DETAILS))
364 fprintf (dump_file,
365 " FAILED: it is not a part of reduction.\n");
366 return false;
367 }
368 if (dump_file && (dump_flags & TDF_DETAILS))
369 {
370 fprintf (dump_file, "reduction phi is ");
371 print_generic_expr (dump_file, red->reduc_phi, 0);
372 fprintf (dump_file, "reduction stmt is ");
373 print_generic_expr (dump_file, red->reduc_stmt, 0);
374 }
375
5f40b3cb
ZD
376 }
377 }
378
379 /* The iterations of the loop may communicate only through bivs whose
380 iteration space can be distributed efficiently. */
381 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
382 {
383 tree def = PHI_RESULT (phi);
384 affine_iv iv;
385
a509ebb5 386 if (is_gimple_reg (def) && !simple_iv (loop, phi, def, &iv, true))
5f40b3cb 387 {
a509ebb5
RL
388 struct reduction_info *red;
389
390 red = reduction_phi (reduction_list, phi);
391 if (red == NULL)
392 {
393 if (dump_file && (dump_flags & TDF_DETAILS))
394 fprintf (dump_file,
395 " FAILED: scalar dependency between iterations\n");
396 return false;
397 }
5f40b3cb
ZD
398 }
399 }
400
401 /* We need to version the loop to verify assumptions in runtime. */
402 if (!can_duplicate_loop_p (loop))
403 {
404 if (dump_file && (dump_flags & TDF_DETAILS))
405 fprintf (dump_file, " FAILED: cannot be duplicated\n");
406 return false;
407 }
408
409 /* Check for problems with dependences. If the loop can be reversed,
410 the iterations are independent. */
411 datarefs = VEC_alloc (data_reference_p, heap, 10);
412 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
413 compute_data_dependences_for_loop (loop, true, &datarefs,
414 &dependence_relations);
415 if (dump_file && (dump_flags & TDF_DETAILS))
416 dump_data_dependence_relations (dump_file, dependence_relations);
417
418 trans = lambda_trans_matrix_new (1, 1);
419 LTM_MATRIX (trans)[0][0] = -1;
420
421 if (lambda_transform_legal_p (trans, 1, dependence_relations))
422 {
423 ret = true;
424 if (dump_file && (dump_flags & TDF_DETAILS))
425 fprintf (dump_file, " SUCCESS: may be parallelized\n");
426 }
427 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
428 fprintf (dump_file,
429 " FAILED: data dependencies exist across iterations\n");
5f40b3cb
ZD
430
431 free_dependence_relations (dependence_relations);
432 free_data_refs (datarefs);
433
434 return ret;
435}
436
1d4af1e8
SP
437/* Return true when LOOP contains basic blocks marked with the
438 BB_IRREDUCIBLE_LOOP flag. */
439
440static inline bool
441loop_has_blocks_with_irreducible_flag (struct loop *loop)
442{
443 unsigned i;
444 basic_block *bbs = get_loop_body_in_dom_order (loop);
445 bool res = true;
446
447 for (i = 0; i < loop->num_nodes; i++)
448 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
449 goto end;
450
451 res = false;
452 end:
453 free (bbs);
454 return res;
455}
456
8a171a59 457/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 458 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59
ZD
459 to their addresses that can be reused. The address of OBJ is known to
460 be invariant in the whole function. */
5f40b3cb
ZD
461
462static tree
9f9f72aa 463take_address_of (tree obj, tree type, edge entry, htab_t decl_address)
5f40b3cb 464{
8a171a59 465 int uid;
5f40b3cb
ZD
466 void **dslot;
467 struct int_tree_map ielt, *nielt;
8a171a59 468 tree *var_p, name, bvar, stmt, addr;
5f40b3cb 469
8a171a59
ZD
470 /* Since the address of OBJ is invariant, the trees may be shared.
471 Avoid rewriting unrelated parts of the code. */
472 obj = unshare_expr (obj);
473 for (var_p = &obj;
474 handled_component_p (*var_p);
475 var_p = &TREE_OPERAND (*var_p, 0))
476 continue;
477 uid = DECL_UID (*var_p);
478
5f40b3cb
ZD
479 ielt.uid = uid;
480 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
481 if (!*dslot)
482 {
8a171a59
ZD
483 addr = build_addr (*var_p, current_function_decl);
484 bvar = create_tmp_var (TREE_TYPE (addr), get_name (*var_p));
5f40b3cb 485 add_referenced_var (bvar);
8a171a59 486 stmt = build_gimple_modify_stmt (bvar, addr);
5f40b3cb
ZD
487 name = make_ssa_name (bvar, stmt);
488 GIMPLE_STMT_OPERAND (stmt, 0) = name;
489 bsi_insert_on_edge_immediate (entry, stmt);
490
491 nielt = XNEW (struct int_tree_map);
492 nielt->uid = uid;
493 nielt->to = name;
494 *dslot = nielt;
5f40b3cb 495 }
8a171a59
ZD
496 else
497 name = ((struct int_tree_map *) *dslot)->to;
5f40b3cb 498
8a171a59
ZD
499 if (var_p != &obj)
500 {
501 *var_p = build1 (INDIRECT_REF, TREE_TYPE (*var_p), name);
502 name = force_gimple_operand (build_addr (obj, current_function_decl),
503 &stmt, true, NULL_TREE);
504 if (stmt)
505 bsi_insert_on_edge_immediate (entry, stmt);
506 }
5f40b3cb 507
8a171a59
ZD
508 if (TREE_TYPE (name) != type)
509 {
510 name = force_gimple_operand (fold_convert (type, name), &stmt, true,
511 NULL_TREE);
512 if (stmt)
513 bsi_insert_on_edge_immediate (entry, stmt);
514 }
5f40b3cb
ZD
515
516 return name;
517}
518
a509ebb5
RL
519/* Callback for htab_traverse. Create the initialization statement
520 for reduction described in SLOT, and place it at the preheader of
521 the loop described in DATA. */
522
523static int
524initialize_reductions (void **slot, void *data)
525{
a509ebb5 526 tree init, c;
a509ebb5
RL
527 tree bvar, type, arg;
528 edge e;
529
530 struct reduction_info *reduc = *slot;
531 struct loop *loop = (struct loop *) data;
532
533 /* Create initialization in preheader:
534 reduction_variable = initialization value of reduction. */
535
536 /* In the phi node at the header, replace the argument coming
537 from the preheader with the reduction initialization value. */
538
539 /* Create a new variable to initialize the reduction. */
540 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
541 bvar = create_tmp_var (type, "reduction");
542 add_referenced_var (bvar);
543
544 c = build_omp_clause (OMP_CLAUSE_REDUCTION);
545 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
546 OMP_CLAUSE_DECL (c) =
547 SSA_NAME_VAR (GIMPLE_STMT_OPERAND (reduc->reduc_stmt, 0));
548
549 init = omp_reduction_init (c, TREE_TYPE (bvar));
550 reduc->init = init;
551
0eb7e7aa
RL
552 /* Replace the argument representing the initialization value
553 with the initialization value for the reduction (neutral
554 element for the particular operation, e.g. 0 for PLUS_EXPR,
555 1 for MULT_EXPR, etc).
556 Keep the old value in a new variable "reduction_initial",
557 that will be taken in consideration after the parallel
558 computing is done. */
a509ebb5
RL
559
560 e = loop_preheader_edge (loop);
561 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
562 /* Create new variable to hold the initial value. */
a509ebb5 563
a509ebb5 564 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 565 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 566 reduc->initial_value = arg;
a509ebb5
RL
567 return 1;
568}
5f40b3cb
ZD
569
570struct elv_data
571{
9f9f72aa 572 edge entry;
5f40b3cb
ZD
573 htab_t decl_address;
574 bool changed;
575};
576
9f9f72aa
AP
577/* Eliminates references to local variables in *TP out of the single
578 entry single exit region starting at DTA->ENTRY.
579 DECL_ADDRESS contains addresses of the references that had their
580 address taken already. If the expression is changed, CHANGED is
581 set to true. Callback for walk_tree. */
a509ebb5 582
5f40b3cb 583static tree
8a171a59 584eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb
ZD
585{
586 struct elv_data *dta = data;
8a171a59 587 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
588
589 if (DECL_P (t))
590 {
591 *walk_subtrees = 0;
592
593 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
594 return NULL_TREE;
595
596 type = TREE_TYPE (t);
597 addr_type = build_pointer_type (type);
9f9f72aa 598 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address);
5f40b3cb
ZD
599 *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);
600
601 dta->changed = true;
602 return NULL_TREE;
603 }
604
605 if (TREE_CODE (t) == ADDR_EXPR)
606 {
8a171a59
ZD
607 /* ADDR_EXPR may appear in two contexts:
608 -- as a gimple operand, when the address taken is a function invariant
609 -- as gimple rhs, when the resulting address in not a function
610 invariant
611 We do not need to do anything special in the latter case (the base of
612 the memory reference whose address is taken may be replaced in the
613 DECL_P case). The former case is more complicated, as we need to
614 ensure that the new address is still a gimple operand. Thus, it
615 is not sufficient to replace just the base of the memory reference --
616 we need to move the whole computation of the address out of the
617 loop. */
618 if (!is_gimple_val (t))
5f40b3cb
ZD
619 return NULL_TREE;
620
621 *walk_subtrees = 0;
8a171a59
ZD
622 obj = TREE_OPERAND (t, 0);
623 var = get_base_address (obj);
624 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
625 return NULL_TREE;
626
627 addr_type = TREE_TYPE (t);
9f9f72aa 628 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address);
5f40b3cb
ZD
629 *tp = addr;
630
631 dta->changed = true;
632 return NULL_TREE;
633 }
634
a509ebb5 635 if (!EXPR_P (t) && !GIMPLE_STMT_P (t))
5f40b3cb
ZD
636 *walk_subtrees = 0;
637
638 return NULL_TREE;
639}
640
9f9f72aa
AP
641/* Moves the references to local variables in STMT out of the single
642 entry single exit region starting at ENTRY. DECL_ADDRESS contains
643 addresses of the references that had their address taken
644 already. */
5f40b3cb
ZD
645
646static void
9f9f72aa 647eliminate_local_variables_stmt (edge entry, tree stmt,
5f40b3cb
ZD
648 htab_t decl_address)
649{
650 struct elv_data dta;
651
9f9f72aa 652 dta.entry = entry;
5f40b3cb
ZD
653 dta.decl_address = decl_address;
654 dta.changed = false;
655
656 walk_tree (&stmt, eliminate_local_variables_1, &dta, NULL);
657
658 if (dta.changed)
659 update_stmt (stmt);
660}
661
9f9f72aa
AP
662/* Eliminates the references to local variables from the single entry
663 single exit region between the ENTRY and EXIT edges.
664
a509ebb5
RL
665 This includes:
666 1) Taking address of a local variable -- these are moved out of the
9f9f72aa 667 region (and temporary variable is created to hold the address if
a509ebb5 668 necessary).
9f9f72aa 669
5f40b3cb 670 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 671 references. */
5f40b3cb
ZD
672
673static void
9f9f72aa 674eliminate_local_variables (edge entry, edge exit)
5f40b3cb 675{
9f9f72aa
AP
676 basic_block bb;
677 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
5f40b3cb
ZD
678 unsigned i;
679 block_stmt_iterator bsi;
680 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
681 free);
9f9f72aa
AP
682 basic_block entry_bb = entry->src;
683 basic_block exit_bb = exit->dest;
5f40b3cb 684
9f9f72aa 685 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 686
9f9f72aa
AP
687 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
688 if (bb != entry_bb && bb != exit_bb)
5f40b3cb 689 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
9f9f72aa
AP
690 eliminate_local_variables_stmt (entry, bsi_stmt (bsi),
691 decl_address);
5f40b3cb
ZD
692
693 htab_delete (decl_address);
9f9f72aa
AP
694 VEC_free (basic_block, heap, body);
695}
696
697/* Returns true if expression EXPR is not defined between ENTRY and
698 EXIT, i.e. if all its operands are defined outside of the region. */
699
700static bool
701expr_invariant_in_region_p (edge entry, edge exit, tree expr)
702{
703 basic_block entry_bb = entry->src;
704 basic_block exit_bb = exit->dest;
705 basic_block def_bb;
706 unsigned i, len;
707
708 if (is_gimple_min_invariant (expr))
709 return true;
710
711 if (TREE_CODE (expr) == SSA_NAME)
712 {
713 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
714 if (def_bb
715 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
716 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
717 return false;
718
719 return true;
720 }
721
722 if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
723 return false;
724
725 len = TREE_OPERAND_LENGTH (expr);
726 for (i = 0; i < len; i++)
727 if (!expr_invariant_in_region_p (entry, exit, TREE_OPERAND (expr, i)))
728 return false;
729
730 return true;
5f40b3cb
ZD
731}
732
733/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
734 The copies are stored to NAME_COPIES, if NAME was already duplicated,
735 its duplicate stored in NAME_COPIES is returned.
736
737 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
738 duplicated, storing the copies in DECL_COPIES. */
739
740static tree
9f9f72aa
AP
741separate_decls_in_region_name (tree name,
742 htab_t name_copies, htab_t decl_copies,
743 bool copy_name_p)
5f40b3cb
ZD
744{
745 tree copy, var, var_copy;
746 unsigned idx, uid, nuid;
747 struct int_tree_map ielt, *nielt;
748 struct name_to_copy_elt elt, *nelt;
749 void **slot, **dslot;
750
751 if (TREE_CODE (name) != SSA_NAME)
752 return name;
753
754 idx = SSA_NAME_VERSION (name);
755 elt.version = idx;
756 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
757 copy_name_p ? INSERT : NO_INSERT);
758 if (slot && *slot)
759 return ((struct name_to_copy_elt *) *slot)->new_name;
760
761 var = SSA_NAME_VAR (name);
762 uid = DECL_UID (var);
763 ielt.uid = uid;
764 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
765 if (!*dslot)
766 {
767 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 768 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
5f40b3cb
ZD
769 add_referenced_var (var_copy);
770 nielt = XNEW (struct int_tree_map);
771 nielt->uid = uid;
772 nielt->to = var_copy;
773 *dslot = nielt;
774
775 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 776 it again. */
5f40b3cb
ZD
777 nuid = DECL_UID (var_copy);
778 ielt.uid = nuid;
779 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
780 gcc_assert (!*dslot);
781 nielt = XNEW (struct int_tree_map);
782 nielt->uid = nuid;
783 nielt->to = var_copy;
784 *dslot = nielt;
785 }
786 else
787 var_copy = ((struct int_tree_map *) *dslot)->to;
788
789 if (copy_name_p)
790 {
791 copy = duplicate_ssa_name (name, NULL_TREE);
792 nelt = XNEW (struct name_to_copy_elt);
793 nelt->version = idx;
794 nelt->new_name = copy;
795 nelt->field = NULL_TREE;
796 *slot = nelt;
797 }
798 else
799 {
800 gcc_assert (!slot);
801 copy = name;
802 }
803
804 SSA_NAME_VAR (copy) = var_copy;
805 return copy;
806}
807
9f9f72aa
AP
808/* Finds the ssa names used in STMT that are defined outside the
809 region between ENTRY and EXIT and replaces such ssa names with
810 their duplicates. The duplicates are stored to NAME_COPIES. Base
811 decls of all ssa names used in STMT (including those defined in
812 LOOP) are replaced with the new temporary variables; the
813 replacement decls are stored in DECL_COPIES. */
5f40b3cb
ZD
814
815static void
9f9f72aa
AP
816separate_decls_in_region_stmt (edge entry, edge exit, tree stmt,
817 htab_t name_copies, htab_t decl_copies)
5f40b3cb
ZD
818{
819 use_operand_p use;
820 def_operand_p def;
821 ssa_op_iter oi;
822 tree name, copy;
823 bool copy_name_p;
824
825 mark_virtual_ops_for_renaming (stmt);
826
827 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
828 {
829 name = DEF_FROM_PTR (def);
830 gcc_assert (TREE_CODE (name) == SSA_NAME);
9f9f72aa
AP
831 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
832 false);
a509ebb5
RL
833 gcc_assert (copy == name);
834 }
5f40b3cb
ZD
835
836 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
837 {
838 name = USE_FROM_PTR (use);
839 if (TREE_CODE (name) != SSA_NAME)
840 continue;
841
9f9f72aa
AP
842 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
843 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
844 copy_name_p);
a509ebb5
RL
845 SET_USE (use, copy);
846 }
5f40b3cb
ZD
847}
848
0eb7e7aa
RL
849/* Callback for htab_traverse. Adds a field corresponding to the reduction
850 specified in SLOT. The type is passed in DATA. */
851
852static int
853add_field_for_reduction (void **slot, void *data)
a509ebb5 854{
0eb7e7aa
RL
855
856 struct reduction_info *red = *slot;
857 tree type = data;
858 tree var = SSA_NAME_VAR (GIMPLE_STMT_OPERAND (red->reduc_stmt, 0));
859 tree field = build_decl (FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
860
861 insert_field_into_struct (type, field);
862
863 red->field = field;
864
865 return 1;
866}
a509ebb5 867
5f40b3cb 868/* Callback for htab_traverse. Adds a field corresponding to a ssa name
0eb7e7aa 869 described in SLOT. The type is passed in DATA. */
5f40b3cb
ZD
870
871static int
872add_field_for_name (void **slot, void *data)
873{
874 struct name_to_copy_elt *elt = *slot;
0eb7e7aa 875 tree type = data;
5f40b3cb
ZD
876 tree name = ssa_name (elt->version);
877 tree var = SSA_NAME_VAR (name);
878 tree field = build_decl (FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
879
880 insert_field_into_struct (type, field);
881 elt->field = field;
a509ebb5 882
5f40b3cb
ZD
883 return 1;
884}
885
a509ebb5
RL
886/* Callback for htab_traverse. A local result is the intermediate result
887 computed by a single
888 thread, or the intial value in case no iteration was executed.
889 This function creates a phi node reflecting these values.
890 The phi's result will be stored in NEW_PHI field of the
891 reduction's data structure. */
892
893static int
894create_phi_for_local_result (void **slot, void *data)
895{
896 struct reduction_info *reduc = *slot;
897 struct loop *loop = data;
898 edge e;
899 tree new_phi;
900 basic_block store_bb;
901 tree local_res;
902
903 /* STORE_BB is the block where the phi
904 should be stored. It is the destination of the loop exit.
905 (Find the fallthru edge from OMP_CONTINUE). */
906 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
907
908 /* STORE_BB has two predecessors. One coming from the loop
909 (the reduction's result is computed at the loop),
910 and another coming from a block preceding the loop,
911 when no iterations
912 are executed (the initial value should be taken). */
913 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
914 e = EDGE_PRED (store_bb, 1);
915 else
916 e = EDGE_PRED (store_bb, 0);
0eb7e7aa 917 local_res = make_ssa_name (SSA_NAME_VAR (GIMPLE_STMT_OPERAND (reduc->reduc_stmt, 0)), NULL_TREE);
a509ebb5
RL
918 new_phi = create_phi_node (local_res, store_bb);
919 SSA_NAME_DEF_STMT (local_res) = new_phi;
920 add_phi_arg (new_phi, reduc->init, e);
921 add_phi_arg (new_phi, GIMPLE_STMT_OPERAND (reduc->reduc_stmt, 0),
922 FALLTHRU_EDGE (loop->latch));
923 reduc->new_phi = new_phi;
924
925 return 1;
926}
5f40b3cb
ZD
927
928struct clsn_data
929{
930 tree store;
931 tree load;
932
933 basic_block store_bb;
934 basic_block load_bb;
935};
936
a509ebb5
RL
937/* Callback for htab_traverse. Create an atomic instruction for the
938 reduction described in SLOT.
939 DATA annotates the place in memory the atomic operation relates to,
940 and the basic block it needs to be generated in. */
941
942static int
943create_call_for_reduction_1 (void **slot, void *data)
944{
945 struct reduction_info *reduc = *slot;
946 struct clsn_data *clsn_data = data;
947 block_stmt_iterator bsi;
948 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
949 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
950 tree load_struct;
951 basic_block bb;
952 basic_block new_bb;
953 edge e;
954 tree t, addr, addr_type, ref, x;
955 tree tmp_load, load, name;
956
957 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
958 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
959 addr_type = build_pointer_type (type);
960
961 addr = build_addr (t, current_function_decl);
962
963 /* Create phi node. */
964 bb = clsn_data->load_bb;
965
966 e = split_block (bb, t);
967 new_bb = e->dest;
968
969 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
970 add_referenced_var (tmp_load);
971 tmp_load = make_ssa_name (tmp_load, NULL);
972 load = build2 (OMP_ATOMIC_LOAD, void_type_node, tmp_load, addr);
973 SSA_NAME_DEF_STMT (tmp_load) = load;
974 bsi = bsi_start (new_bb);
975 bsi_insert_after (&bsi, load, BSI_NEW_STMT);
976
977 e = split_block (new_bb, load);
978 new_bb = e->dest;
979 bsi = bsi_start (new_bb);
980 ref = tmp_load;
981 x =
982 fold_build2 (reduc->reduction_code,
983 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
984 PHI_RESULT (reduc->new_phi));
985
986 name =
987 force_gimple_operand_bsi (&bsi, x, true, NULL_TREE, true,
988 BSI_CONTINUE_LINKING);
989
990 x = build1 (OMP_ATOMIC_STORE, void_type_node, name);
991
992 bsi_insert_after (&bsi, x, BSI_NEW_STMT);
993 return 1;
994}
995
996/* Create the atomic operation at the join point of the threads.
997 REDUCTION_LIST describes the reductions in the LOOP.
998 LD_ST_DATA describes the shared data structure where
999 shared data is stored in and loaded from. */
1000static void
1001create_call_for_reduction (struct loop *loop, htab_t reduction_list,
1002 struct clsn_data *ld_st_data)
1003{
1004 htab_traverse (reduction_list, create_phi_for_local_result, loop);
1005 /* Find the fallthru edge from OMP_CONTINUE. */
1006 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1007 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1008}
1009
ae0bce62
RL
1010/* Callback for htab_traverse. Loads the final reduction value at the
1011 join point of all threads, and inserts it in the right place. */
a509ebb5
RL
1012
1013static int
1014create_loads_for_reductions (void **slot, void *data)
1015{
1016 struct reduction_info *red = *slot;
1017 struct clsn_data *clsn_data = data;
1018 tree stmt;
1019 block_stmt_iterator bsi;
0eb7e7aa 1020 tree type = TREE_TYPE (GIMPLE_STMT_OPERAND (red->reduc_stmt, 0));
a509ebb5
RL
1021 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
1022 tree load_struct;
ae0bce62 1023 tree name;
a509ebb5
RL
1024 tree x;
1025
1026 bsi = bsi_after_labels (clsn_data->load_bb);
1027 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
1028 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1029 NULL_TREE);
a509ebb5 1030
ae0bce62 1031 x = load_struct;
a509ebb5
RL
1032 name = PHI_RESULT (red->keep_res);
1033 stmt = build_gimple_modify_stmt (name, x);
1034 GIMPLE_STMT_OPERAND (stmt, 0) = name;
1035 SSA_NAME_DEF_STMT (name) = stmt;
1036
1037 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
1038
1039 remove_phi_node (red->keep_res, NULL_TREE, false);
1040
1041 return 1;
1042}
1043
1044/* Load the reduction result that was stored in LD_ST_DATA.
1045 REDUCTION_LIST describes the list of reductions that the
1046 loades should be generated for. */
1047static void
1048create_final_loads_for_reduction (htab_t reduction_list,
1049 struct clsn_data *ld_st_data)
1050{
1051 block_stmt_iterator bsi;
1052 tree t;
1053
1054 bsi = bsi_after_labels (ld_st_data->load_bb);
1055 t = build_fold_addr_expr (ld_st_data->store);
1056 t =
1057 build_gimple_modify_stmt (ld_st_data->load,
1058 build_fold_addr_expr (ld_st_data->store));
1059
1060 bsi_insert_before (&bsi, t, BSI_NEW_STMT);
1061 SSA_NAME_DEF_STMT (ld_st_data->load) = t;
1062 GIMPLE_STMT_OPERAND (t, 0) = ld_st_data->load;
1063
1064 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1065
1066}
1067
0eb7e7aa
RL
1068/* Callback for htab_traverse. Store the neutral value for the
1069 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1070 1 for MULT_EXPR, etc. into the reduction field.
1071 The reduction is specified in SLOT. The store information is
1072 passed in DATA. */
1073
1074static int
1075create_stores_for_reduction (void **slot, void *data)
1076{
1077 struct reduction_info *red = *slot;
1078 struct clsn_data *clsn_data = data;
1079 tree stmt;
1080 block_stmt_iterator bsi;
1081 tree type = TREE_TYPE (GIMPLE_STMT_OPERAND (red->reduc_stmt, 0));
1082
1083 bsi = bsi_last (clsn_data->store_bb);
1084 stmt =
1085 build_gimple_modify_stmt (build3
1086 (COMPONENT_REF, type, clsn_data->store,
1087 red->field, NULL_TREE),
ae0bce62 1088 red->initial_value);
0eb7e7aa
RL
1089 mark_virtual_ops_for_renaming (stmt);
1090 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
1091
1092 return 1;
1093}
1094
a509ebb5
RL
1095/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1096 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1097 specified in SLOT. */
1098
5f40b3cb
ZD
1099static int
1100create_loads_and_stores_for_name (void **slot, void *data)
1101{
1102 struct name_to_copy_elt *elt = *slot;
1103 struct clsn_data *clsn_data = data;
1104 tree stmt;
1105 block_stmt_iterator bsi;
1106 tree type = TREE_TYPE (elt->new_name);
1107 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
1108 tree load_struct;
1109
1110 bsi = bsi_last (clsn_data->store_bb);
a509ebb5
RL
1111 stmt =
1112 build_gimple_modify_stmt (build3
1113 (COMPONENT_REF, type, clsn_data->store,
1114 elt->field, NULL_TREE),
1115 ssa_name (elt->version));
5f40b3cb
ZD
1116 mark_virtual_ops_for_renaming (stmt);
1117 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
1118
1119 bsi = bsi_last (clsn_data->load_bb);
1120 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
a509ebb5
RL
1121 stmt = build_gimple_modify_stmt (elt->new_name,
1122 build3 (COMPONENT_REF, type, load_struct,
1123 elt->field, NULL_TREE));
5f40b3cb
ZD
1124 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1125 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
1126
1127 return 1;
1128}
1129
1130/* Moves all the variables used in LOOP and defined outside of it (including
1131 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1132 name) to a structure created for this purpose. The code
1133
1134 while (1)
1135 {
1136 use (a);
1137 use (b);
1138 }
1139
1140 is transformed this way:
1141
1142 bb0:
1143 old.a = a;
1144 old.b = b;
1145
1146 bb1:
1147 a' = new->a;
1148 b' = new->b;
1149 while (1)
1150 {
1151 use (a');
1152 use (b');
1153 }
1154
1155 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1156 pointer `new' is intentionally not initialized (the loop will be split to a
1157 separate function later, and `new' will be initialized from its arguments).
a509ebb5
RL
1158 LD_ST_DATA holds information about the shared data structure used to pass
1159 information among the threads. It is initialized here, and
1160 gen_parallel_loop will pass it to create_call_for_reduction that
1161 needs this information. REDUCTION_LIST describes the reductions
1162 in LOOP. */
5f40b3cb
ZD
1163
1164static void
9f9f72aa
AP
1165separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
1166 tree *arg_struct, tree *new_arg_struct,
1167 struct clsn_data *ld_st_data)
a509ebb5 1168
5f40b3cb 1169{
9f9f72aa 1170 basic_block bb1 = split_edge (entry);
5f40b3cb
ZD
1171 basic_block bb0 = single_pred (bb1);
1172 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1173 name_to_copy_elt_eq, free);
1174 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1175 free);
5f40b3cb
ZD
1176 unsigned i;
1177 tree phi, type, type_name, nvar;
1178 block_stmt_iterator bsi;
1179 struct clsn_data clsn_data;
9f9f72aa
AP
1180 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
1181 basic_block bb;
1182 basic_block entry_bb = bb1;
1183 basic_block exit_bb = exit->dest;
5f40b3cb 1184
9f9f72aa
AP
1185 entry = single_succ_edge(entry_bb);
1186 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 1187
9f9f72aa
AP
1188 for (i = 0; VEC_iterate (basic_block, body, i, bb); i++)
1189 {
1190 if (bb != entry_bb && bb != exit_bb)
1191 {
1192 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1193 separate_decls_in_region_stmt (entry, exit, phi, name_copies,
1194 decl_copies);
1195
1196 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1197 separate_decls_in_region_stmt (entry, exit, bsi_stmt (bsi),
1198 name_copies, decl_copies);
1199 }
5f40b3cb 1200 }
9f9f72aa
AP
1201
1202 VEC_free (basic_block, heap, body);
5f40b3cb
ZD
1203
1204 if (htab_elements (name_copies) == 0)
1205 {
1206 /* It may happen that there is nothing to copy (if there are only
a509ebb5 1207 loop carried and external variables in the loop). */
5f40b3cb
ZD
1208 *arg_struct = NULL;
1209 *new_arg_struct = NULL;
1210 }
1211 else
1212 {
1213 /* Create the type for the structure to store the ssa names to. */
1214 type = lang_hooks.types.make_type (RECORD_TYPE);
1215 type_name = build_decl (TYPE_DECL, create_tmp_var_name (".paral_data"),
1216 type);
1217 TYPE_NAME (type) = type_name;
1218
0eb7e7aa 1219 htab_traverse (name_copies, add_field_for_name, type);
9f9f72aa 1220 if (reduction_list && htab_elements (reduction_list) > 0)
0eb7e7aa
RL
1221 {
1222 /* Create the fields for reductions. */
1223 htab_traverse (reduction_list, add_field_for_reduction,
1224 type);
1225 }
5f40b3cb 1226 layout_type (type);
0eb7e7aa 1227
5f40b3cb
ZD
1228 /* Create the loads and stores. */
1229 *arg_struct = create_tmp_var (type, ".paral_data_store");
1230 add_referenced_var (*arg_struct);
1231 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1232 add_referenced_var (nvar);
1233 *new_arg_struct = make_ssa_name (nvar, NULL_TREE);
1234
a509ebb5
RL
1235 ld_st_data->store = *arg_struct;
1236 ld_st_data->load = *new_arg_struct;
1237 ld_st_data->store_bb = bb0;
1238 ld_st_data->load_bb = bb1;
0eb7e7aa 1239
5f40b3cb 1240 htab_traverse (name_copies, create_loads_and_stores_for_name,
a509ebb5
RL
1241 ld_st_data);
1242
ae0bce62
RL
1243 /* Load the calculation from memory (after the join of the threads). */
1244
9f9f72aa 1245 if (reduction_list && htab_elements (reduction_list) > 0)
a509ebb5 1246 {
0eb7e7aa
RL
1247 htab_traverse (reduction_list, create_stores_for_reduction,
1248 ld_st_data);
a509ebb5 1249 clsn_data.load = make_ssa_name (nvar, NULL_TREE);
9f9f72aa 1250 clsn_data.load_bb = exit->dest;
a509ebb5
RL
1251 clsn_data.store = ld_st_data->store;
1252 create_final_loads_for_reduction (reduction_list, &clsn_data);
1253 }
5f40b3cb
ZD
1254 }
1255
1256 htab_delete (decl_copies);
1257 htab_delete (name_copies);
1258}
1259
1260/* Bitmap containing uids of functions created by parallelization. We cannot
1261 allocate it from the default obstack, as it must live across compilation
1262 of several functions; we make it gc allocated instead. */
1263
1264static GTY(()) bitmap parallelized_functions;
1265
1266/* Returns true if FN was created by create_loop_fn. */
1267
1268static bool
1269parallelized_function_p (tree fn)
1270{
1271 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1272 return false;
1273
1274 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1275}
1276
1277/* Creates and returns an empty function that will receive the body of
1278 a parallelized loop. */
1279
1280static tree
1281create_loop_fn (void)
1282{
1283 char buf[100];
1284 char *tname;
1285 tree decl, type, name, t;
1286 struct function *act_cfun = cfun;
1287 static unsigned loopfn_num;
1288
1289 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1290 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1291 clean_symbol_name (tname);
1292 name = get_identifier (tname);
1293 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1294
1295 decl = build_decl (FUNCTION_DECL, name, type);
1296 if (!parallelized_functions)
1297 parallelized_functions = BITMAP_GGC_ALLOC ();
1298 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1299
1300 TREE_STATIC (decl) = 1;
1301 TREE_USED (decl) = 1;
1302 DECL_ARTIFICIAL (decl) = 1;
1303 DECL_IGNORED_P (decl) = 0;
1304 TREE_PUBLIC (decl) = 0;
1305 DECL_UNINLINABLE (decl) = 1;
1306 DECL_EXTERNAL (decl) = 0;
1307 DECL_CONTEXT (decl) = NULL_TREE;
1308 DECL_INITIAL (decl) = make_node (BLOCK);
1309
1310 t = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
1311 DECL_ARTIFICIAL (t) = 1;
1312 DECL_IGNORED_P (t) = 1;
1313 DECL_RESULT (decl) = t;
1314
1315 t = build_decl (PARM_DECL, get_identifier (".paral_data_param"),
1316 ptr_type_node);
1317 DECL_ARTIFICIAL (t) = 1;
1318 DECL_ARG_TYPE (t) = ptr_type_node;
1319 DECL_CONTEXT (t) = decl;
1320 TREE_USED (t) = 1;
1321 DECL_ARGUMENTS (decl) = t;
1322
182e0d71 1323 allocate_struct_function (decl, false);
5f40b3cb
ZD
1324
1325 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1326 it. */
5576d6f2 1327 set_cfun (act_cfun);
5f40b3cb
ZD
1328
1329 return decl;
1330}
1331
1332/* Bases all the induction variables in LOOP on a single induction variable
1333 (unsigned with base 0 and step 1), whose final value is compared with
a509ebb5
RL
1334 NIT. The induction variable is incremented in the loop latch.
1335 REDUCTION_LIST describes the reductions in LOOP. */
5f40b3cb
ZD
1336
1337static void
a509ebb5 1338canonicalize_loop_ivs (struct loop *loop, htab_t reduction_list, tree nit)
5f40b3cb
ZD
1339{
1340 unsigned precision = TYPE_PRECISION (TREE_TYPE (nit));
36ad7922 1341 tree phi, prev, res, type, var_before, val, atype, mtype, t, next;
5f40b3cb
ZD
1342 block_stmt_iterator bsi;
1343 bool ok;
1344 affine_iv iv;
1345 edge exit = single_dom_exit (loop);
a509ebb5 1346 struct reduction_info *red;
5f40b3cb
ZD
1347
1348 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1349 {
1350 res = PHI_RESULT (phi);
1351
a509ebb5 1352 if (is_gimple_reg (res) && TYPE_PRECISION (TREE_TYPE (res)) > precision)
5f40b3cb
ZD
1353 precision = TYPE_PRECISION (TREE_TYPE (res));
1354 }
1355
1356 type = lang_hooks.types.type_for_size (precision, 1);
1357
1358 bsi = bsi_last (loop->latch);
1359 create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE,
1360 loop, &bsi, true, &var_before, NULL);
1361
1362 bsi = bsi_after_labels (loop->header);
1363 prev = NULL;
1364 for (phi = phi_nodes (loop->header); phi; phi = next)
1365 {
1366 next = PHI_CHAIN (phi);
1367 res = PHI_RESULT (phi);
1368
a509ebb5 1369 if (!is_gimple_reg (res) || res == var_before)
5f40b3cb
ZD
1370 {
1371 prev = phi;
1372 continue;
1373 }
5f40b3cb 1374
a509ebb5
RL
1375 ok = simple_iv (loop, phi, res, &iv, true);
1376 red = reduction_phi (reduction_list, phi);
1377 /* We preserve the reduction phi nodes. */
1378 if (!ok && red)
1379 {
1380 prev = phi;
1381 continue;
1382 }
1383 else
1384 gcc_assert (ok);
5f40b3cb
ZD
1385 remove_phi_node (phi, prev, false);
1386
1387 atype = TREE_TYPE (res);
36ad7922
JJ
1388 mtype = POINTER_TYPE_P (atype) ? sizetype : atype;
1389 val = fold_build2 (MULT_EXPR, mtype, unshare_expr (iv.step),
1390 fold_convert (mtype, var_before));
1391 val = fold_build2 (POINTER_TYPE_P (atype)
1392 ? POINTER_PLUS_EXPR : PLUS_EXPR,
1393 atype, unshare_expr (iv.base), val);
5f40b3cb
ZD
1394 val = force_gimple_operand_bsi (&bsi, val, false, NULL_TREE, true,
1395 BSI_SAME_STMT);
1396 t = build_gimple_modify_stmt (res, val);
1397 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
1398 SSA_NAME_DEF_STMT (res) = t;
1399 }
1400
1401 t = last_stmt (exit->src);
1402 /* Make the loop exit if the control condition is not satisfied. */
1403 if (exit->flags & EDGE_TRUE_VALUE)
1404 {
1405 edge te, fe;
1406
1407 extract_true_false_edges_from_block (exit->src, &te, &fe);
1408 te->flags = EDGE_FALSE_VALUE;
1409 fe->flags = EDGE_TRUE_VALUE;
1410 }
1411 COND_EXPR_COND (t) = build2 (LT_EXPR, boolean_type_node, var_before, nit);
1412}
1413
1414/* Moves the exit condition of LOOP to the beginning of its header, and
1415 duplicates the part of the last iteration that gets disabled to the
1416 exit of the loop. NIT is the number of iterations of the loop
1417 (used to initialize the variables in the duplicated part).
1418
1419 TODO: the common case is that latch of the loop is empty and immediatelly
1420 follows the loop exit. In this case, it would be better not to copy the
1421 body of the loop, but only move the entry of the loop directly before the
1422 exit check and increase the number of iterations of the loop by one.
a509ebb5
RL
1423 This may need some additional preconditioning in case NIT = ~0.
1424 REDUCTION_LIST describes the reductions in LOOP. */
5f40b3cb
ZD
1425
1426static void
a509ebb5 1427transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
5f40b3cb
ZD
1428{
1429 basic_block *bbs, *nbbs, ex_bb, orig_header;
1430 unsigned n;
1431 bool ok;
1432 edge exit = single_dom_exit (loop), hpred;
1433 tree phi, nphi, cond, control, control_name, res, t, cond_stmt;
1434 block_stmt_iterator bsi;
1435
1436 split_block_after_labels (loop->header);
1437 orig_header = single_succ (loop->header);
1438 hpred = single_succ_edge (loop->header);
1439
1440 cond_stmt = last_stmt (exit->src);
1441 cond = COND_EXPR_COND (cond_stmt);
1442 control = TREE_OPERAND (cond, 0);
1443 gcc_assert (TREE_OPERAND (cond, 1) == nit);
1444
1445 /* Make sure that we have phi nodes on exit for all loop header phis
1446 (create_parallel_loop requires that). */
1447 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1448 {
1449 res = PHI_RESULT (phi);
1450 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1451 SET_PHI_RESULT (phi, t);
1452
1453 nphi = create_phi_node (res, orig_header);
1454 SSA_NAME_DEF_STMT (res) = nphi;
1455 add_phi_arg (nphi, t, hpred);
1456
1457 if (res == control)
1458 {
1459 TREE_OPERAND (cond, 0) = t;
1460 update_stmt (cond_stmt);
1461 control = t;
1462 }
1463 }
1464
1465 bbs = get_loop_body_in_dom_order (loop);
1466 for (n = 0; bbs[n] != exit->src; n++)
1467 continue;
1468 nbbs = XNEWVEC (basic_block, n);
1469 ok = tree_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1470 bbs + 1, n, nbbs);
1471 gcc_assert (ok);
1472 free (bbs);
1473 ex_bb = nbbs[0];
1474 free (nbbs);
1475
a509ebb5
RL
1476 /* Other than reductions, the only gimple reg that should be copied
1477 out of the loop is the control variable. */
1478
5f40b3cb
ZD
1479 control_name = NULL_TREE;
1480 for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi))
1481 {
1482 res = PHI_RESULT (phi);
1483 if (!is_gimple_reg (res))
1484 continue;
1485
a509ebb5
RL
1486 /* Check if it is a part of reduction. If it is,
1487 keep the phi at the reduction's keep_res field. The
1488 PHI_RESULT of this phi is the resulting value of the reduction
1489 variable when exiting the loop. */
1490
1491 exit = single_dom_exit (loop);
1492
1493 if (htab_elements (reduction_list) > 0)
1494 {
1495 struct reduction_info *red;
1496
1497 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1498
1499 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1500 if (red)
1501 red->keep_res = phi;
1502 }
1503 else
1504 gcc_assert (control_name == NULL_TREE
1505 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb
ZD
1506 control_name = res;
1507 }
1508 gcc_assert (control_name != NULL_TREE);
1509 phi = SSA_NAME_DEF_STMT (control_name);
1510 remove_phi_node (phi, NULL_TREE, false);
1511
1512 /* Initialize the control variable to NIT. */
1513 bsi = bsi_after_labels (ex_bb);
29ac1d9a
ZD
1514 nit = force_gimple_operand_bsi (&bsi,
1515 fold_convert (TREE_TYPE (control_name), nit),
1516 false, NULL_TREE, false, BSI_SAME_STMT);
5f40b3cb
ZD
1517 t = build_gimple_modify_stmt (control_name, nit);
1518 bsi_insert_before (&bsi, t, BSI_NEW_STMT);
1519 SSA_NAME_DEF_STMT (control_name) = t;
1520}
1521
1522/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1523 LOOP_FN and DATA are the arguments of OMP_PARALLEL.
1524 NEW_DATA is the variable that should be initialized from the argument
1525 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1526 basic block containing OMP_PARALLEL tree. */
1527
1528static basic_block
1529create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1530 tree new_data, unsigned n_threads)
1531{
1532 block_stmt_iterator bsi;
1533 basic_block bb, paral_bb, for_bb, ex_bb;
1534 tree t, param, res, for_stmt;
1535 tree cvar, cvar_init, initvar, cvar_next, cvar_base, cond, phi, type;
1536 edge exit, nexit, guard, end, e;
1537
1538 /* Prepare the OMP_PARALLEL statement. */
1539 bb = loop_preheader_edge (loop)->src;
1540 paral_bb = single_pred (bb);
1541 bsi = bsi_last (paral_bb);
1542
1543 t = build_omp_clause (OMP_CLAUSE_NUM_THREADS);
1544 OMP_CLAUSE_NUM_THREADS_EXPR (t)
a509ebb5
RL
1545 = build_int_cst (integer_type_node, n_threads);
1546 t = build4 (OMP_PARALLEL, void_type_node, NULL_TREE, t, loop_fn, data);
5f40b3cb
ZD
1547
1548 bsi_insert_after (&bsi, t, BSI_NEW_STMT);
1549
1550 /* Initialize NEW_DATA. */
1551 if (data)
1552 {
1553 bsi = bsi_after_labels (bb);
1554
1555 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL_TREE);
1556 t = build_gimple_modify_stmt (param, build_fold_addr_expr (data));
1557 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
1558 SSA_NAME_DEF_STMT (param) = t;
1559
1560 t = build_gimple_modify_stmt (new_data,
a509ebb5
RL
1561 fold_convert (TREE_TYPE (new_data),
1562 param));
5f40b3cb
ZD
1563 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
1564 SSA_NAME_DEF_STMT (new_data) = t;
1565 }
1566
1567 /* Emit OMP_RETURN for OMP_PARALLEL. */
1568 bb = split_loop_exit_edge (single_dom_exit (loop));
1569 bsi = bsi_last (bb);
1570 bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT);
1571
1572 /* Extract data for OMP_FOR. */
1573 gcc_assert (loop->header == single_dom_exit (loop)->src);
1574 cond = COND_EXPR_COND (last_stmt (loop->header));
1575
1576 cvar = TREE_OPERAND (cond, 0);
1577 cvar_base = SSA_NAME_VAR (cvar);
1578 phi = SSA_NAME_DEF_STMT (cvar);
1579 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1580 initvar = make_ssa_name (cvar_base, NULL_TREE);
1581 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1582 initvar);
1583 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1584
1585 bsi = bsi_last (loop->latch);
1586 gcc_assert (bsi_stmt (bsi) == SSA_NAME_DEF_STMT (cvar_next));
1587 bsi_remove (&bsi, true);
1588
1589 /* Prepare cfg. */
1590 for_bb = split_edge (loop_preheader_edge (loop));
1591 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1592 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1593 gcc_assert (exit == single_dom_exit (loop));
1594
1595 guard = make_edge (for_bb, ex_bb, 0);
1596 single_succ_edge (loop->latch)->flags = 0;
1597 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1598 for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi))
1599 {
1600 res = PHI_RESULT (phi);
1601 gcc_assert (!is_gimple_reg (phi));
1602 t = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1603 add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_preheader_edge (loop)),
1604 guard);
1605 add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_latch_edge (loop)),
1606 end);
1607 }
1608 e = redirect_edge_and_branch (exit, nexit->dest);
1609 PENDING_STMT (e) = NULL;
1610
1611 /* Emit OMP_FOR. */
1612 TREE_OPERAND (cond, 0) = cvar_base;
1613 type = TREE_TYPE (cvar);
1614 t = build_omp_clause (OMP_CLAUSE_SCHEDULE);
1615 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1616
1617 for_stmt = make_node (OMP_FOR);
1618 TREE_TYPE (for_stmt) = void_type_node;
1619 OMP_FOR_CLAUSES (for_stmt) = t;
1620 OMP_FOR_INIT (for_stmt) = build_gimple_modify_stmt (initvar, cvar_init);
1621 OMP_FOR_COND (for_stmt) = cond;
a509ebb5
RL
1622 OMP_FOR_INCR (for_stmt) = build_gimple_modify_stmt (cvar_base,
1623 build2 (PLUS_EXPR, type,
1624 cvar_base,
1625 build_int_cst
1626 (type, 1)));
5f40b3cb
ZD
1627 OMP_FOR_BODY (for_stmt) = NULL_TREE;
1628 OMP_FOR_PRE_BODY (for_stmt) = NULL_TREE;
1629
1630 bsi = bsi_last (for_bb);
1631 bsi_insert_after (&bsi, for_stmt, BSI_NEW_STMT);
1632 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1633
1634 /* Emit OMP_CONTINUE. */
1635 bsi = bsi_last (loop->latch);
1636 t = build2 (OMP_CONTINUE, void_type_node, cvar_next, cvar);
1637 bsi_insert_after (&bsi, t, BSI_NEW_STMT);
1638 SSA_NAME_DEF_STMT (cvar_next) = t;
1639
1640 /* Emit OMP_RETURN for OMP_FOR. */
1641 bsi = bsi_last (ex_bb);
4218dc2d
JJ
1642 t = make_node (OMP_RETURN);
1643 OMP_RETURN_NOWAIT (t) = 1;
1644 bsi_insert_after (&bsi, t, BSI_NEW_STMT);
5f40b3cb
ZD
1645
1646 return paral_bb;
1647}
1648
1649/* Generates code to execute the iterations of LOOP in N_THREADS threads in
a509ebb5
RL
1650 parallel. NITER describes number of iterations of LOOP.
1651 REDUCTION_LIST describes the reductions existant in the LOOP. */
5f40b3cb
ZD
1652
1653static void
a509ebb5
RL
1654gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1655 unsigned n_threads, struct tree_niter_desc *niter)
5f40b3cb
ZD
1656{
1657 struct loop *nloop;
9326236d 1658 loop_iterator li;
5f40b3cb
ZD
1659 tree many_iterations_cond, type, nit;
1660 tree stmts, arg_struct, new_arg_struct;
1661 basic_block parallel_head;
9f9f72aa 1662 edge entry, exit;
a509ebb5 1663 struct clsn_data clsn_data;
5f40b3cb
ZD
1664 unsigned prob;
1665
1666 /* From
1667
1668 ---------------------------------------------------------------------
1669 loop
1670 {
1671 IV = phi (INIT, IV + STEP)
1672 BODY1;
1673 if (COND)
1674 break;
1675 BODY2;
1676 }
1677 ---------------------------------------------------------------------
1678
1679 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1680 we generate the following code:
1681
1682 ---------------------------------------------------------------------
1683
1684 if (MAY_BE_ZERO
a509ebb5
RL
1685 || NITER < MIN_PER_THREAD * N_THREADS)
1686 goto original;
5f40b3cb
ZD
1687
1688 BODY1;
1689 store all local loop-invariant variables used in body of the loop to DATA.
1690 OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1691 load the variables from DATA.
1692 OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1693 BODY2;
1694 BODY1;
1695 OMP_CONTINUE;
a509ebb5
RL
1696 OMP_RETURN -- OMP_FOR
1697 OMP_RETURN -- OMP_PARALLEL
5f40b3cb
ZD
1698 goto end;
1699
1700 original:
1701 loop
1702 {
1703 IV = phi (INIT, IV + STEP)
1704 BODY1;
1705 if (COND)
1706 break;
1707 BODY2;
1708 }
1709
1710 end:
1711
1712 */
1713
1714 /* Create two versions of the loop -- in the old one, we know that the
1715 number of iterations is large enough, and we will transform it into the
1716 loop that will be split to loop_fn, the new one will be used for the
1717 remaining iterations. */
a509ebb5 1718
5f40b3cb
ZD
1719 type = TREE_TYPE (niter->niter);
1720 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1721 NULL_TREE);
1722 if (stmts)
1723 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1724
1725 many_iterations_cond =
a509ebb5
RL
1726 fold_build2 (GE_EXPR, boolean_type_node,
1727 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
5f40b3cb 1728 many_iterations_cond
a509ebb5
RL
1729 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1730 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1731 many_iterations_cond);
5f40b3cb 1732 many_iterations_cond
a509ebb5 1733 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
5f40b3cb
ZD
1734 if (stmts)
1735 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1736 if (!is_gimple_condexpr (many_iterations_cond))
1737 {
1738 many_iterations_cond
a509ebb5
RL
1739 = force_gimple_operand (many_iterations_cond, &stmts,
1740 true, NULL_TREE);
5f40b3cb
ZD
1741 if (stmts)
1742 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1743 }
1744
1745 initialize_original_copy_tables ();
1746
1747 /* We assume that the loop usually iterates a lot. */
1748 prob = 4 * REG_BR_PROB_BASE / 5;
1749 nloop = loop_version (loop, many_iterations_cond, NULL,
1750 prob, prob, REG_BR_PROB_BASE - prob, true);
1751 update_ssa (TODO_update_ssa);
1752 free_original_copy_tables ();
1753
1754 /* Base all the induction variables in LOOP on a single control one. */
a509ebb5 1755 canonicalize_loop_ivs (loop, reduction_list, nit);
5f40b3cb
ZD
1756
1757 /* Ensure that the exit condition is the first statement in the loop. */
a509ebb5
RL
1758 transform_to_exit_first_loop (loop, reduction_list, nit);
1759
a509ebb5 1760 /* Generate intializations for reductions. */
a509ebb5
RL
1761 if (htab_elements (reduction_list) > 0)
1762 htab_traverse (reduction_list, initialize_reductions, loop);
5f40b3cb
ZD
1763
1764 /* Eliminate the references to local variables from the loop. */
9f9f72aa
AP
1765 gcc_assert (single_exit (loop));
1766 entry = loop_preheader_edge (loop);
1767 exit = single_dom_exit (loop);
5f40b3cb 1768
9f9f72aa 1769 eliminate_local_variables (entry, exit);
5f40b3cb
ZD
1770 /* In the old loop, move all variables non-local to the loop to a structure
1771 and back, and create separate decls for the variables used in loop. */
9f9f72aa
AP
1772 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
1773 &new_arg_struct, &clsn_data);
5f40b3cb
ZD
1774
1775 /* Create the parallel constructs. */
1776 parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
1777 new_arg_struct, n_threads);
a509ebb5
RL
1778 if (htab_elements (reduction_list) > 0)
1779 create_call_for_reduction (loop, reduction_list, &clsn_data);
5f40b3cb
ZD
1780
1781 scev_reset ();
1782
1783 /* Cancel the loop (it is simpler to do it here rather than to teach the
1784 expander to do it). */
1785 cancel_loop_tree (loop);
1786
92a6bdbd
SP
1787 /* Free loop bound estimations that could contain references to
1788 removed statements. */
1789 FOR_EACH_LOOP (li, loop, 0)
1790 free_numbers_of_iterations_estimates_loop (loop);
1791
5f40b3cb
ZD
1792 /* Expand the parallel constructs. We do it directly here instead of running
1793 a separate expand_omp pass, since it is more efficient, and less likely to
1794 cause troubles with further analyses not being able to deal with the
1795 OMP trees. */
a509ebb5 1796
5f40b3cb
ZD
1797 omp_expand_local (parallel_head);
1798}
1799
9857228c
SP
1800/* Returns true when LOOP contains vector phi nodes. */
1801
1802static bool
1803loop_has_vector_phi_nodes (struct loop *loop)
1804{
1805 unsigned i;
1806 basic_block *bbs = get_loop_body_in_dom_order (loop);
1807 bool res = true;
1808 tree phi;
1809
1810 for (i = 0; i < loop->num_nodes; i++)
1811 for (phi = phi_nodes (bbs[i]); phi; phi = PHI_CHAIN (phi))
1812 if (TREE_CODE (TREE_TYPE (PHI_RESULT (phi))) == VECTOR_TYPE)
1813 goto end;
1814
1815 res = false;
1816 end:
1817 free (bbs);
1818 return res;
1819}
1820
5f40b3cb
ZD
1821/* Detect parallel loops and generate parallel code using libgomp
1822 primitives. Returns true if some loop was parallelized, false
1823 otherwise. */
1824
1825bool
1826parallelize_loops (void)
1827{
1828 unsigned n_threads = flag_tree_parallelize_loops;
1829 bool changed = false;
1830 struct loop *loop;
1831 struct tree_niter_desc niter_desc;
1832 loop_iterator li;
a509ebb5 1833 htab_t reduction_list;
5f40b3cb
ZD
1834
1835 /* Do not parallelize loops in the functions created by parallelization. */
1836 if (parallelized_function_p (cfun->decl))
1837 return false;
1838
a509ebb5
RL
1839 reduction_list = htab_create (10, reduction_info_hash,
1840 reduction_info_eq, free);
1841
5f40b3cb
ZD
1842 FOR_EACH_LOOP (li, loop, 0)
1843 {
a509ebb5 1844 htab_empty (reduction_list);
5f40b3cb
ZD
1845 if (/* Do not bother with loops in cold areas. */
1846 !maybe_hot_bb_p (loop->header)
1847 /* Or loops that roll too little. */
1848 || expected_loop_iterations (loop) <= n_threads
1849 /* And of course, the loop must be parallelizable. */
1850 || !can_duplicate_loop_p (loop)
1d4af1e8 1851 || loop_has_blocks_with_irreducible_flag (loop)
9857228c
SP
1852 /* FIXME: the check for vector phi nodes could be removed. */
1853 || loop_has_vector_phi_nodes (loop)
a509ebb5 1854 || !loop_parallel_p (loop, reduction_list, &niter_desc))
5f40b3cb
ZD
1855 continue;
1856
1857 changed = true;
a509ebb5 1858 gen_parallel_loop (loop, reduction_list, n_threads, &niter_desc);
5f40b3cb
ZD
1859 verify_flow_info ();
1860 verify_dominators (CDI_DOMINATORS);
1861 verify_loop_structure ();
1862 verify_loop_closed_ssa ();
1863 }
1864
a509ebb5 1865 htab_delete (reduction_list);
5f40b3cb
ZD
1866 return changed;
1867}
1868
1869#include "gt-tree-parloops.h"