]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
re PR target/33168 (GCC Boot failure, building libstc++)
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb
ZD
1/* Loop autoparallelization.
2 Copyright (C) 2006 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr> and
4 Zdenek Dvorak <dvorakz@suse.cz>.
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 2, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING. If not, write to the Free
20Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
2102110-1301, USA. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "tree.h"
28#include "rtl.h"
29#include "tree-flow.h"
30#include "cfgloop.h"
31#include "ggc.h"
32#include "tree-data-ref.h"
33#include "diagnostic.h"
34#include "tree-pass.h"
35#include "tree-scalar-evolution.h"
36#include "hashtab.h"
37#include "langhooks.h"
a509ebb5 38#include "tree-vectorizer.h"
5f40b3cb
ZD
39
40/* This pass tries to distribute iterations of loops into several threads.
41 The implementation is straightforward -- for each loop we test whether its
42 iterations are independent, and if it is the case (and some additional
43 conditions regarding profitability and correctness are satisfied), we
44 add OMP_PARALLEL and OMP_FOR codes and let omp expansion machinery do
45 its job.
46
47 The most of the complexity is in bringing the code into shape expected
48 by the omp expanders:
49 -- for OMP_FOR, ensuring that the loop has only one induction variable
50 and that the exit test is at the start of the loop body
51 -- for OMP_PARALLEL, replacing the references to local addressable
52 variables by accesses through pointers, and breaking up ssa chains
53 by storing the values incoming to the parallelized loop to a structure
54 passed to the new function as an argument (something similar is done
55 in omp gimplification, unfortunately only a small part of the code
56 can be shared).
57
58 TODO:
59 -- if there are several parallelizable loops in a function, it may be
60 possible to generate the threads just once (using synchronization to
61 ensure that cross-loop dependences are obeyed).
62 -- handling of common scalar dependence patterns (accumulation, ...)
63 -- handling of non-innermost loops */
64
a509ebb5
RL
65/*
66 Reduction handling:
67 currently we use vect_is_simple_reduction() to detect reduction patterns.
68 The code transformation will be introduced by an example.
69
70 source code:
71
72parloop
73{
74 int sum=1;
75
76 for (i = 0; i < N/1000; i++)
77 {
78 x[i] = i + 3;
79 sum+=x[i];
80 }
81}
82
83gimple code:
84
85header_bb:
86
87 # sum_24 = PHI <sum_14(3), 1(2)>;
88 # i_21 = PHI <i_15(3), 0(2)>;
89<L0>:;
90 D.2191_10 = i_21 + 3;
91 x[i_21] = D.2191_10;
92 sum_14 = D.2191_10 + sum_24;
93 i_15 = i_21 + 1;
94 if (N_8 > i_15) goto <L0>; else goto <L2>;
95
96exit_bb:
97
98 # sum_25 = PHI <sum_14(3)>;
99<L2>:;
100
101
102after reduction transformation (only relevant parts):
103
104parloop
105{
106
107....
108
109<L16>:;
110 D.2241_2 = (unsigned int) N_8;
111 D.2242_26 = D.2241_2 - 1;
112 if (D.2242_26 > 399) goto <L26>; else goto <L27>;
113
114#two new variables are created for each reduction:
115"reduction" is the variable holding the neutral element
116for the particular operation, e.g. 0 for PLUS_EXPR,
1171 for MULT_EXPR, etc.
118"reduction_initial" is the initial value given by the user.
119It is kept and will be used after the parallel computing
120is done.#
121
122<L26>:;
123 reduction.38_42 = 0;
124 reduction_initial.39_43 = 1;
125 x.40_44 = &x;
126 .paral_data_store.47.D.2261 = D.2242_26;
127 .paral_data_store.47.reduction.38 = reduction.38_42;
128 .paral_data_store.47.x.40 = x.40_44;
129 __builtin_GOMP_parallel_start (parloop._loopfn.0, &.paral_data_store.47, 4);
130 parloop._loopfn.0 (&.paral_data_store.47);
131 __builtin_GOMP_parallel_end ();
132
133# collecting the result after the join of the threads is done at
134 create_loads_for_reductions().
135 a new variable "reduction_final" is created. It calculates the
136 final value from the initial value and the value computed by
137 the threads. #
138
139 .paral_data_load.48_49 = &.paral_data_store.47;
140 reduction_final.49_50 = .paral_data_load.48_49->reduction.38;
141 reduction_final.49_51 = reduction_initial.39_43 + reduction_final.49_50;
142 ivtmp.37_36 = D.2242_26;
143 i_37 = (int) ivtmp.37_36;
144 D.2191_38 = i_37 + 3;
145 x[i_37] = D.2191_38;
146 sum_40 = D.2191_38 + reduction_final.49_51;
147 i_41 = i_37 + 1;
148 goto <bb 8> (<L2>);
149
150 # sum_25 = PHI <sum_40(4), sum_9(6)>;
151<L2>:;
152 printf (&"sum is %d\n"[0], sum_25);
153
154...
155
156}
157
158parloop._loopfn.0 (.paral_data_param)
159{
160 ...
161
162<L28>:;
163 .paral_data_param_52 = .paral_data_param_75;
164 .paral_data_load.48_48 = (struct .paral_data.46 *) .paral_data_param_52;
165 D.2289_46 = .paral_data_load.48_48->D.2261;
166 reduction.43_45 = .paral_data_load.48_48->reduction.38;
167 x.45_47 = .paral_data_load.48_48->x.40;
168 # SUCC: 23 [100.0%] (fallthru)
169
170 # BLOCK 23
171 # PRED: 21 [100.0%] (fallthru)
172<L30>:;
173 D.2292_60 = __builtin_omp_get_num_threads ();
174 D.2293_61 = (unsigned int) D.2292_60;
175 D.2294_62 = __builtin_omp_get_thread_num ();
176 D.2295_63 = (unsigned int) D.2294_62;
177 D.2296_64 = D.2289_46 / D.2293_61;
178 D.2297_65 = D.2293_61 * D.2296_64;
179 D.2298_66 = D.2297_65 != D.2289_46;
180 D.2299_67 = D.2296_64 + D.2298_66;
181 D.2300_68 = D.2299_67 * D.2295_63;
182 D.2301_69 = D.2299_67 + D.2300_68;
183 D.2302_70 = MIN_EXPR <D.2301_69, D.2289_46>;
184 ivtmp.41_54 = D.2300_68;
185 if (D.2300_68 >= D.2302_70) goto <L31>; else goto <L32>;
186 # SUCC: 26 [100.0%] (false) 24 (true)
187
188 # BLOCK 26
189 # PRED: 23 [100.0%] (false)
190<L32>:;
191 # SUCC: 4 [100.0%] (fallthru)
192
193 # BLOCK 4
194 # PRED: 5 [100.0%] (true) 26 [100.0%] (fallthru)
195 # ivtmp.41_31 = PHI <ivtmp.41_30(5), ivtmp.41_54(26)>;
196 # sum.42_32 = PHI <sum.42_14(5), reduction.43_45(26)>;
197<L0>:;
198 # SUCC: 19 [100.0%] (fallthru)
199
200 # BLOCK 19
201 # PRED: 4 [100.0%] (fallthru)
202 # sum.42_24 = PHI <sum.42_32(4)>;
203 # ivtmp.41_17 = PHI <ivtmp.41_31(4)>;
204 i.44_21 = (int) ivtmp.41_17;
205 D.2310_10 = i.44_21 + 3;
206 (*x.45_47)[i.44_21] = D.2310_10;
207 sum.42_14 = D.2310_10 + sum.42_24;
208 i.44_15 = i.44_21 + 1;
209 # SUCC: 5 [100.0%] (fallthru)
210
211 # BLOCK 5
212 # PRED: 19 [100.0%] (fallthru)
213<L17>:;
214 ivtmp.41_30 = ivtmp.41_31 + 1;
215 if (ivtmp.41_30 < D.2302_70) goto <L0>; else goto <L31>;
216 # SUCC: 4 [100.0%] (true) 24 (false)
217
218 # Adding this reduction phi is done at
219 create_phi_for_local_result() #
220
221 # BLOCK 24
222 # PRED: 5 (false) 23 (true)
223 # reduction.38_56 = PHI <sum.42_14(5), 0(23)>;
224 <L31>:;
225 __builtin_GOMP_barrier ();
226 # SUCC: 25 [100.0%] (fallthru)
227
228 # Creating the atomic operation is
229 done at create_call_for_reduction_1() #
230
231 # BLOCK 25
232 # PRED: 24 [100.0%] (fallthru)
233 D.2306_57 = &.paral_data_load.48_48->reduction.38;
234 D.2307_58 = (unsigned int) reduction.38_56;
235 D.2308_59 = __sync_fetch_and_add_4 (D.2306_57, D.2307_58);
236 # SUCC: 22 [100.0%] (fallthru)
237
238 # BLOCK 22
239 # PRED: 25 [100.0%] (fallthru)
240 <L29>:;
241 return;
242 # SUCC: EXIT
243
244}
245
246*/
247
5f40b3cb
ZD
248/* Minimal number of iterations of a loop that should be executed in each
249 thread. */
250#define MIN_PER_THREAD 100
251
a509ebb5
RL
252/* Element of the hashtable, representing a
253 reduction in the current loop. */
254struct reduction_info
255{
256 tree reduc_stmt; /* reduction statement. */
257 tree reduc_phi; /* The phi node defining the reduction. */
258 enum tree_code reduction_code; /* code for the reduction operation. */
259 tree keep_res; /* The PHI_RESULT of this phi is the resulting value
260 of the reduction variable when existing the loop. */
261 tree initial_value; /* An ssa name representing a new variable holding
262 the initial value of the reduction var before entering the loop. */
263 tree field; /* the name of the field in the parloop data structure intended for reduction. */
264 tree reduction_init; /* An ssa name representing a new variable which will be
265 assigned the proper reduction initialization value (init). */
266 tree init; /* reduction initialization value. */
267 tree new_phi; /* (helper field) Newly created phi node whose result
268 will be passed to the atomic operation. Represents
269 the local result each thread computed for the reduction
270 operation. */
271};
272
273/* Equality and hash functions for hashtab code. */
274
275static int
276reduction_info_eq (const void *aa, const void *bb)
277{
278 const struct reduction_info *a = (const struct reduction_info *) aa;
279 const struct reduction_info *b = (const struct reduction_info *) bb;
280
281 return (a->reduc_phi == b->reduc_phi);
282}
283
284static hashval_t
285reduction_info_hash (const void *aa)
286{
287 const struct reduction_info *a = (const struct reduction_info *) aa;
288
289 return htab_hash_pointer (a->reduc_phi);
290}
291
292static struct reduction_info *
293reduction_phi (htab_t reduction_list, tree phi)
294{
295 struct reduction_info tmpred, *red;
296
297 if (htab_elements (reduction_list) == 0)
298 return NULL;
299
300 tmpred.reduc_phi = phi;
301 red = htab_find (reduction_list, &tmpred);
302
303 return red;
304}
305
5f40b3cb
ZD
306/* Element of hashtable of names to copy. */
307
308struct name_to_copy_elt
309{
310 unsigned version; /* The version of the name to copy. */
311 tree new_name; /* The new name used in the copy. */
312 tree field; /* The field of the structure used to pass the
313 value. */
314};
315
316/* Equality and hash functions for hashtab code. */
317
318static int
319name_to_copy_elt_eq (const void *aa, const void *bb)
320{
a509ebb5
RL
321 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
322 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
5f40b3cb
ZD
323
324 return a->version == b->version;
325}
326
327static hashval_t
328name_to_copy_elt_hash (const void *aa)
329{
a509ebb5 330 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
5f40b3cb
ZD
331
332 return (hashval_t) a->version;
333}
334
335/* Returns true if the iterations of LOOP are independent on each other (that
336 is, if we can execute them in parallel), and if LOOP satisfies other
337 conditions that we need to be able to parallelize it. Description of number
a509ebb5
RL
338 of iterations is stored to NITER. Reduction analysis is done, if
339 reductions are found, they are inserted to the REDUCTION_LIST. */
5f40b3cb
ZD
340
341static bool
a509ebb5 342loop_parallel_p (struct loop *loop, htab_t reduction_list, struct tree_niter_desc *niter)
5f40b3cb
ZD
343{
344 edge exit = single_dom_exit (loop);
a509ebb5
RL
345 VEC (ddr_p, heap) * dependence_relations;
346 VEC (data_reference_p, heap) * datarefs;
5f40b3cb
ZD
347 lambda_trans_matrix trans;
348 bool ret = false;
349 tree phi;
a509ebb5 350 loop_vec_info simple_loop_info;
5f40b3cb
ZD
351
352 /* Only consider innermost loops with just one exit. The innermost-loop
353 restriction is not necessary, but it makes things simpler. */
354 if (loop->inner || !exit)
355 return false;
356
357 if (dump_file && (dump_flags & TDF_DETAILS))
358 fprintf (dump_file, "\nConsidering loop %d\n", loop->num);
359
360 /* We need to know # of iterations, and there should be no uses of values
361 defined inside loop outside of it, unless the values are invariants of
362 the loop. */
363 if (!number_of_iterations_exit (loop, exit, niter, false))
364 {
365 if (dump_file && (dump_flags & TDF_DETAILS))
366 fprintf (dump_file, " FAILED: number of iterations not known\n");
367 return false;
368 }
369
a509ebb5
RL
370 simple_loop_info = vect_analyze_loop_form (loop);
371
372 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
373 {
374 tree reduc_stmt = NULL, operation;
375
376 /* ??? TODO: Change this into a generic function that
377 recognizes reductions. */
378 if (!is_gimple_reg (PHI_RESULT (phi)))
379 continue;
380 if (simple_loop_info)
381 reduc_stmt = vect_is_simple_reduction (simple_loop_info, phi);
382
383 /* Create a reduction_info struct, initialize it and insert it to
384 the reduction list. */
385
386 if (reduc_stmt)
387 {
388 PTR *slot;
389 struct reduction_info *new_reduction;
390
391 if (dump_file && (dump_flags & TDF_DETAILS))
392 {
393 fprintf (dump_file,
394 "Detected reduction. reduction stmt is: \n");
395 print_generic_stmt (dump_file, reduc_stmt, 0);
396 fprintf (dump_file, "\n");
397 }
398
399 new_reduction = XCNEW (struct reduction_info);
400
401 new_reduction->reduc_stmt = reduc_stmt;
402 new_reduction->reduc_phi = phi;
403 operation = GIMPLE_STMT_OPERAND (reduc_stmt, 1);
404 new_reduction->reduction_code = TREE_CODE (operation);
405 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
406 *slot = new_reduction;
407 }
408 }
409
5f40b3cb
ZD
410 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
411 {
a509ebb5
RL
412 struct reduction_info *red;
413 imm_use_iterator imm_iter;
414 use_operand_p use_p;
415 tree reduc_phi;
416
5f40b3cb
ZD
417 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
418
419 if (is_gimple_reg (val))
420 {
421 if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
422 {
423 fprintf (dump_file, "phi is ");
424 print_generic_expr (dump_file, phi, 0);
425 fprintf (dump_file, "arg of phi to exit: value ");
426 print_generic_expr (dump_file, val, 0);
427 fprintf (dump_file, " used outside loop\n");
428 fprintf (dump_file,
429 " checking if it a part of reduction pattern: \n");
430 }
431 if (htab_elements (reduction_list) == 0)
432 {
433 if (dump_file && (dump_flags & TDF_DETAILS))
434 fprintf (dump_file,
435 " FAILED: it is not a part of reduction.\n");
436 return false;
437 }
438 reduc_phi = NULL;
439 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
440 {
441 if (flow_bb_inside_loop_p (loop, bb_for_stmt (USE_STMT (use_p))))
442 {
443 reduc_phi = USE_STMT (use_p);
444 break;
445 }
446 }
447 red = reduction_phi (reduction_list, reduc_phi);
448 if (red == NULL)
449 {
450 if (dump_file && (dump_flags & TDF_DETAILS))
451 fprintf (dump_file,
452 " FAILED: it is not a part of reduction.\n");
453 return false;
454 }
455 if (dump_file && (dump_flags & TDF_DETAILS))
456 {
457 fprintf (dump_file, "reduction phi is ");
458 print_generic_expr (dump_file, red->reduc_phi, 0);
459 fprintf (dump_file, "reduction stmt is ");
460 print_generic_expr (dump_file, red->reduc_stmt, 0);
461 }
462
5f40b3cb
ZD
463 }
464 }
465
466 /* The iterations of the loop may communicate only through bivs whose
467 iteration space can be distributed efficiently. */
468 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
469 {
470 tree def = PHI_RESULT (phi);
471 affine_iv iv;
472
a509ebb5 473 if (is_gimple_reg (def) && !simple_iv (loop, phi, def, &iv, true))
5f40b3cb 474 {
a509ebb5
RL
475 struct reduction_info *red;
476
477 red = reduction_phi (reduction_list, phi);
478 if (red == NULL)
479 {
480 if (dump_file && (dump_flags & TDF_DETAILS))
481 fprintf (dump_file,
482 " FAILED: scalar dependency between iterations\n");
483 return false;
484 }
5f40b3cb
ZD
485 }
486 }
487
488 /* We need to version the loop to verify assumptions in runtime. */
489 if (!can_duplicate_loop_p (loop))
490 {
491 if (dump_file && (dump_flags & TDF_DETAILS))
492 fprintf (dump_file, " FAILED: cannot be duplicated\n");
493 return false;
494 }
495
496 /* Check for problems with dependences. If the loop can be reversed,
497 the iterations are independent. */
498 datarefs = VEC_alloc (data_reference_p, heap, 10);
499 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
500 compute_data_dependences_for_loop (loop, true, &datarefs,
501 &dependence_relations);
502 if (dump_file && (dump_flags & TDF_DETAILS))
503 dump_data_dependence_relations (dump_file, dependence_relations);
504
505 trans = lambda_trans_matrix_new (1, 1);
506 LTM_MATRIX (trans)[0][0] = -1;
507
508 if (lambda_transform_legal_p (trans, 1, dependence_relations))
509 {
510 ret = true;
511 if (dump_file && (dump_flags & TDF_DETAILS))
512 fprintf (dump_file, " SUCCESS: may be parallelized\n");
513 }
514 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
515 fprintf (dump_file,
516 " FAILED: data dependencies exist across iterations\n");
5f40b3cb
ZD
517
518 free_dependence_relations (dependence_relations);
519 free_data_refs (datarefs);
520
521 return ret;
522}
523
524/* Assigns the address of VAR in TYPE to an ssa name, and returns this name.
525 The assignment statement is placed before LOOP. DECL_ADDRESS maps decls
526 to their addresses that can be reused. */
527
528static tree
529take_address_of (tree var, tree type, struct loop *loop, htab_t decl_address)
530{
531 int uid = DECL_UID (var);
532 void **dslot;
533 struct int_tree_map ielt, *nielt;
534 tree name, bvar, stmt;
535 edge entry = loop_preheader_edge (loop);
536
537 ielt.uid = uid;
538 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
539 if (!*dslot)
540 {
541 bvar = create_tmp_var (type, get_name (var));
542 add_referenced_var (bvar);
543 stmt = build_gimple_modify_stmt (bvar,
a509ebb5
RL
544 fold_convert (type,
545 build_addr (var,
546 current_function_decl)));
5f40b3cb
ZD
547 name = make_ssa_name (bvar, stmt);
548 GIMPLE_STMT_OPERAND (stmt, 0) = name;
549 bsi_insert_on_edge_immediate (entry, stmt);
550
551 nielt = XNEW (struct int_tree_map);
552 nielt->uid = uid;
553 nielt->to = name;
554 *dslot = nielt;
555
556 return name;
557 }
558
559 name = ((struct int_tree_map *) *dslot)->to;
560 if (TREE_TYPE (name) == type)
561 return name;
562
563 bvar = SSA_NAME_VAR (name);
a509ebb5 564 stmt = build_gimple_modify_stmt (bvar, fold_convert (type, name));
5f40b3cb
ZD
565 name = make_ssa_name (bvar, stmt);
566 GIMPLE_STMT_OPERAND (stmt, 0) = name;
567 bsi_insert_on_edge_immediate (entry, stmt);
568
569 return name;
570}
571
a509ebb5
RL
572/* Callback for htab_traverse. Create the initialization statement
573 for reduction described in SLOT, and place it at the preheader of
574 the loop described in DATA. */
575
576static int
577initialize_reductions (void **slot, void *data)
578{
579 tree t, stmt;
580 tree init, c;
581 tree name, name1;
582 tree bvar, type, arg;
583 edge e;
584
585 struct reduction_info *reduc = *slot;
586 struct loop *loop = (struct loop *) data;
587
588 /* Create initialization in preheader:
589 reduction_variable = initialization value of reduction. */
590
591 /* In the phi node at the header, replace the argument coming
592 from the preheader with the reduction initialization value. */
593
594 /* Create a new variable to initialize the reduction. */
595 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
596 bvar = create_tmp_var (type, "reduction");
597 add_referenced_var (bvar);
598
599 c = build_omp_clause (OMP_CLAUSE_REDUCTION);
600 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
601 OMP_CLAUSE_DECL (c) =
602 SSA_NAME_VAR (GIMPLE_STMT_OPERAND (reduc->reduc_stmt, 0));
603
604 init = omp_reduction_init (c, TREE_TYPE (bvar));
605 reduc->init = init;
606
607 t = build_gimple_modify_stmt (bvar, init);
608 name = make_ssa_name (bvar, t);
609
610 GIMPLE_STMT_OPERAND (t, 0) = name;
611 SSA_NAME_DEF_STMT (name) = t;
612
613 /* Replace the argument
614 representing the initialization value. Keeping the old value
615 in a new variable "reduction_initial", that will be taken in
616 consideration after the parallel computing is done. */
617
618 e = loop_preheader_edge (loop);
619 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
620 /* Create new variable to hold the initial value. */
621 type = TREE_TYPE (bvar);
622 bvar = create_tmp_var (type, "reduction_initial");
623 add_referenced_var (bvar);
624
625 stmt = build_gimple_modify_stmt (bvar, arg);
626 name1 = make_ssa_name (bvar, stmt);
627 GIMPLE_STMT_OPERAND (stmt, 0) = name1;
628 SSA_NAME_DEF_STMT (name1) = stmt;
629
630 bsi_insert_on_edge_immediate (e, stmt);
631 bsi_insert_on_edge_immediate (e, t);
632 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
633 (reduc->reduc_phi, loop_preheader_edge (loop)), name);
634 reduc->initial_value = name1;
635 reduc->reduction_init = name;
636 return 1;
637}
5f40b3cb
ZD
638
639struct elv_data
640{
641 struct loop *loop;
642 htab_t decl_address;
643 bool changed;
644};
645
a509ebb5
RL
646/* Eliminates references to local variables in *TP out of LOOP. DECL_ADDRESS
647 contains addresses of the references that had their address taken already.
648 If the expression is changed, CHANGED is set to true. Callback for
649 walk_tree. */
650
5f40b3cb 651static tree
a509ebb5 652eliminate_local_variables_1 (tree * tp, int *walk_subtrees, void *data)
5f40b3cb
ZD
653{
654 struct elv_data *dta = data;
655 tree t = *tp, var, addr, addr_type, type;
656
657 if (DECL_P (t))
658 {
659 *walk_subtrees = 0;
660
661 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
662 return NULL_TREE;
663
664 type = TREE_TYPE (t);
665 addr_type = build_pointer_type (type);
666 addr = take_address_of (t, addr_type, dta->loop, dta->decl_address);
667 *tp = build1 (INDIRECT_REF, TREE_TYPE (*tp), addr);
668
669 dta->changed = true;
670 return NULL_TREE;
671 }
672
673 if (TREE_CODE (t) == ADDR_EXPR)
674 {
675 var = TREE_OPERAND (t, 0);
676 if (!DECL_P (var))
677 return NULL_TREE;
678
679 *walk_subtrees = 0;
680 if (!SSA_VAR_P (var) || DECL_EXTERNAL (var))
681 return NULL_TREE;
682
683 addr_type = TREE_TYPE (t);
684 addr = take_address_of (var, addr_type, dta->loop, dta->decl_address);
685 *tp = addr;
686
687 dta->changed = true;
688 return NULL_TREE;
689 }
690
a509ebb5 691 if (!EXPR_P (t) && !GIMPLE_STMT_P (t))
5f40b3cb
ZD
692 *walk_subtrees = 0;
693
694 return NULL_TREE;
695}
696
697/* Moves the references to local variables in STMT from LOOP. DECL_ADDRESS
698 contains addresses for the references for that we have already taken
699 them. */
700
701static void
702eliminate_local_variables_stmt (struct loop *loop, tree stmt,
703 htab_t decl_address)
704{
705 struct elv_data dta;
706
707 dta.loop = loop;
708 dta.decl_address = decl_address;
709 dta.changed = false;
710
711 walk_tree (&stmt, eliminate_local_variables_1, &dta, NULL);
712
713 if (dta.changed)
714 update_stmt (stmt);
715}
716
a509ebb5
RL
717/* Eliminates the references to local variables from LOOP.
718 This includes:
719 1) Taking address of a local variable -- these are moved out of the
720 loop (and temporary variable is created to hold the address if
721 necessary).
5f40b3cb 722 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 723 references. */
5f40b3cb
ZD
724
725static void
726eliminate_local_variables (struct loop *loop)
727{
728 basic_block bb, *body = get_loop_body (loop);
729 unsigned i;
730 block_stmt_iterator bsi;
731 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
732 free);
733
734 /* Find and rename the ssa names defined outside of loop. */
735 for (i = 0; i < loop->num_nodes; i++)
736 {
737 bb = body[i];
738
739 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
740 eliminate_local_variables_stmt (loop, bsi_stmt (bsi), decl_address);
741 }
742
743 htab_delete (decl_address);
744}
745
746/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
747 The copies are stored to NAME_COPIES, if NAME was already duplicated,
748 its duplicate stored in NAME_COPIES is returned.
749
750 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
751 duplicated, storing the copies in DECL_COPIES. */
752
753static tree
754separate_decls_in_loop_name (tree name,
755 htab_t name_copies, htab_t decl_copies,
756 bool copy_name_p)
757{
758 tree copy, var, var_copy;
759 unsigned idx, uid, nuid;
760 struct int_tree_map ielt, *nielt;
761 struct name_to_copy_elt elt, *nelt;
762 void **slot, **dslot;
763
764 if (TREE_CODE (name) != SSA_NAME)
765 return name;
766
767 idx = SSA_NAME_VERSION (name);
768 elt.version = idx;
769 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
770 copy_name_p ? INSERT : NO_INSERT);
771 if (slot && *slot)
772 return ((struct name_to_copy_elt *) *slot)->new_name;
773
774 var = SSA_NAME_VAR (name);
775 uid = DECL_UID (var);
776 ielt.uid = uid;
777 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
778 if (!*dslot)
779 {
780 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
781 add_referenced_var (var_copy);
782 nielt = XNEW (struct int_tree_map);
783 nielt->uid = uid;
784 nielt->to = var_copy;
785 *dslot = nielt;
786
787 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 788 it again. */
5f40b3cb
ZD
789 nuid = DECL_UID (var_copy);
790 ielt.uid = nuid;
791 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
792 gcc_assert (!*dslot);
793 nielt = XNEW (struct int_tree_map);
794 nielt->uid = nuid;
795 nielt->to = var_copy;
796 *dslot = nielt;
797 }
798 else
799 var_copy = ((struct int_tree_map *) *dslot)->to;
800
801 if (copy_name_p)
802 {
803 copy = duplicate_ssa_name (name, NULL_TREE);
804 nelt = XNEW (struct name_to_copy_elt);
805 nelt->version = idx;
806 nelt->new_name = copy;
807 nelt->field = NULL_TREE;
808 *slot = nelt;
809 }
810 else
811 {
812 gcc_assert (!slot);
813 copy = name;
814 }
815
816 SSA_NAME_VAR (copy) = var_copy;
817 return copy;
818}
819
820/* Finds the ssa names used in STMT that are defined outside of LOOP and
821 replaces such ssa names with their duplicates. The duplicates are stored to
822 NAME_COPIES. Base decls of all ssa names used in STMT
823 (including those defined in LOOP) are replaced with the new temporary
824 variables; the replacement decls are stored in DECL_COPIES. */
825
826static void
827separate_decls_in_loop_stmt (struct loop *loop, tree stmt,
828 htab_t name_copies, htab_t decl_copies)
829{
830 use_operand_p use;
831 def_operand_p def;
832 ssa_op_iter oi;
833 tree name, copy;
834 bool copy_name_p;
835
836 mark_virtual_ops_for_renaming (stmt);
837
838 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
839 {
840 name = DEF_FROM_PTR (def);
841 gcc_assert (TREE_CODE (name) == SSA_NAME);
842 copy = separate_decls_in_loop_name (name, name_copies, decl_copies,
843 false);
844 gcc_assert (copy == name);
845 }
5f40b3cb
ZD
846
847 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
848 {
849 name = USE_FROM_PTR (use);
850 if (TREE_CODE (name) != SSA_NAME)
851 continue;
852
853 copy_name_p = expr_invariant_in_loop_p (loop, name);
854 copy = separate_decls_in_loop_name (name, name_copies, decl_copies,
855 copy_name_p);
856 SET_USE (use, copy);
857 }
5f40b3cb
ZD
858}
859
a509ebb5
RL
860/* A helper structure for passing the TYPE and REDUCTION_LIST
861 to the DATA parameter of add_field_for_name. */
862struct data_arg
863{
864 tree type;
865 htab_t reduction_list;
866};
867
5f40b3cb 868/* Callback for htab_traverse. Adds a field corresponding to a ssa name
a509ebb5
RL
869 described in SLOT. The type is passed in DATA. The Reduction list
870 is also passes in DATA. */
5f40b3cb
ZD
871
872static int
873add_field_for_name (void **slot, void *data)
874{
a509ebb5
RL
875 tree stmt;
876 use_operand_p use_p = NULL;
877
5f40b3cb 878 struct name_to_copy_elt *elt = *slot;
a509ebb5
RL
879 struct data_arg *data_arg = (struct data_arg *) data;
880 tree type = data_arg->type;
5f40b3cb
ZD
881 tree name = ssa_name (elt->version);
882 tree var = SSA_NAME_VAR (name);
883 tree field = build_decl (FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
884
885 insert_field_into_struct (type, field);
886 elt->field = field;
a509ebb5
RL
887
888 /* Find uses of name to determine if this name is related to
889 a reduction phi, and if so, record the field in the reduction struct. */
890
891 if ((htab_elements (data_arg->reduction_list) > 0)
892 && single_imm_use (elt->new_name, &use_p, &stmt)
893 && TREE_CODE (stmt) == PHI_NODE)
894 {
895 /* check if STMT is a REDUC_PHI of some reduction. */
896 struct reduction_info *red;
897
898 red = reduction_phi (data_arg->reduction_list ,stmt);
899 if (red)
900 red->field = field;
901 }
902
5f40b3cb
ZD
903 return 1;
904}
905
a509ebb5
RL
906/* Callback for htab_traverse. A local result is the intermediate result
907 computed by a single
908 thread, or the intial value in case no iteration was executed.
909 This function creates a phi node reflecting these values.
910 The phi's result will be stored in NEW_PHI field of the
911 reduction's data structure. */
912
913static int
914create_phi_for_local_result (void **slot, void *data)
915{
916 struct reduction_info *reduc = *slot;
917 struct loop *loop = data;
918 edge e;
919 tree new_phi;
920 basic_block store_bb;
921 tree local_res;
922
923 /* STORE_BB is the block where the phi
924 should be stored. It is the destination of the loop exit.
925 (Find the fallthru edge from OMP_CONTINUE). */
926 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
927
928 /* STORE_BB has two predecessors. One coming from the loop
929 (the reduction's result is computed at the loop),
930 and another coming from a block preceding the loop,
931 when no iterations
932 are executed (the initial value should be taken). */
933 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
934 e = EDGE_PRED (store_bb, 1);
935 else
936 e = EDGE_PRED (store_bb, 0);
937 local_res = make_ssa_name (SSA_NAME_VAR (reduc->reduction_init), NULL_TREE);
938 new_phi = create_phi_node (local_res, store_bb);
939 SSA_NAME_DEF_STMT (local_res) = new_phi;
940 add_phi_arg (new_phi, reduc->init, e);
941 add_phi_arg (new_phi, GIMPLE_STMT_OPERAND (reduc->reduc_stmt, 0),
942 FALLTHRU_EDGE (loop->latch));
943 reduc->new_phi = new_phi;
944
945 return 1;
946}
5f40b3cb
ZD
947
948struct clsn_data
949{
950 tree store;
951 tree load;
952
953 basic_block store_bb;
954 basic_block load_bb;
955};
956
a509ebb5
RL
957/* Callback for htab_traverse. Create an atomic instruction for the
958 reduction described in SLOT.
959 DATA annotates the place in memory the atomic operation relates to,
960 and the basic block it needs to be generated in. */
961
962static int
963create_call_for_reduction_1 (void **slot, void *data)
964{
965 struct reduction_info *reduc = *slot;
966 struct clsn_data *clsn_data = data;
967 block_stmt_iterator bsi;
968 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
969 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
970 tree load_struct;
971 basic_block bb;
972 basic_block new_bb;
973 edge e;
974 tree t, addr, addr_type, ref, x;
975 tree tmp_load, load, name;
976
977 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
978 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
979 addr_type = build_pointer_type (type);
980
981 addr = build_addr (t, current_function_decl);
982
983 /* Create phi node. */
984 bb = clsn_data->load_bb;
985
986 e = split_block (bb, t);
987 new_bb = e->dest;
988
989 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
990 add_referenced_var (tmp_load);
991 tmp_load = make_ssa_name (tmp_load, NULL);
992 load = build2 (OMP_ATOMIC_LOAD, void_type_node, tmp_load, addr);
993 SSA_NAME_DEF_STMT (tmp_load) = load;
994 bsi = bsi_start (new_bb);
995 bsi_insert_after (&bsi, load, BSI_NEW_STMT);
996
997 e = split_block (new_bb, load);
998 new_bb = e->dest;
999 bsi = bsi_start (new_bb);
1000 ref = tmp_load;
1001 x =
1002 fold_build2 (reduc->reduction_code,
1003 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1004 PHI_RESULT (reduc->new_phi));
1005
1006 name =
1007 force_gimple_operand_bsi (&bsi, x, true, NULL_TREE, true,
1008 BSI_CONTINUE_LINKING);
1009
1010 x = build1 (OMP_ATOMIC_STORE, void_type_node, name);
1011
1012 bsi_insert_after (&bsi, x, BSI_NEW_STMT);
1013 return 1;
1014}
1015
1016/* Create the atomic operation at the join point of the threads.
1017 REDUCTION_LIST describes the reductions in the LOOP.
1018 LD_ST_DATA describes the shared data structure where
1019 shared data is stored in and loaded from. */
1020static void
1021create_call_for_reduction (struct loop *loop, htab_t reduction_list,
1022 struct clsn_data *ld_st_data)
1023{
1024 htab_traverse (reduction_list, create_phi_for_local_result, loop);
1025 /* Find the fallthru edge from OMP_CONTINUE. */
1026 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1027 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1028}
1029
1030/* Callback for htab_traverse. Create a new variable that loads the
1031 final reduction value at the
1032 join point of all threads, adds the initial value the reduction
1033 variable had before the parallel computation started, and
1034 inserts it in the right place. */
1035
1036static int
1037create_loads_for_reductions (void **slot, void *data)
1038{
1039 struct reduction_info *red = *slot;
1040 struct clsn_data *clsn_data = data;
1041 tree stmt;
1042 block_stmt_iterator bsi;
1043 tree type = TREE_TYPE (red->reduction_init);
1044 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
1045 tree load_struct;
1046 tree bvar, name;
1047 tree x;
1048
1049 bsi = bsi_after_labels (clsn_data->load_bb);
1050 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
1051 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1052 NULL_TREE);
1053 bvar = create_tmp_var (type, "reduction_final");
1054 add_referenced_var (bvar);
1055
1056 /* Apply operation between the new variable which is the result
1057 of computation all threads, and the initial value which is kept
1058 at reduction->inital_value. */
1059
1060 stmt = build_gimple_modify_stmt (bvar, load_struct);
1061 name = make_ssa_name (bvar, stmt);
1062 GIMPLE_STMT_OPERAND (stmt, 0) = name;
1063 SSA_NAME_DEF_STMT (name) = stmt;
1064
1065 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
1066
1067 x =
1068 fold_build2 (red->reduction_code, TREE_TYPE (load_struct),
1069 name, red->initial_value);
1070 name = PHI_RESULT (red->keep_res);
1071 stmt = build_gimple_modify_stmt (name, x);
1072 GIMPLE_STMT_OPERAND (stmt, 0) = name;
1073 SSA_NAME_DEF_STMT (name) = stmt;
1074
1075 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
1076
1077 remove_phi_node (red->keep_res, NULL_TREE, false);
1078
1079 return 1;
1080}
1081
1082/* Load the reduction result that was stored in LD_ST_DATA.
1083 REDUCTION_LIST describes the list of reductions that the
1084 loades should be generated for. */
1085static void
1086create_final_loads_for_reduction (htab_t reduction_list,
1087 struct clsn_data *ld_st_data)
1088{
1089 block_stmt_iterator bsi;
1090 tree t;
1091
1092 bsi = bsi_after_labels (ld_st_data->load_bb);
1093 t = build_fold_addr_expr (ld_st_data->store);
1094 t =
1095 build_gimple_modify_stmt (ld_st_data->load,
1096 build_fold_addr_expr (ld_st_data->store));
1097
1098 bsi_insert_before (&bsi, t, BSI_NEW_STMT);
1099 SSA_NAME_DEF_STMT (ld_st_data->load) = t;
1100 GIMPLE_STMT_OPERAND (t, 0) = ld_st_data->load;
1101
1102 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1103
1104}
1105
1106/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1107 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1108 specified in SLOT. */
1109
5f40b3cb
ZD
1110static int
1111create_loads_and_stores_for_name (void **slot, void *data)
1112{
1113 struct name_to_copy_elt *elt = *slot;
1114 struct clsn_data *clsn_data = data;
1115 tree stmt;
1116 block_stmt_iterator bsi;
1117 tree type = TREE_TYPE (elt->new_name);
1118 tree struct_type = TREE_TYPE (TREE_TYPE (clsn_data->load));
1119 tree load_struct;
1120
1121 bsi = bsi_last (clsn_data->store_bb);
a509ebb5
RL
1122 stmt =
1123 build_gimple_modify_stmt (build3
1124 (COMPONENT_REF, type, clsn_data->store,
1125 elt->field, NULL_TREE),
1126 ssa_name (elt->version));
5f40b3cb
ZD
1127 mark_virtual_ops_for_renaming (stmt);
1128 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
1129
1130 bsi = bsi_last (clsn_data->load_bb);
1131 load_struct = fold_build1 (INDIRECT_REF, struct_type, clsn_data->load);
a509ebb5
RL
1132 stmt = build_gimple_modify_stmt (elt->new_name,
1133 build3 (COMPONENT_REF, type, load_struct,
1134 elt->field, NULL_TREE));
5f40b3cb
ZD
1135 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
1136 bsi_insert_after (&bsi, stmt, BSI_NEW_STMT);
1137
1138 return 1;
1139}
1140
1141/* Moves all the variables used in LOOP and defined outside of it (including
1142 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1143 name) to a structure created for this purpose. The code
1144
1145 while (1)
1146 {
1147 use (a);
1148 use (b);
1149 }
1150
1151 is transformed this way:
1152
1153 bb0:
1154 old.a = a;
1155 old.b = b;
1156
1157 bb1:
1158 a' = new->a;
1159 b' = new->b;
1160 while (1)
1161 {
1162 use (a');
1163 use (b');
1164 }
1165
1166 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1167 pointer `new' is intentionally not initialized (the loop will be split to a
1168 separate function later, and `new' will be initialized from its arguments).
a509ebb5
RL
1169 LD_ST_DATA holds information about the shared data structure used to pass
1170 information among the threads. It is initialized here, and
1171 gen_parallel_loop will pass it to create_call_for_reduction that
1172 needs this information. REDUCTION_LIST describes the reductions
1173 in LOOP. */
5f40b3cb
ZD
1174
1175static void
a509ebb5
RL
1176separate_decls_in_loop (struct loop *loop, htab_t reduction_list,
1177 tree * arg_struct, tree * new_arg_struct,
1178 struct clsn_data *ld_st_data)
1179
5f40b3cb
ZD
1180{
1181 basic_block bb1 = split_edge (loop_preheader_edge (loop));
1182 basic_block bb0 = single_pred (bb1);
1183 htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
1184 name_to_copy_elt_eq, free);
1185 htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
1186 free);
1187 basic_block bb, *body = get_loop_body (loop);
1188 unsigned i;
1189 tree phi, type, type_name, nvar;
1190 block_stmt_iterator bsi;
1191 struct clsn_data clsn_data;
1192
1193 /* Find and rename the ssa names defined outside of loop. */
1194 for (i = 0; i < loop->num_nodes; i++)
1195 {
1196 bb = body[i];
1197
1198 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
1199 separate_decls_in_loop_stmt (loop, phi, name_copies, decl_copies);
1200
1201 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1202 separate_decls_in_loop_stmt (loop, bsi_stmt (bsi), name_copies,
1203 decl_copies);
1204 }
1205 free (body);
1206
1207 if (htab_elements (name_copies) == 0)
1208 {
1209 /* It may happen that there is nothing to copy (if there are only
a509ebb5 1210 loop carried and external variables in the loop). */
5f40b3cb
ZD
1211 *arg_struct = NULL;
1212 *new_arg_struct = NULL;
1213 }
1214 else
1215 {
a509ebb5
RL
1216 struct data_arg data_arg;
1217
5f40b3cb
ZD
1218 /* Create the type for the structure to store the ssa names to. */
1219 type = lang_hooks.types.make_type (RECORD_TYPE);
1220 type_name = build_decl (TYPE_DECL, create_tmp_var_name (".paral_data"),
1221 type);
1222 TYPE_NAME (type) = type_name;
1223
a509ebb5
RL
1224 data_arg.type = type;
1225 data_arg.reduction_list = reduction_list;
1226 htab_traverse (name_copies, add_field_for_name, &data_arg);
5f40b3cb
ZD
1227 layout_type (type);
1228
1229 /* Create the loads and stores. */
1230 *arg_struct = create_tmp_var (type, ".paral_data_store");
1231 add_referenced_var (*arg_struct);
1232 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
1233 add_referenced_var (nvar);
1234 *new_arg_struct = make_ssa_name (nvar, NULL_TREE);
1235
a509ebb5
RL
1236 ld_st_data->store = *arg_struct;
1237 ld_st_data->load = *new_arg_struct;
1238 ld_st_data->store_bb = bb0;
1239 ld_st_data->load_bb = bb1;
5f40b3cb 1240 htab_traverse (name_copies, create_loads_and_stores_for_name,
a509ebb5
RL
1241 ld_st_data);
1242
1243 /* Load the calculation from memory into a new
1244 reduction variable (after the join of the threads). */
1245 if (htab_elements (reduction_list) > 0)
1246 {
1247 clsn_data.load = make_ssa_name (nvar, NULL_TREE);
1248 clsn_data.load_bb = single_dom_exit (loop)->dest;
1249 clsn_data.store = ld_st_data->store;
1250 create_final_loads_for_reduction (reduction_list, &clsn_data);
1251 }
5f40b3cb
ZD
1252 }
1253
1254 htab_delete (decl_copies);
1255 htab_delete (name_copies);
1256}
1257
1258/* Bitmap containing uids of functions created by parallelization. We cannot
1259 allocate it from the default obstack, as it must live across compilation
1260 of several functions; we make it gc allocated instead. */
1261
1262static GTY(()) bitmap parallelized_functions;
1263
1264/* Returns true if FN was created by create_loop_fn. */
1265
1266static bool
1267parallelized_function_p (tree fn)
1268{
1269 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1270 return false;
1271
1272 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1273}
1274
1275/* Creates and returns an empty function that will receive the body of
1276 a parallelized loop. */
1277
1278static tree
1279create_loop_fn (void)
1280{
1281 char buf[100];
1282 char *tname;
1283 tree decl, type, name, t;
1284 struct function *act_cfun = cfun;
1285 static unsigned loopfn_num;
1286
1287 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1288 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1289 clean_symbol_name (tname);
1290 name = get_identifier (tname);
1291 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1292
1293 decl = build_decl (FUNCTION_DECL, name, type);
1294 if (!parallelized_functions)
1295 parallelized_functions = BITMAP_GGC_ALLOC ();
1296 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1297
1298 TREE_STATIC (decl) = 1;
1299 TREE_USED (decl) = 1;
1300 DECL_ARTIFICIAL (decl) = 1;
1301 DECL_IGNORED_P (decl) = 0;
1302 TREE_PUBLIC (decl) = 0;
1303 DECL_UNINLINABLE (decl) = 1;
1304 DECL_EXTERNAL (decl) = 0;
1305 DECL_CONTEXT (decl) = NULL_TREE;
1306 DECL_INITIAL (decl) = make_node (BLOCK);
1307
1308 t = build_decl (RESULT_DECL, NULL_TREE, void_type_node);
1309 DECL_ARTIFICIAL (t) = 1;
1310 DECL_IGNORED_P (t) = 1;
1311 DECL_RESULT (decl) = t;
1312
1313 t = build_decl (PARM_DECL, get_identifier (".paral_data_param"),
1314 ptr_type_node);
1315 DECL_ARTIFICIAL (t) = 1;
1316 DECL_ARG_TYPE (t) = ptr_type_node;
1317 DECL_CONTEXT (t) = decl;
1318 TREE_USED (t) = 1;
1319 DECL_ARGUMENTS (decl) = t;
1320
1321 allocate_struct_function (decl);
1322
1323 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1324 it. */
1325 cfun = act_cfun;
1326
1327 return decl;
1328}
1329
1330/* Bases all the induction variables in LOOP on a single induction variable
1331 (unsigned with base 0 and step 1), whose final value is compared with
a509ebb5
RL
1332 NIT. The induction variable is incremented in the loop latch.
1333 REDUCTION_LIST describes the reductions in LOOP. */
5f40b3cb
ZD
1334
1335static void
a509ebb5 1336canonicalize_loop_ivs (struct loop *loop, htab_t reduction_list, tree nit)
5f40b3cb
ZD
1337{
1338 unsigned precision = TYPE_PRECISION (TREE_TYPE (nit));
1339 tree phi, prev, res, type, var_before, val, atype, t, next;
1340 block_stmt_iterator bsi;
1341 bool ok;
1342 affine_iv iv;
1343 edge exit = single_dom_exit (loop);
a509ebb5 1344 struct reduction_info *red;
5f40b3cb
ZD
1345
1346 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1347 {
1348 res = PHI_RESULT (phi);
1349
a509ebb5 1350 if (is_gimple_reg (res) && TYPE_PRECISION (TREE_TYPE (res)) > precision)
5f40b3cb
ZD
1351 precision = TYPE_PRECISION (TREE_TYPE (res));
1352 }
1353
1354 type = lang_hooks.types.type_for_size (precision, 1);
1355
1356 bsi = bsi_last (loop->latch);
1357 create_iv (build_int_cst_type (type, 0), build_int_cst (type, 1), NULL_TREE,
1358 loop, &bsi, true, &var_before, NULL);
1359
1360 bsi = bsi_after_labels (loop->header);
1361 prev = NULL;
1362 for (phi = phi_nodes (loop->header); phi; phi = next)
1363 {
1364 next = PHI_CHAIN (phi);
1365 res = PHI_RESULT (phi);
1366
a509ebb5 1367 if (!is_gimple_reg (res) || res == var_before)
5f40b3cb
ZD
1368 {
1369 prev = phi;
1370 continue;
1371 }
5f40b3cb 1372
a509ebb5
RL
1373 ok = simple_iv (loop, phi, res, &iv, true);
1374 red = reduction_phi (reduction_list, phi);
1375 /* We preserve the reduction phi nodes. */
1376 if (!ok && red)
1377 {
1378 prev = phi;
1379 continue;
1380 }
1381 else
1382 gcc_assert (ok);
5f40b3cb
ZD
1383 remove_phi_node (phi, prev, false);
1384
1385 atype = TREE_TYPE (res);
1386 val = fold_build2 (PLUS_EXPR, atype,
1387 unshare_expr (iv.base),
1388 fold_build2 (MULT_EXPR, atype,
1389 unshare_expr (iv.step),
1390 fold_convert (atype, var_before)));
1391 val = force_gimple_operand_bsi (&bsi, val, false, NULL_TREE, true,
1392 BSI_SAME_STMT);
1393 t = build_gimple_modify_stmt (res, val);
1394 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
1395 SSA_NAME_DEF_STMT (res) = t;
1396 }
1397
1398 t = last_stmt (exit->src);
1399 /* Make the loop exit if the control condition is not satisfied. */
1400 if (exit->flags & EDGE_TRUE_VALUE)
1401 {
1402 edge te, fe;
1403
1404 extract_true_false_edges_from_block (exit->src, &te, &fe);
1405 te->flags = EDGE_FALSE_VALUE;
1406 fe->flags = EDGE_TRUE_VALUE;
1407 }
1408 COND_EXPR_COND (t) = build2 (LT_EXPR, boolean_type_node, var_before, nit);
1409}
1410
1411/* Moves the exit condition of LOOP to the beginning of its header, and
1412 duplicates the part of the last iteration that gets disabled to the
1413 exit of the loop. NIT is the number of iterations of the loop
1414 (used to initialize the variables in the duplicated part).
1415
1416 TODO: the common case is that latch of the loop is empty and immediatelly
1417 follows the loop exit. In this case, it would be better not to copy the
1418 body of the loop, but only move the entry of the loop directly before the
1419 exit check and increase the number of iterations of the loop by one.
a509ebb5
RL
1420 This may need some additional preconditioning in case NIT = ~0.
1421 REDUCTION_LIST describes the reductions in LOOP. */
5f40b3cb
ZD
1422
1423static void
a509ebb5 1424transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
5f40b3cb
ZD
1425{
1426 basic_block *bbs, *nbbs, ex_bb, orig_header;
1427 unsigned n;
1428 bool ok;
1429 edge exit = single_dom_exit (loop), hpred;
1430 tree phi, nphi, cond, control, control_name, res, t, cond_stmt;
1431 block_stmt_iterator bsi;
1432
1433 split_block_after_labels (loop->header);
1434 orig_header = single_succ (loop->header);
1435 hpred = single_succ_edge (loop->header);
1436
1437 cond_stmt = last_stmt (exit->src);
1438 cond = COND_EXPR_COND (cond_stmt);
1439 control = TREE_OPERAND (cond, 0);
1440 gcc_assert (TREE_OPERAND (cond, 1) == nit);
1441
1442 /* Make sure that we have phi nodes on exit for all loop header phis
1443 (create_parallel_loop requires that). */
1444 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
1445 {
1446 res = PHI_RESULT (phi);
1447 t = make_ssa_name (SSA_NAME_VAR (res), phi);
1448 SET_PHI_RESULT (phi, t);
1449
1450 nphi = create_phi_node (res, orig_header);
1451 SSA_NAME_DEF_STMT (res) = nphi;
1452 add_phi_arg (nphi, t, hpred);
1453
1454 if (res == control)
1455 {
1456 TREE_OPERAND (cond, 0) = t;
1457 update_stmt (cond_stmt);
1458 control = t;
1459 }
1460 }
1461
1462 bbs = get_loop_body_in_dom_order (loop);
1463 for (n = 0; bbs[n] != exit->src; n++)
1464 continue;
1465 nbbs = XNEWVEC (basic_block, n);
1466 ok = tree_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1467 bbs + 1, n, nbbs);
1468 gcc_assert (ok);
1469 free (bbs);
1470 ex_bb = nbbs[0];
1471 free (nbbs);
1472
a509ebb5
RL
1473 /* Other than reductions, the only gimple reg that should be copied
1474 out of the loop is the control variable. */
1475
5f40b3cb
ZD
1476 control_name = NULL_TREE;
1477 for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi))
1478 {
1479 res = PHI_RESULT (phi);
1480 if (!is_gimple_reg (res))
1481 continue;
1482
a509ebb5
RL
1483 /* Check if it is a part of reduction. If it is,
1484 keep the phi at the reduction's keep_res field. The
1485 PHI_RESULT of this phi is the resulting value of the reduction
1486 variable when exiting the loop. */
1487
1488 exit = single_dom_exit (loop);
1489
1490 if (htab_elements (reduction_list) > 0)
1491 {
1492 struct reduction_info *red;
1493
1494 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1495
1496 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1497 if (red)
1498 red->keep_res = phi;
1499 }
1500 else
1501 gcc_assert (control_name == NULL_TREE
1502 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb
ZD
1503 control_name = res;
1504 }
1505 gcc_assert (control_name != NULL_TREE);
1506 phi = SSA_NAME_DEF_STMT (control_name);
1507 remove_phi_node (phi, NULL_TREE, false);
1508
1509 /* Initialize the control variable to NIT. */
1510 bsi = bsi_after_labels (ex_bb);
1511 t = build_gimple_modify_stmt (control_name, nit);
1512 bsi_insert_before (&bsi, t, BSI_NEW_STMT);
1513 SSA_NAME_DEF_STMT (control_name) = t;
1514}
1515
1516/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
1517 LOOP_FN and DATA are the arguments of OMP_PARALLEL.
1518 NEW_DATA is the variable that should be initialized from the argument
1519 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
1520 basic block containing OMP_PARALLEL tree. */
1521
1522static basic_block
1523create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
1524 tree new_data, unsigned n_threads)
1525{
1526 block_stmt_iterator bsi;
1527 basic_block bb, paral_bb, for_bb, ex_bb;
1528 tree t, param, res, for_stmt;
1529 tree cvar, cvar_init, initvar, cvar_next, cvar_base, cond, phi, type;
1530 edge exit, nexit, guard, end, e;
1531
1532 /* Prepare the OMP_PARALLEL statement. */
1533 bb = loop_preheader_edge (loop)->src;
1534 paral_bb = single_pred (bb);
1535 bsi = bsi_last (paral_bb);
1536
1537 t = build_omp_clause (OMP_CLAUSE_NUM_THREADS);
1538 OMP_CLAUSE_NUM_THREADS_EXPR (t)
a509ebb5
RL
1539 = build_int_cst (integer_type_node, n_threads);
1540 t = build4 (OMP_PARALLEL, void_type_node, NULL_TREE, t, loop_fn, data);
5f40b3cb
ZD
1541
1542 bsi_insert_after (&bsi, t, BSI_NEW_STMT);
1543
1544 /* Initialize NEW_DATA. */
1545 if (data)
1546 {
1547 bsi = bsi_after_labels (bb);
1548
1549 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL_TREE);
1550 t = build_gimple_modify_stmt (param, build_fold_addr_expr (data));
1551 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
1552 SSA_NAME_DEF_STMT (param) = t;
1553
1554 t = build_gimple_modify_stmt (new_data,
a509ebb5
RL
1555 fold_convert (TREE_TYPE (new_data),
1556 param));
5f40b3cb
ZD
1557 bsi_insert_before (&bsi, t, BSI_SAME_STMT);
1558 SSA_NAME_DEF_STMT (new_data) = t;
1559 }
1560
1561 /* Emit OMP_RETURN for OMP_PARALLEL. */
1562 bb = split_loop_exit_edge (single_dom_exit (loop));
1563 bsi = bsi_last (bb);
1564 bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT);
1565
1566 /* Extract data for OMP_FOR. */
1567 gcc_assert (loop->header == single_dom_exit (loop)->src);
1568 cond = COND_EXPR_COND (last_stmt (loop->header));
1569
1570 cvar = TREE_OPERAND (cond, 0);
1571 cvar_base = SSA_NAME_VAR (cvar);
1572 phi = SSA_NAME_DEF_STMT (cvar);
1573 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1574 initvar = make_ssa_name (cvar_base, NULL_TREE);
1575 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1576 initvar);
1577 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1578
1579 bsi = bsi_last (loop->latch);
1580 gcc_assert (bsi_stmt (bsi) == SSA_NAME_DEF_STMT (cvar_next));
1581 bsi_remove (&bsi, true);
1582
1583 /* Prepare cfg. */
1584 for_bb = split_edge (loop_preheader_edge (loop));
1585 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1586 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1587 gcc_assert (exit == single_dom_exit (loop));
1588
1589 guard = make_edge (for_bb, ex_bb, 0);
1590 single_succ_edge (loop->latch)->flags = 0;
1591 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
1592 for (phi = phi_nodes (ex_bb); phi; phi = PHI_CHAIN (phi))
1593 {
1594 res = PHI_RESULT (phi);
1595 gcc_assert (!is_gimple_reg (phi));
1596 t = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
1597 add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_preheader_edge (loop)),
1598 guard);
1599 add_phi_arg (phi, PHI_ARG_DEF_FROM_EDGE (t, loop_latch_edge (loop)),
1600 end);
1601 }
1602 e = redirect_edge_and_branch (exit, nexit->dest);
1603 PENDING_STMT (e) = NULL;
1604
1605 /* Emit OMP_FOR. */
1606 TREE_OPERAND (cond, 0) = cvar_base;
1607 type = TREE_TYPE (cvar);
1608 t = build_omp_clause (OMP_CLAUSE_SCHEDULE);
1609 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1610
1611 for_stmt = make_node (OMP_FOR);
1612 TREE_TYPE (for_stmt) = void_type_node;
1613 OMP_FOR_CLAUSES (for_stmt) = t;
1614 OMP_FOR_INIT (for_stmt) = build_gimple_modify_stmt (initvar, cvar_init);
1615 OMP_FOR_COND (for_stmt) = cond;
a509ebb5
RL
1616 OMP_FOR_INCR (for_stmt) = build_gimple_modify_stmt (cvar_base,
1617 build2 (PLUS_EXPR, type,
1618 cvar_base,
1619 build_int_cst
1620 (type, 1)));
5f40b3cb
ZD
1621 OMP_FOR_BODY (for_stmt) = NULL_TREE;
1622 OMP_FOR_PRE_BODY (for_stmt) = NULL_TREE;
1623
1624 bsi = bsi_last (for_bb);
1625 bsi_insert_after (&bsi, for_stmt, BSI_NEW_STMT);
1626 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1627
1628 /* Emit OMP_CONTINUE. */
1629 bsi = bsi_last (loop->latch);
1630 t = build2 (OMP_CONTINUE, void_type_node, cvar_next, cvar);
1631 bsi_insert_after (&bsi, t, BSI_NEW_STMT);
1632 SSA_NAME_DEF_STMT (cvar_next) = t;
1633
1634 /* Emit OMP_RETURN for OMP_FOR. */
1635 bsi = bsi_last (ex_bb);
1636 bsi_insert_after (&bsi, make_node (OMP_RETURN), BSI_NEW_STMT);
1637
1638 return paral_bb;
1639}
1640
1641/* Generates code to execute the iterations of LOOP in N_THREADS threads in
a509ebb5
RL
1642 parallel. NITER describes number of iterations of LOOP.
1643 REDUCTION_LIST describes the reductions existant in the LOOP. */
5f40b3cb
ZD
1644
1645static void
a509ebb5
RL
1646gen_parallel_loop (struct loop *loop, htab_t reduction_list,
1647 unsigned n_threads, struct tree_niter_desc *niter)
5f40b3cb
ZD
1648{
1649 struct loop *nloop;
1650 tree many_iterations_cond, type, nit;
1651 tree stmts, arg_struct, new_arg_struct;
1652 basic_block parallel_head;
a509ebb5 1653 struct clsn_data clsn_data;
5f40b3cb
ZD
1654 unsigned prob;
1655
1656 /* From
1657
1658 ---------------------------------------------------------------------
1659 loop
1660 {
1661 IV = phi (INIT, IV + STEP)
1662 BODY1;
1663 if (COND)
1664 break;
1665 BODY2;
1666 }
1667 ---------------------------------------------------------------------
1668
1669 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1670 we generate the following code:
1671
1672 ---------------------------------------------------------------------
1673
1674 if (MAY_BE_ZERO
a509ebb5
RL
1675 || NITER < MIN_PER_THREAD * N_THREADS)
1676 goto original;
5f40b3cb
ZD
1677
1678 BODY1;
1679 store all local loop-invariant variables used in body of the loop to DATA.
1680 OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
1681 load the variables from DATA.
1682 OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
1683 BODY2;
1684 BODY1;
1685 OMP_CONTINUE;
a509ebb5
RL
1686 OMP_RETURN -- OMP_FOR
1687 OMP_RETURN -- OMP_PARALLEL
5f40b3cb
ZD
1688 goto end;
1689
1690 original:
1691 loop
1692 {
1693 IV = phi (INIT, IV + STEP)
1694 BODY1;
1695 if (COND)
1696 break;
1697 BODY2;
1698 }
1699
1700 end:
1701
1702 */
1703
1704 /* Create two versions of the loop -- in the old one, we know that the
1705 number of iterations is large enough, and we will transform it into the
1706 loop that will be split to loop_fn, the new one will be used for the
1707 remaining iterations. */
a509ebb5 1708
5f40b3cb
ZD
1709 type = TREE_TYPE (niter->niter);
1710 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1711 NULL_TREE);
1712 if (stmts)
1713 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1714
1715 many_iterations_cond =
a509ebb5
RL
1716 fold_build2 (GE_EXPR, boolean_type_node,
1717 nit, build_int_cst (type, MIN_PER_THREAD * n_threads));
5f40b3cb 1718 many_iterations_cond
a509ebb5
RL
1719 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1720 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1721 many_iterations_cond);
5f40b3cb 1722 many_iterations_cond
a509ebb5 1723 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
5f40b3cb
ZD
1724 if (stmts)
1725 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1726 if (!is_gimple_condexpr (many_iterations_cond))
1727 {
1728 many_iterations_cond
a509ebb5
RL
1729 = force_gimple_operand (many_iterations_cond, &stmts,
1730 true, NULL_TREE);
5f40b3cb
ZD
1731 if (stmts)
1732 bsi_insert_on_edge_immediate (loop_preheader_edge (loop), stmts);
1733 }
1734
1735 initialize_original_copy_tables ();
1736
1737 /* We assume that the loop usually iterates a lot. */
1738 prob = 4 * REG_BR_PROB_BASE / 5;
1739 nloop = loop_version (loop, many_iterations_cond, NULL,
1740 prob, prob, REG_BR_PROB_BASE - prob, true);
1741 update_ssa (TODO_update_ssa);
1742 free_original_copy_tables ();
1743
1744 /* Base all the induction variables in LOOP on a single control one. */
a509ebb5 1745 canonicalize_loop_ivs (loop, reduction_list, nit);
5f40b3cb
ZD
1746
1747 /* Ensure that the exit condition is the first statement in the loop. */
a509ebb5
RL
1748 transform_to_exit_first_loop (loop, reduction_list, nit);
1749
1750
1751 /* Generate intializations for reductions. */
1752
1753 if (htab_elements (reduction_list) > 0)
1754 htab_traverse (reduction_list, initialize_reductions, loop);
5f40b3cb
ZD
1755
1756 /* Eliminate the references to local variables from the loop. */
1757 eliminate_local_variables (loop);
1758
1759 /* In the old loop, move all variables non-local to the loop to a structure
1760 and back, and create separate decls for the variables used in loop. */
a509ebb5 1761 separate_decls_in_loop (loop, reduction_list, &arg_struct, &new_arg_struct, &clsn_data);
5f40b3cb
ZD
1762
1763 /* Create the parallel constructs. */
1764 parallel_head = create_parallel_loop (loop, create_loop_fn (), arg_struct,
1765 new_arg_struct, n_threads);
a509ebb5
RL
1766 if (htab_elements (reduction_list) > 0)
1767 create_call_for_reduction (loop, reduction_list, &clsn_data);
5f40b3cb
ZD
1768
1769 scev_reset ();
1770
1771 /* Cancel the loop (it is simpler to do it here rather than to teach the
1772 expander to do it). */
1773 cancel_loop_tree (loop);
1774
1775 /* Expand the parallel constructs. We do it directly here instead of running
1776 a separate expand_omp pass, since it is more efficient, and less likely to
1777 cause troubles with further analyses not being able to deal with the
1778 OMP trees. */
a509ebb5 1779
5f40b3cb
ZD
1780 omp_expand_local (parallel_head);
1781}
1782
1783/* Detect parallel loops and generate parallel code using libgomp
1784 primitives. Returns true if some loop was parallelized, false
1785 otherwise. */
1786
1787bool
1788parallelize_loops (void)
1789{
1790 unsigned n_threads = flag_tree_parallelize_loops;
1791 bool changed = false;
1792 struct loop *loop;
1793 struct tree_niter_desc niter_desc;
1794 loop_iterator li;
a509ebb5 1795 htab_t reduction_list;
5f40b3cb
ZD
1796
1797 /* Do not parallelize loops in the functions created by parallelization. */
1798 if (parallelized_function_p (cfun->decl))
1799 return false;
1800
a509ebb5
RL
1801 reduction_list = htab_create (10, reduction_info_hash,
1802 reduction_info_eq, free);
1803
5f40b3cb
ZD
1804 FOR_EACH_LOOP (li, loop, 0)
1805 {
a509ebb5 1806 htab_empty (reduction_list);
5f40b3cb
ZD
1807 if (/* Do not bother with loops in cold areas. */
1808 !maybe_hot_bb_p (loop->header)
1809 /* Or loops that roll too little. */
1810 || expected_loop_iterations (loop) <= n_threads
1811 /* And of course, the loop must be parallelizable. */
1812 || !can_duplicate_loop_p (loop)
a509ebb5 1813 || !loop_parallel_p (loop, reduction_list, &niter_desc))
5f40b3cb
ZD
1814 continue;
1815
1816 changed = true;
a509ebb5 1817 gen_parallel_loop (loop, reduction_list, n_threads, &niter_desc);
5f40b3cb
ZD
1818 verify_flow_info ();
1819 verify_dominators (CDI_DOMINATORS);
1820 verify_loop_structure ();
1821 verify_loop_closed_ssa ();
1822 }
1823
a509ebb5 1824 htab_delete (reduction_list);
5f40b3cb
ZD
1825 return changed;
1826}
1827
1828#include "gt-tree-parloops.h"