]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vectorizer.c
re PR target/37170 (gcc.dg/weak/weak-1.c)
[thirdparty/gcc.git] / gcc / tree-vectorizer.c
CommitLineData
79fe1b3b 1/* Loop Vectorization
fa10beec
RW
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
3 Foundation, Inc.
79fe1b3b
DN
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
9dcd6f09 10Software Foundation; either version 3, or (at your option) any later
79fe1b3b
DN
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
9dcd6f09
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
79fe1b3b
DN
21
22/* Loop Vectorization Pass.
23
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
28
29 For example, the vectorizer transforms the following simple loop:
30
31 short a[N]; short b[N]; short c[N]; int i;
32
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
35 }
36
37 as if it was manually vectorized by rewriting the source code into:
38
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
43
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
49 }
50
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
55
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
6775f1f3
IR
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
79fe1b3b
DN
63
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
69
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
74
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
85
86 For example, say stmt S1 was vectorized into stmt VS1:
87
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
91
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
206048bd 94 vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
79fe1b3b
DN
95 resulting sequence would be:
96
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
104
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
166cdb08 115 machine_mode (e.g, optab_handler (add_optab, V8HImode)->insn_code). If
79fe1b3b
DN
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
118
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
121*/
122
123#include "config.h"
124#include "system.h"
125#include "coretypes.h"
126#include "tm.h"
79fe1b3b
DN
127#include "ggc.h"
128#include "tree.h"
129#include "target.h"
79fe1b3b
DN
130#include "rtl.h"
131#include "basic-block.h"
132#include "diagnostic.h"
133#include "tree-flow.h"
134#include "tree-dump.h"
135#include "timevar.h"
136#include "cfgloop.h"
137#include "cfglayout.h"
138#include "expr.h"
89d67cca 139#include "recog.h"
79fe1b3b 140#include "optabs.h"
c12cc930 141#include "params.h"
a023975e 142#include "toplev.h"
79fe1b3b
DN
143#include "tree-chrec.h"
144#include "tree-data-ref.h"
145#include "tree-scalar-evolution.h"
7353a8c1 146#include "input.h"
726a989a 147#include "hashtab.h"
79fe1b3b
DN
148#include "tree-vectorizer.h"
149#include "tree-pass.h"
ad2dd72a 150#include "langhooks.h"
f88a8cfa 151
7353a8c1 152/*************************************************************************
f7064d11 153 General Vectorization Utilities
7353a8c1 154 *************************************************************************/
c866976a
LB
155
156/* vect_dump will be set to stderr or dump_file if exist. */
157FILE *vect_dump;
158
f7064d11
DN
159/* vect_verbosity_level set to an invalid value
160 to mark that it's uninitialized. */
161enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;
c866976a 162
00518cb1
DP
163/* Loop location. */
164static LOC vect_loop_location;
90ff949f
DN
165
166/* Bitmap of virtual variables to be renamed. */
38635499 167bitmap vect_memsyms_to_rename;
726a989a
RB
168
169/* Vector mapping GIMPLE stmt to stmt_vec_info. */
170VEC(vec_void_p,heap) *stmt_vec_info_vec;
171
a023975e 172\f
f88a8cfa
DN
173/*************************************************************************
174 Simple Loop Peeling Utilities
175
176 Utilities to support loop peeling for vectorization purposes.
177 *************************************************************************/
a023975e
OG
178
179
a023975e
OG
180/* Renames the use *OP_P. */
181
182static void
183rename_use_op (use_operand_p op_p)
184{
84d65814 185 tree new_name;
a023975e
OG
186
187 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
188 return;
189
84d65814 190 new_name = get_current_def (USE_FROM_PTR (op_p));
a023975e
OG
191
192 /* Something defined outside of the loop. */
84d65814 193 if (!new_name)
a023975e
OG
194 return;
195
196 /* An ordinary ssa name defined in the loop. */
197
84d65814 198 SET_USE (op_p, new_name);
a023975e
OG
199}
200
201
202/* Renames the variables in basic block BB. */
203
f8bf9252 204void
a023975e
OG
205rename_variables_in_bb (basic_block bb)
206{
726a989a
RB
207 gimple_stmt_iterator gsi;
208 gimple stmt;
f47c96aa
AM
209 use_operand_p use_p;
210 ssa_op_iter iter;
a023975e
OG
211 edge e;
212 edge_iterator ei;
63dfe6ff 213 struct loop *loop = bb->loop_father;
a023975e 214
726a989a 215 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
a023975e 216 {
726a989a 217 stmt = gsi_stmt (gsi);
38635499 218 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
f47c96aa 219 rename_use_op (use_p);
a023975e
OG
220 }
221
222 FOR_EACH_EDGE (e, ei, bb->succs)
63dfe6ff
DN
223 {
224 if (!flow_bb_inside_loop_p (loop, e->dest))
225 continue;
726a989a
RB
226 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
227 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
63dfe6ff 228 }
a023975e
OG
229}
230
231
a023975e
OG
232/* Renames variables in new generated LOOP. */
233
dea61d92 234void
a023975e
OG
235rename_variables_in_loop (struct loop *loop)
236{
237 unsigned i;
238 basic_block *bbs;
239
240 bbs = get_loop_body (loop);
241
242 for (i = 0; i < loop->num_nodes; i++)
243 rename_variables_in_bb (bbs[i]);
244
245 free (bbs);
246}
247
248
63dfe6ff 249/* Update the PHI nodes of NEW_LOOP.
a023975e 250
63dfe6ff
DN
251 NEW_LOOP is a duplicate of ORIG_LOOP.
252 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
253 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
254 executes before it. */
a023975e
OG
255
256static void
63dfe6ff 257slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
f88a8cfa 258 struct loop *new_loop, bool after)
a023975e 259{
84d65814 260 tree new_ssa_name;
726a989a 261 gimple phi_new, phi_orig;
63dfe6ff
DN
262 tree def;
263 edge orig_loop_latch = loop_latch_edge (orig_loop);
264 edge orig_entry_e = loop_preheader_edge (orig_loop);
ac8f6c69 265 edge new_loop_exit_e = single_exit (new_loop);
63dfe6ff
DN
266 edge new_loop_entry_e = loop_preheader_edge (new_loop);
267 edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
726a989a 268 gimple_stmt_iterator gsi_new, gsi_orig;
63dfe6ff
DN
269
270 /*
271 step 1. For each loop-header-phi:
272 Add the first phi argument for the phi in NEW_LOOP
273 (the one associated with the entry of NEW_LOOP)
274
275 step 2. For each loop-header-phi:
276 Add the second phi argument for the phi in NEW_LOOP
277 (the one associated with the latch of NEW_LOOP)
a023975e 278
63dfe6ff 279 step 3. Update the phis in the successor block of NEW_LOOP.
a023975e 280
63dfe6ff
DN
281 case 1: NEW_LOOP was placed before ORIG_LOOP:
282 The successor block of NEW_LOOP is the header of ORIG_LOOP.
283 Updating the phis in the successor block can therefore be done
284 along with the scanning of the loop header phis, because the
285 header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
286 phi nodes, organized in the same order.
287
288 case 2: NEW_LOOP was placed after ORIG_LOOP:
289 The successor block of NEW_LOOP is the original exit block of
290 ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
291 We postpone updating these phis to a later stage (when
292 loop guards are added).
293 */
294
295
296 /* Scan the phis in the headers of the old and new loops
297 (they are organized in exactly the same order). */
a023975e 298
726a989a
RB
299 for (gsi_new = gsi_start_phis (new_loop->header),
300 gsi_orig = gsi_start_phis (orig_loop->header);
301 !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
302 gsi_next (&gsi_new), gsi_next (&gsi_orig))
a023975e 303 {
726a989a
RB
304 phi_new = gsi_stmt (gsi_new);
305 phi_orig = gsi_stmt (gsi_orig);
306
63dfe6ff
DN
307 /* step 1. */
308 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
d2e398df 309 add_phi_arg (phi_new, def, new_loop_entry_e);
a023975e 310
63dfe6ff
DN
311 /* step 2. */
312 def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
a023975e 313 if (TREE_CODE (def) != SSA_NAME)
63dfe6ff 314 continue;
a023975e 315
84d65814
DN
316 new_ssa_name = get_current_def (def);
317 if (!new_ssa_name)
ed3c16fb
DN
318 {
319 /* This only happens if there are no definitions
320 inside the loop. use the phi_result in this case. */
321 new_ssa_name = PHI_RESULT (phi_new);
322 }
a023975e
OG
323
324 /* An ordinary ssa name defined in the loop. */
d2e398df 325 add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));
a023975e 326
63dfe6ff
DN
327 /* step 3 (case 1). */
328 if (!after)
329 {
330 gcc_assert (new_loop_exit_e == orig_entry_e);
331 SET_PHI_ARG_DEF (phi_orig,
3a2f1f06 332 new_loop_exit_e->dest_idx,
63dfe6ff
DN
333 new_ssa_name);
334 }
a023975e
OG
335 }
336}
337
338
339/* Update PHI nodes for a guard of the LOOP.
340
63dfe6ff
DN
341 Input:
342 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
343 controls whether LOOP is to be executed. GUARD_EDGE is the edge that
344 originates from the guard-bb, skips LOOP and reaches the (unique) exit
345 bb of LOOP. This loop-exit-bb is an empty bb with one successor.
3ce66cf1
DN
346 We denote this bb NEW_MERGE_BB because before the guard code was added
347 it had a single predecessor (the LOOP header), and now it became a merge
63dfe6ff
DN
348 point of two paths - the path that ends with the LOOP exit-edge, and
349 the path that ends with GUARD_EDGE.
3ce66cf1
DN
350 - NEW_EXIT_BB: New basic block that is added by this function between LOOP
351 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis.
63dfe6ff 352
3ce66cf1
DN
353 ===> The CFG before the guard-code was added:
354 LOOP_header_bb:
355 loop_body
356 if (exit_loop) goto update_bb
357 else goto LOOP_header_bb
358 update_bb:
63dfe6ff 359
3ce66cf1
DN
360 ==> The CFG after the guard-code was added:
361 guard_bb:
362 if (LOOP_guard_condition) goto new_merge_bb
363 else goto LOOP_header_bb
63dfe6ff 364 LOOP_header_bb:
3ce66cf1
DN
365 loop_body
366 if (exit_loop_condition) goto new_merge_bb
367 else goto LOOP_header_bb
368 new_merge_bb:
369 goto update_bb
63dfe6ff
DN
370 update_bb:
371
3ce66cf1
DN
372 ==> The CFG after this function:
373 guard_bb:
374 if (LOOP_guard_condition) goto new_merge_bb
375 else goto LOOP_header_bb
63dfe6ff 376 LOOP_header_bb:
3ce66cf1
DN
377 loop_body
378 if (exit_loop_condition) goto new_exit_bb
379 else goto LOOP_header_bb
380 new_exit_bb:
63dfe6ff
DN
381 new_merge_bb:
382 goto update_bb
383 update_bb:
384
3ce66cf1
DN
385 This function:
386 1. creates and updates the relevant phi nodes to account for the new
387 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves:
388 1.1. Create phi nodes at NEW_MERGE_BB.
389 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
390 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
391 2. preserves loop-closed-ssa-form by creating the required phi nodes
392 at the exit of LOOP (i.e, in NEW_EXIT_BB).
393
394 There are two flavors to this function:
395
396 slpeel_update_phi_nodes_for_guard1:
397 Here the guard controls whether we enter or skip LOOP, where LOOP is a
398 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
399 for variables that have phis in the loop header.
400
401 slpeel_update_phi_nodes_for_guard2:
402 Here the guard controls whether we enter or skip LOOP, where LOOP is an
403 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
404 for variables that have phis in the loop exit.
405
406 I.E., the overall structure is:
407
408 loop1_preheader_bb:
fa10beec 409 guard1 (goto loop1/merge1_bb)
3ce66cf1
DN
410 loop1
411 loop1_exit_bb:
412 guard2 (goto merge1_bb/merge2_bb)
413 merge1_bb
414 loop2
415 loop2_exit_bb
416 merge2_bb
417 next_bb
418
419 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
420 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars
421 that have phis in loop1->header).
422
423 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
424 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars
425 that have phis in next_bb). It also adds some of these phis to
426 loop1_exit_bb.
427
428 slpeel_update_phi_nodes_for_guard1 is always called before
429 slpeel_update_phi_nodes_for_guard2. They are both needed in order
430 to create correct data-flow and loop-closed-ssa-form.
431
432 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
433 that change between iterations of a loop (and therefore have a phi-node
434 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
435 phis for variables that are used out of the loop (and therefore have
436 loop-closed exit phis). Some variables may be both updated between
437 iterations and used after the loop. This is why in loop1_exit_bb we
438 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
439 and exit phis (created by slpeel_update_phi_nodes_for_guard2).
440
441 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
442 an original loop. i.e., we have:
443
444 orig_loop
445 guard_bb (goto LOOP/new_merge)
446 new_loop <-- LOOP
447 new_exit
448 new_merge
449 next_bb
450
451 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
452 have:
453
454 new_loop
455 guard_bb (goto LOOP/new_merge)
456 orig_loop <-- LOOP
457 new_exit
458 new_merge
459 next_bb
460
84d65814
DN
461 The SSA names defined in the original loop have a current
462 reaching definition that that records the corresponding new
463 ssa-name used in the new duplicated loop copy.
63dfe6ff 464 */
a023975e 465
3ce66cf1
DN
466/* Function slpeel_update_phi_nodes_for_guard1
467
468 Input:
469 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
470 - DEFS - a bitmap of ssa names to mark new names for which we recorded
471 information.
472
473 In the context of the overall structure, we have:
474
475 loop1_preheader_bb:
fa10beec 476 guard1 (goto loop1/merge1_bb)
3ce66cf1
DN
477LOOP-> loop1
478 loop1_exit_bb:
479 guard2 (goto merge1_bb/merge2_bb)
480 merge1_bb
481 loop2
482 loop2_exit_bb
483 merge2_bb
484 next_bb
485
486 For each name updated between loop iterations (i.e - for each name that has
487 an entry (loop-header) phi in LOOP) we create a new phi in:
488 1. merge1_bb (to account for the edge from guard1)
489 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
490*/
491
a023975e 492static void
3ce66cf1
DN
493slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
494 bool is_new_loop, basic_block *new_exit_bb,
495 bitmap *defs)
a023975e 496{
726a989a
RB
497 gimple orig_phi, new_phi;
498 gimple update_phi, update_phi2;
63dfe6ff
DN
499 tree guard_arg, loop_arg;
500 basic_block new_merge_bb = guard_edge->dest;
3ce66cf1 501 edge e = EDGE_SUCC (new_merge_bb, 0);
63dfe6ff 502 basic_block update_bb = e->dest;
3ce66cf1
DN
503 basic_block orig_bb = loop->header;
504 edge new_exit_e;
505 tree current_new_name;
90ff949f 506 tree name;
726a989a 507 gimple_stmt_iterator gsi_orig, gsi_update;
3ce66cf1
DN
508
509 /* Create new bb between loop and new_merge_bb. */
ac8f6c69 510 *new_exit_bb = split_edge (single_exit (loop));
3ce66cf1
DN
511
512 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
63dfe6ff 513
726a989a
RB
514 for (gsi_orig = gsi_start_phis (orig_bb),
515 gsi_update = gsi_start_phis (update_bb);
516 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
517 gsi_next (&gsi_orig), gsi_next (&gsi_update))
63dfe6ff 518 {
726a989a
RB
519 orig_phi = gsi_stmt (gsi_orig);
520 update_phi = gsi_stmt (gsi_update);
521
90ff949f
DN
522 /* Virtual phi; Mark it for renaming. We actually want to call
523 mar_sym_for_renaming, but since all ssa renaming datastructures
fa10beec 524 are going to be freed before we get to call ssa_update, we just
90ff949f
DN
525 record this name for now in a bitmap, and will mark it for
526 renaming later. */
527 name = PHI_RESULT (orig_phi);
528 if (!is_gimple_reg (SSA_NAME_VAR (name)))
38635499 529 bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));
90ff949f 530
3ce66cf1
DN
531 /** 1. Handle new-merge-point phis **/
532
533 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
534 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
535 new_merge_bb);
536
537 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
538 of LOOP. Set the two phi args in NEW_PHI for these edges: */
539 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
540 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));
541
542 add_phi_arg (new_phi, loop_arg, new_exit_e);
543 add_phi_arg (new_phi, guard_arg, guard_edge);
544
545 /* 1.3. Update phi in successor block. */
546 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
547 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
548 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
549 update_phi2 = new_phi;
550
551
552 /** 2. Handle loop-closed-ssa-form phis **/
553
38635499
DN
554 if (!is_gimple_reg (PHI_RESULT (orig_phi)))
555 continue;
556
3ce66cf1
DN
557 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
558 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
559 *new_exit_bb);
560
561 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
ac8f6c69 562 add_phi_arg (new_phi, loop_arg, single_exit (loop));
3ce66cf1
DN
563
564 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
565 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
566 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
567
84d65814 568 /* 2.4. Record the newly created name with set_current_def.
3ce66cf1 569 We want to find a name such that
84d65814
DN
570 name = get_current_def (orig_loop_name)
571 and to set its current definition as follows:
572 set_current_def (name, new_phi_name)
3ce66cf1
DN
573
574 If LOOP is a new loop then loop_arg is already the name we're
575 looking for. If LOOP is the original loop, then loop_arg is
576 the orig_loop_name and the relevant name is recorded in its
84d65814 577 current reaching definition. */
3ce66cf1
DN
578 if (is_new_loop)
579 current_new_name = loop_arg;
580 else
581 {
84d65814 582 current_new_name = get_current_def (loop_arg);
ed3c16fb
DN
583 /* current_def is not available only if the variable does not
584 change inside the loop, in which case we also don't care
585 about recording a current_def for it because we won't be
586 trying to create loop-exit-phis for it. */
587 if (!current_new_name)
588 continue;
3ce66cf1 589 }
84d65814 590 gcc_assert (get_current_def (current_new_name) == NULL_TREE);
3ce66cf1 591
84d65814 592 set_current_def (current_new_name, PHI_RESULT (new_phi));
3ce66cf1
DN
593 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
594 }
3ce66cf1
DN
595}
596
597
598/* Function slpeel_update_phi_nodes_for_guard2
599
600 Input:
601 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
602
603 In the context of the overall structure, we have:
604
605 loop1_preheader_bb:
fa10beec 606 guard1 (goto loop1/merge1_bb)
3ce66cf1
DN
607 loop1
608 loop1_exit_bb:
609 guard2 (goto merge1_bb/merge2_bb)
610 merge1_bb
611LOOP-> loop2
612 loop2_exit_bb
613 merge2_bb
614 next_bb
615
616 For each name used out side the loop (i.e - for each name that has an exit
617 phi in next_bb) we create a new phi in:
618 1. merge2_bb (to account for the edge from guard_bb)
619 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
620 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
621 if needed (if it wasn't handled by slpeel_update_phis_nodes_for_phi1).
622*/
623
624static void
625slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
626 bool is_new_loop, basic_block *new_exit_bb)
627{
726a989a
RB
628 gimple orig_phi, new_phi;
629 gimple update_phi, update_phi2;
3ce66cf1
DN
630 tree guard_arg, loop_arg;
631 basic_block new_merge_bb = guard_edge->dest;
632 edge e = EDGE_SUCC (new_merge_bb, 0);
633 basic_block update_bb = e->dest;
634 edge new_exit_e;
84d65814 635 tree orig_def, orig_def_new_name;
3ce66cf1
DN
636 tree new_name, new_name2;
637 tree arg;
726a989a 638 gimple_stmt_iterator gsi;
3ce66cf1
DN
639
640 /* Create new bb between loop and new_merge_bb. */
ac8f6c69 641 *new_exit_bb = split_edge (single_exit (loop));
3ce66cf1
DN
642
643 new_exit_e = EDGE_SUCC (*new_exit_bb, 0);
644
726a989a 645 for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
3ce66cf1 646 {
726a989a 647 update_phi = gsi_stmt (gsi);
3ce66cf1
DN
648 orig_phi = update_phi;
649 orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
5bb1823d
DN
650 /* This loop-closed-phi actually doesn't represent a use
651 out of the loop - the phi arg is a constant. */
652 if (TREE_CODE (orig_def) != SSA_NAME)
653 continue;
84d65814 654 orig_def_new_name = get_current_def (orig_def);
3ce66cf1
DN
655 arg = NULL_TREE;
656
657 /** 1. Handle new-merge-point phis **/
658
659 /* 1.1. Generate new phi node in NEW_MERGE_BB: */
63dfe6ff
DN
660 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
661 new_merge_bb);
a023975e 662
3ce66cf1 663 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
84d65814 664 of LOOP. Set the two PHI args in NEW_PHI for these edges: */
3ce66cf1
DN
665 new_name = orig_def;
666 new_name2 = NULL_TREE;
84d65814 667 if (orig_def_new_name)
63dfe6ff 668 {
84d65814
DN
669 new_name = orig_def_new_name;
670 /* Some variables have both loop-entry-phis and loop-exit-phis.
671 Such variables were given yet newer names by phis placed in
672 guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
673 new_name2 = get_current_def (get_current_def (orig_name)). */
674 new_name2 = get_current_def (new_name);
63dfe6ff 675 }
3ce66cf1
DN
676
677 if (is_new_loop)
63dfe6ff 678 {
3ce66cf1
DN
679 guard_arg = orig_def;
680 loop_arg = new_name;
63dfe6ff 681 }
3ce66cf1
DN
682 else
683 {
684 guard_arg = new_name;
685 loop_arg = orig_def;
686 }
687 if (new_name2)
688 guard_arg = new_name2;
689
690 add_phi_arg (new_phi, loop_arg, new_exit_e);
d2e398df 691 add_phi_arg (new_phi, guard_arg, guard_edge);
63dfe6ff 692
3ce66cf1
DN
693 /* 1.3. Update phi in successor block. */
694 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
3a2f1f06 695 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
3ce66cf1
DN
696 update_phi2 = new_phi;
697
698
699 /** 2. Handle loop-closed-ssa-form phis **/
700
701 /* 2.1. Generate new phi node in NEW_EXIT_BB: */
702 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
703 *new_exit_bb);
704
705 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */
ac8f6c69 706 add_phi_arg (new_phi, loop_arg, single_exit (loop));
3ce66cf1
DN
707
708 /* 2.3. Update phi in successor of NEW_EXIT_BB: */
709 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
710 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));
711
712
713 /** 3. Handle loop-closed-ssa-form phis for first loop **/
714
84d65814
DN
715 /* 3.1. Find the relevant names that need an exit-phi in
716 GUARD_BB, i.e. names for which
717 slpeel_update_phi_nodes_for_guard1 had not already created a
718 phi node. This is the case for names that are used outside
2228adb2 719 the loop (and therefore need an exit phi) but are not updated
84d65814
DN
720 across loop iterations (and therefore don't have a
721 loop-header-phi).
722
723 slpeel_update_phi_nodes_for_guard1 is responsible for
724 creating loop-exit phis in GUARD_BB for names that have a
725 loop-header-phi. When such a phi is created we also record
726 the new name in its current definition. If this new name
727 exists, then guard_arg was set to this new name (see 1.2
728 above). Therefore, if guard_arg is not this new name, this
729 is an indication that an exit-phi in GUARD_BB was not yet
730 created, so we take care of it here. */
3ce66cf1
DN
731 if (guard_arg == new_name2)
732 continue;
733 arg = guard_arg;
734
735 /* 3.2. Generate new phi node in GUARD_BB: */
736 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
737 guard_edge->src);
738
739 /* 3.3. GUARD_BB has one incoming edge: */
740 gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
741 add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));
742
743 /* 3.4. Update phi in successor of GUARD_BB: */
744 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
745 == guard_arg);
746 SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
63dfe6ff 747 }
a023975e
OG
748}
749
750
751/* Make the LOOP iterate NITERS times. This is done by adding a new IV
335d3d54
DN
752 that starts at zero, increases by one and its limit is NITERS.
753
754 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
a023975e 755
f7064d11 756void
335d3d54 757slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
a023975e 758{
726a989a
RB
759 tree indx_before_incr, indx_after_incr;
760 gimple cond_stmt;
761 gimple orig_cond;
ac8f6c69 762 edge exit_edge = single_exit (loop);
726a989a
RB
763 gimple_stmt_iterator loop_cond_gsi;
764 gimple_stmt_iterator incr_gsi;
cce4ca55 765 bool insert_after;
618bb89c
DN
766 tree init = build_int_cst (TREE_TYPE (niters), 0);
767 tree step = build_int_cst (TREE_TYPE (niters), 1);
7353a8c1 768 LOC loop_loc;
726a989a 769 enum tree_code code;
a023975e 770
a023975e
OG
771 orig_cond = get_loop_exit_condition (loop);
772 gcc_assert (orig_cond);
726a989a 773 loop_cond_gsi = gsi_for_stmt (orig_cond);
cce4ca55 774
726a989a 775 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
618bb89c 776 create_iv (init, step, NULL_TREE, loop,
726a989a
RB
777 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
778
779 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
780 true, NULL_TREE, true,
781 GSI_SAME_STMT);
782 niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
783 true, GSI_SAME_STMT);
a023975e 784
726a989a
RB
785 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
786 cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
787 NULL_TREE);
a023975e 788
726a989a 789 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
a023975e
OG
790
791 /* Remove old loop exit test: */
726a989a 792 gsi_remove (&loop_cond_gsi, true);
a023975e 793
7353a8c1 794 loop_loc = find_loop_location (loop);
c866976a
LB
795 if (dump_file && (dump_flags & TDF_DETAILS))
796 {
797 if (loop_loc != UNKNOWN_LOC)
798 fprintf (dump_file, "\nloop at %s:%d: ",
799 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
726a989a 800 print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
c866976a 801 }
d6f6ef21
DN
802
803 loop->nb_iterations = niters;
a023975e
OG
804}
805
806
807/* Given LOOP this function generates a new copy of it and puts it
808 on E which is either the entry or exit of LOOP. */
809
dea61d92 810struct loop *
d73be268 811slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
a023975e
OG
812{
813 struct loop *new_loop;
814 basic_block *new_bbs, *bbs;
815 bool at_exit;
816 bool was_imm_dom;
817 basic_block exit_dest;
726a989a
RB
818 gimple phi;
819 tree phi_arg;
ac8f6c69 820 edge exit, new_exit;
726a989a 821 gimple_stmt_iterator gsi;
a023975e 822
ac8f6c69 823 at_exit = (e == single_exit (loop));
a023975e 824 if (!at_exit && e != loop_preheader_edge (loop))
ef302293 825 return NULL;
a023975e
OG
826
827 bbs = get_loop_body (loop);
828
829 /* Check whether duplication is possible. */
830 if (!can_copy_bbs_p (bbs, loop->num_nodes))
831 {
a023975e
OG
832 free (bbs);
833 return NULL;
834 }
835
836 /* Generate new loop structure. */
9ba025a2 837 new_loop = duplicate_loop (loop, loop_outer (loop));
a023975e
OG
838 if (!new_loop)
839 {
a023975e
OG
840 free (bbs);
841 return NULL;
842 }
843
ac8f6c69 844 exit_dest = single_exit (loop)->dest;
a023975e
OG
845 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
846 exit_dest) == loop->header ?
847 true : false);
848
5ed6ace5 849 new_bbs = XNEWVEC (basic_block, loop->num_nodes);
a023975e 850
ac8f6c69 851 exit = single_exit (loop);
70388d94 852 copy_bbs (bbs, loop->num_nodes, new_bbs,
ac8f6c69 853 &exit, 1, &new_exit, NULL,
b9a66240 854 e->src);
a023975e
OG
855
856 /* Duplicating phi args at exit bbs as coming
857 also from exit of duplicated loop. */
726a989a 858 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
a023975e 859 {
726a989a 860 phi = gsi_stmt (gsi);
ac8f6c69 861 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
a023975e
OG
862 if (phi_arg)
863 {
864 edge new_loop_exit_edge;
865
866 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
867 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
868 else
869 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);
870
d2e398df 871 add_phi_arg (phi, phi_arg, new_loop_exit_edge);
a023975e
OG
872 }
873 }
874
875 if (at_exit) /* Add the loop copy at exit. */
876 {
877 redirect_edge_and_branch_force (e, new_loop->header);
dea61d92 878 PENDING_STMT (e) = NULL;
a023975e
OG
879 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
880 if (was_imm_dom)
881 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
882 }
883 else /* Add the copy at entry. */
884 {
885 edge new_exit_e;
886 edge entry_e = loop_preheader_edge (loop);
887 basic_block preheader = entry_e->src;
888
889 if (!flow_bb_inside_loop_p (new_loop,
890 EDGE_SUCC (new_loop->header, 0)->dest))
891 new_exit_e = EDGE_SUCC (new_loop->header, 0);
892 else
893 new_exit_e = EDGE_SUCC (new_loop->header, 1);
894
895 redirect_edge_and_branch_force (new_exit_e, loop->header);
dea61d92 896 PENDING_STMT (new_exit_e) = NULL;
a023975e
OG
897 set_immediate_dominator (CDI_DOMINATORS, loop->header,
898 new_exit_e->src);
899
900 /* We have to add phi args to the loop->header here as coming
901 from new_exit_e edge. */
726a989a
RB
902 for (gsi = gsi_start_phis (loop->header);
903 !gsi_end_p (gsi);
904 gsi_next (&gsi))
a023975e 905 {
726a989a 906 phi = gsi_stmt (gsi);
a023975e
OG
907 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
908 if (phi_arg)
d2e398df 909 add_phi_arg (phi, phi_arg, new_exit_e);
a023975e
OG
910 }
911
912 redirect_edge_and_branch_force (entry_e, new_loop->header);
dea61d92 913 PENDING_STMT (entry_e) = NULL;
a023975e
OG
914 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
915 }
916
a023975e
OG
917 free (new_bbs);
918 free (bbs);
919
920 return new_loop;
921}
922
923
924/* Given the condition statement COND, put it as the last statement
925 of GUARD_BB; EXIT_BB is the basic block to skip the loop;
926 Assumes that this is the single exit of the guarded loop.
927 Returns the skip edge. */
928
929static edge
63dfe6ff 930slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
749cc4b1 931 basic_block dom_bb)
a023975e 932{
726a989a 933 gimple_stmt_iterator gsi;
a023975e 934 edge new_e, enter_e;
726a989a
RB
935 gimple cond_stmt;
936 gimple_seq gimplify_stmt_list = NULL;
a023975e 937
3ce66cf1 938 enter_e = EDGE_SUCC (guard_bb, 0);
a023975e
OG
939 enter_e->flags &= ~EDGE_FALLTHRU;
940 enter_e->flags |= EDGE_FALSE_VALUE;
726a989a 941 gsi = gsi_last_bb (guard_bb);
a023975e 942
420da8ca
RG
943 cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
944 cond_stmt = gimple_build_cond (NE_EXPR,
945 cond, build_int_cst (TREE_TYPE (cond), 0),
726a989a 946 NULL_TREE, NULL_TREE);
749cc4b1 947 if (gimplify_stmt_list)
726a989a 948 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
749cc4b1 949
726a989a
RB
950 gsi = gsi_last_bb (guard_bb);
951 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
749cc4b1 952
3ce66cf1 953 /* Add new edge to connect guard block to the merge/loop-exit block. */
a023975e 954 new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
63dfe6ff 955 set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
a023975e
OG
956 return new_e;
957}
958
959
d6901754
DN
960/* This function verifies that the following restrictions apply to LOOP:
961 (1) it is innermost
962 (2) it consists of exactly 2 basic blocks - header, and an empty latch.
963 (3) it is single entry, single exit
964 (4) its exit condition is the last stmt in the header
965 (5) E is the entry/exit edge of LOOP.
966 */
a023975e 967
f7064d11 968bool
22ea9ec0 969slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
a023975e 970{
ac8f6c69 971 edge exit_e = single_exit (loop);
a023975e 972 edge entry_e = loop_preheader_edge (loop);
726a989a
RB
973 gimple orig_cond = get_loop_exit_condition (loop);
974 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
a023975e 975
84d65814 976 if (need_ssa_update_p ())
d6901754 977 return false;
a023975e 978
d6901754
DN
979 if (loop->inner
980 /* All loops have an outer scope; the only case loop->outer is NULL is for
981 the function itself. */
9ba025a2 982 || !loop_outer (loop)
d6901754
DN
983 || loop->num_nodes != 2
984 || !empty_block_p (loop->latch)
ac8f6c69 985 || !single_exit (loop)
d6901754 986 /* Verify that new loop exit condition can be trivially modified. */
726a989a 987 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
d6901754
DN
988 || (e != exit_e && e != entry_e))
989 return false;
a023975e
OG
990
991 return true;
992}
993
1d8a9009 994#ifdef ENABLE_CHECKING
f7064d11 995void
63dfe6ff
DN
996slpeel_verify_cfg_after_peeling (struct loop *first_loop,
997 struct loop *second_loop)
998{
ac8f6c69 999 basic_block loop1_exit_bb = single_exit (first_loop)->dest;
70388d94 1000 basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
63dfe6ff
DN
1001 basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
1002
1003 /* A guard that controls whether the second_loop is to be executed or skipped
fa10beec 1004 is placed in first_loop->exit. first_loop->exit therefore has two
63dfe6ff
DN
1005 successors - one is the preheader of second_loop, and the other is a bb
1006 after second_loop.
1007 */
1008 gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);
a023975e 1009
fa10beec 1010 /* 1. Verify that one of the successors of first_loop->exit is the preheader
63dfe6ff 1011 of second_loop. */
a023975e 1012
0fa2e4df 1013 /* The preheader of new_loop is expected to have two predecessors:
63dfe6ff
DN
1014 first_loop->exit and the block that precedes first_loop. */
1015
1016 gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
1017 && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
1018 && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
1019 || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
1020 && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));
1021
fa10beec 1022 /* Verify that the other successor of first_loop->exit is after the
63dfe6ff
DN
1023 second_loop. */
1024 /* TODO */
1025}
1d8a9009 1026#endif
a023975e 1027
749cc4b1
HJ
1028/* If the run time cost model check determines that vectorization is
1029 not profitable and hence scalar loop should be generated then set
1030 FIRST_NITERS to prologue peeled iterations. This will allow all the
1031 iterations to be executed in the prologue peeled scalar loop. */
1032
1033void
1034set_prologue_iterations (basic_block bb_before_first_loop,
1035 tree first_niters,
1036 struct loop *loop,
1037 unsigned int th)
1038{
1039 edge e;
1040 basic_block cond_bb, then_bb;
726a989a
RB
1041 tree var, prologue_after_cost_adjust_name;
1042 gimple_stmt_iterator gsi;
1043 gimple newphi;
749cc4b1 1044 edge e_true, e_false, e_fallthru;
726a989a
RB
1045 gimple cond_stmt;
1046 gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
749cc4b1
HJ
1047 tree cost_pre_condition = NULL_TREE;
1048 tree scalar_loop_iters =
4f1f33aa 1049 unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));
749cc4b1
HJ
1050
1051 e = single_pred_edge (bb_before_first_loop);
1052 cond_bb = split_edge(e);
1053
1054 e = single_pred_edge (bb_before_first_loop);
1055 then_bb = split_edge(e);
1056 set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);
1057
1058 e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
1059 EDGE_FALSE_VALUE);
1060 set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);
1061
1062 e_true = EDGE_PRED (then_bb, 0);
1063 e_true->flags &= ~EDGE_FALLTHRU;
1064 e_true->flags |= EDGE_TRUE_VALUE;
1065
1066 e_fallthru = EDGE_SUCC (then_bb, 0);
1067
1068 cost_pre_condition =
1069 build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1070 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
1071 cost_pre_condition =
1072 force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
1073 true, NULL_TREE);
726a989a 1074 cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
420da8ca
RG
1075 build_int_cst (TREE_TYPE (cost_pre_condition),
1076 0), NULL_TREE, NULL_TREE);
749cc4b1 1077
726a989a 1078 gsi = gsi_last_bb (cond_bb);
749cc4b1 1079 if (gimplify_stmt_list)
726a989a 1080 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
749cc4b1 1081
726a989a
RB
1082 gsi = gsi_last_bb (cond_bb);
1083 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
749cc4b1
HJ
1084
1085 var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
1086 "prologue_after_cost_adjust");
1087 add_referenced_var (var);
1088 prologue_after_cost_adjust_name =
726a989a 1089 force_gimple_operand (scalar_loop_iters, &stmts, false, var);
749cc4b1 1090
726a989a
RB
1091 gsi = gsi_last_bb (then_bb);
1092 if (stmts)
1093 gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
749cc4b1
HJ
1094
1095 newphi = create_phi_node (var, bb_before_first_loop);
1096 add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
1097 add_phi_arg (newphi, first_niters, e_false);
1098
1099 first_niters = PHI_RESULT (newphi);
1100}
1101
1102
63dfe6ff 1103/* Function slpeel_tree_peel_loop_to_edge.
a023975e 1104
63dfe6ff
DN
1105 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
1106 that is placed on the entry (exit) edge E of LOOP. After this transformation
1107 we have two loops one after the other - first-loop iterates FIRST_NITERS
1108 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
749cc4b1
HJ
1109 If the cost model indicates that it is profitable to emit a scalar
1110 loop instead of the vector one, then the prolog (epilog) loop will iterate
1111 for the entire unchanged scalar iterations of the loop.
a023975e 1112
63dfe6ff
DN
1113 Input:
1114 - LOOP: the loop to be peeled.
1115 - E: the exit or entry edge of LOOP.
1116 If it is the entry edge, we peel the first iterations of LOOP. In this
1117 case first-loop is LOOP, and second-loop is the newly created loop.
1118 If it is the exit edge, we peel the last iterations of LOOP. In this
1119 case, first-loop is the newly created loop, and second-loop is LOOP.
1120 - NITERS: the number of iterations that LOOP iterates.
1121 - FIRST_NITERS: the number of iterations that the first-loop should iterate.
e7a531ae 1122 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible
63dfe6ff
DN
1123 for updating the loop bound of the first-loop to FIRST_NITERS. If it
1124 is false, the caller of this function may want to take care of this
e7a531ae 1125 (this can be useful if we don't want new stmts added to first-loop).
749cc4b1 1126 - TH: cost model profitability threshold of iterations for vectorization.
fa10beec 1127 - CHECK_PROFITABILITY: specify whether cost model check has not occurred
749cc4b1
HJ
1128 during versioning and hence needs to occur during
1129 prologue generation or whether cost model check
fa10beec 1130 has not occurred during prologue generation and hence
749cc4b1
HJ
1131 needs to occur during epilogue generation.
1132
a023975e 1133
63dfe6ff
DN
1134 Output:
1135 The function returns a pointer to the new loop-copy, or NULL if it failed
e7a531ae 1136 to perform the transformation.
63dfe6ff
DN
1137
1138 The function generates two if-then-else guards: one before the first loop,
1139 and the other before the second loop:
1140 The first guard is:
1141 if (FIRST_NITERS == 0) then skip the first loop,
1142 and go directly to the second loop.
1143 The second guard is:
1144 if (FIRST_NITERS == NITERS) then skip the second loop.
1145
1146 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
1147 FORNOW the resulting code will not be in loop-closed-ssa form.
1148*/
a023975e 1149
a023975e 1150struct loop*
d73be268 1151slpeel_tree_peel_loop_to_edge (struct loop *loop,
f88a8cfa 1152 edge e, tree first_niters,
acdc40df 1153 tree niters, bool update_first_loop_count,
749cc4b1 1154 unsigned int th, bool check_profitability)
a023975e
OG
1155{
1156 struct loop *new_loop = NULL, *first_loop, *second_loop;
1157 edge skip_e;
749cc4b1 1158 tree pre_condition = NULL_TREE;
a023975e 1159 bitmap definitions;
63dfe6ff
DN
1160 basic_block bb_before_second_loop, bb_after_second_loop;
1161 basic_block bb_before_first_loop;
1162 basic_block bb_between_loops;
3ce66cf1 1163 basic_block new_exit_bb;
ac8f6c69 1164 edge exit_e = single_exit (loop);
7353a8c1 1165 LOC loop_loc;
749cc4b1 1166 tree cost_pre_condition = NULL_TREE;
63dfe6ff 1167
d6901754
DN
1168 if (!slpeel_can_duplicate_loop_p (loop, e))
1169 return NULL;
63dfe6ff
DN
1170
1171 /* We have to initialize cfg_hooks. Then, when calling
a023975e 1172 cfg_hooks->split_edge, the function tree_split_edge
63dfe6ff 1173 is actually called and, when calling cfg_hooks->duplicate_block,
a023975e 1174 the function tree_duplicate_bb is called. */
726a989a 1175 gimple_register_cfg_hooks ();
a023975e 1176
63dfe6ff
DN
1177
1178 /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
1179 Resulting CFG would be:
1180
1181 first_loop:
1182 do {
1183 } while ...
1184
1185 second_loop:
1186 do {
1187 } while ...
1188
1189 orig_exit_bb:
1190 */
1191
d73be268 1192 if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
a023975e 1193 {
7353a8c1 1194 loop_loc = find_loop_location (loop);
c866976a
LB
1195 if (dump_file && (dump_flags & TDF_DETAILS))
1196 {
1197 if (loop_loc != UNKNOWN_LOC)
1198 fprintf (dump_file, "\n%s:%d: note: ",
1199 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
1200 fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
1201 }
a023975e
OG
1202 return NULL;
1203 }
63dfe6ff 1204
a023975e
OG
1205 if (e == exit_e)
1206 {
63dfe6ff 1207 /* NEW_LOOP was placed after LOOP. */
a023975e
OG
1208 first_loop = loop;
1209 second_loop = new_loop;
1210 }
63dfe6ff 1211 else
a023975e 1212 {
63dfe6ff 1213 /* NEW_LOOP was placed before LOOP. */
a023975e
OG
1214 first_loop = new_loop;
1215 second_loop = loop;
1216 }
1217
84d65814 1218 definitions = ssa_names_to_replace ();
63dfe6ff
DN
1219 slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
1220 rename_variables_in_loop (new_loop);
a023975e 1221
a023975e 1222
749cc4b1 1223 /* 2. Add the guard code in one of the following ways:
63dfe6ff 1224
749cc4b1 1225 2.a Add the guard that controls whether the first loop is executed.
fa10beec 1226 This occurs when this function is invoked for prologue or epilogue
749cc4b1 1227 generation and when the cost model check can be done at compile time.
63dfe6ff 1228
749cc4b1 1229 Resulting CFG would be:
63dfe6ff 1230
749cc4b1
HJ
1231 bb_before_first_loop:
1232 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1233 GOTO first-loop
a023975e 1234
749cc4b1
HJ
1235 first_loop:
1236 do {
1237 } while ...
a023975e 1238
749cc4b1
HJ
1239 bb_before_second_loop:
1240
1241 second_loop:
1242 do {
1243 } while ...
1244
1245 orig_exit_bb:
1246
1247 2.b Add the cost model check that allows the prologue
1248 to iterate for the entire unchanged scalar
1249 iterations of the loop in the event that the cost
1250 model indicates that the scalar loop is more
1251 profitable than the vector one. This occurs when
1252 this function is invoked for prologue generation
1253 and the cost model check needs to be done at run
1254 time.
1255
1256 Resulting CFG after prologue peeling would be:
1257
1258 if (scalar_loop_iterations <= th)
1259 FIRST_NITERS = scalar_loop_iterations
1260
1261 bb_before_first_loop:
1262 if (FIRST_NITERS == 0) GOTO bb_before_second_loop
1263 GOTO first-loop
1264
1265 first_loop:
1266 do {
1267 } while ...
1268
1269 bb_before_second_loop:
1270
1271 second_loop:
1272 do {
1273 } while ...
1274
1275 orig_exit_bb:
1276
1277 2.c Add the cost model check that allows the epilogue
1278 to iterate for the entire unchanged scalar
1279 iterations of the loop in the event that the cost
1280 model indicates that the scalar loop is more
1281 profitable than the vector one. This occurs when
1282 this function is invoked for epilogue generation
1283 and the cost model check needs to be done at run
1284 time.
1285
1286 Resulting CFG after prologue peeling would be:
1287
1288 bb_before_first_loop:
1289 if ((scalar_loop_iterations <= th)
1290 ||
1291 FIRST_NITERS == 0) GOTO bb_before_second_loop
1292 GOTO first-loop
1293
1294 first_loop:
1295 do {
1296 } while ...
1297
1298 bb_before_second_loop:
1299
1300 second_loop:
1301 do {
1302 } while ...
1303
1304 orig_exit_bb:
1305 */
a023975e 1306
63dfe6ff 1307 bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
ac8f6c69 1308 bb_before_second_loop = split_edge (single_exit (first_loop));
63dfe6ff 1309
749cc4b1
HJ
1310 /* Epilogue peeling. */
1311 if (!update_first_loop_count)
1312 {
1313 pre_condition =
1314 fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1315 build_int_cst (TREE_TYPE (first_niters), 0));
1316 if (check_profitability)
1317 {
4f1f33aa
JJ
1318 tree scalar_loop_iters
1319 = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
1320 (loop_vec_info_for_loop (loop)));
1321 cost_pre_condition =
749cc4b1
HJ
1322 build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
1323 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
4f1f33aa 1324
749cc4b1
HJ
1325 pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
1326 cost_pre_condition, pre_condition);
1327 }
1328 }
1329
1330 /* Prologue peeling. */
1331 else
1332 {
1333 if (check_profitability)
1334 set_prologue_iterations (bb_before_first_loop, first_niters,
1335 loop, th);
1336
1337 pre_condition =
1338 fold_build2 (LE_EXPR, boolean_type_node, first_niters,
1339 build_int_cst (TREE_TYPE (first_niters), 0));
1340 }
acdc40df 1341
63dfe6ff
DN
1342 skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
1343 bb_before_second_loop, bb_before_first_loop);
3ce66cf1
DN
1344 slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
1345 first_loop == new_loop,
1346 &new_exit_bb, &definitions);
63dfe6ff
DN
1347
1348
1349 /* 3. Add the guard that controls whether the second loop is executed.
1350 Resulting CFG would be:
79fe1b3b 1351
63dfe6ff
DN
1352 bb_before_first_loop:
1353 if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
1354 GOTO first-loop
a023975e 1355
63dfe6ff
DN
1356 first_loop:
1357 do {
1358 } while ...
a023975e 1359
63dfe6ff
DN
1360 bb_between_loops:
1361 if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
1362 GOTO bb_before_second_loop
a023975e 1363
63dfe6ff 1364 bb_before_second_loop:
a023975e 1365
63dfe6ff
DN
1366 second_loop:
1367 do {
1368 } while ...
a023975e 1369
63dfe6ff 1370 bb_after_second_loop:
a023975e 1371
63dfe6ff
DN
1372 orig_exit_bb:
1373 */
1374
3ce66cf1 1375 bb_between_loops = new_exit_bb;
ac8f6c69 1376 bb_after_second_loop = split_edge (single_exit (second_loop));
a023975e 1377
5f55a1ba 1378 pre_condition =
987b67bc 1379 fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
63dfe6ff
DN
1380 skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
1381 bb_after_second_loop, bb_before_first_loop);
3ce66cf1
DN
1382 slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
1383 second_loop == new_loop, &new_exit_bb);
a023975e 1384
63dfe6ff
DN
1385 /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
1386 */
1387 if (update_first_loop_count)
1388 slpeel_make_loop_iterate_ntimes (first_loop, first_niters);
a023975e 1389
8bdbfff5 1390 BITMAP_FREE (definitions);
84d65814 1391 delete_update_ssa ();
63dfe6ff 1392
a023975e
OG
1393 return new_loop;
1394}
1395
7353a8c1
LB
1396/* Function vect_get_loop_location.
1397
1398 Extract the location of the loop in the source code.
1399 If the loop is not well formed for vectorization, an estimated
1400 location is calculated.
1401 Return the loop location if succeed and NULL if not. */
1402
f7064d11 1403LOC
7353a8c1
LB
1404find_loop_location (struct loop *loop)
1405{
726a989a 1406 gimple stmt = NULL;
7353a8c1 1407 basic_block bb;
726a989a 1408 gimple_stmt_iterator si;
7353a8c1
LB
1409
1410 if (!loop)
1411 return UNKNOWN_LOC;
1412
726a989a 1413 stmt = get_loop_exit_condition (loop);
7353a8c1 1414
726a989a
RB
1415 if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
1416 return gimple_location (stmt);
7353a8c1
LB
1417
1418 /* If we got here the loop is probably not "well formed",
1419 try to estimate the loop location */
1420
1421 if (!loop->header)
1422 return UNKNOWN_LOC;
1423
1424 bb = loop->header;
1425
726a989a 1426 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
7353a8c1 1427 {
726a989a
RB
1428 stmt = gsi_stmt (si);
1429 if (gimple_location (stmt) != UNKNOWN_LOC)
1430 return gimple_location (stmt);
7353a8c1
LB
1431 }
1432
1433 return UNKNOWN_LOC;
1434}
1435
1436
c866976a
LB
1437/*************************************************************************
1438 Vectorization Debug Information.
1439 *************************************************************************/
1440
1441/* Function vect_set_verbosity_level.
1442
1443 Called from toplev.c upon detection of the
1444 -ftree-vectorizer-verbose=N option. */
1445
1446void
1447vect_set_verbosity_level (const char *val)
1448{
1449 unsigned int vl;
1450
1451 vl = atoi (val);
1452 if (vl < MAX_VERBOSITY_LEVEL)
1453 vect_verbosity_level = vl;
1454 else
1455 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1;
1456}
1457
1458
1459/* Function vect_set_dump_settings.
1460
1461 Fix the verbosity level of the vectorizer if the
1462 requested level was not set explicitly using the flag
1463 -ftree-vectorizer-verbose=N.
1464 Decide where to print the debugging information (dump_file/stderr).
1465 If the user defined the verbosity level, but there is no dump file,
1466 print to stderr, otherwise print to the dump file. */
1467
1468static void
1469vect_set_dump_settings (void)
1470{
1471 vect_dump = dump_file;
1472
1473 /* Check if the verbosity level was defined by the user: */
1474 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL)
1475 {
1476 /* If there is no dump file, print to stderr. */
1477 if (!dump_file)
1478 vect_dump = stderr;
1479 return;
1480 }
1481
1482 /* User didn't specify verbosity level: */
8ca3515f 1483 if (dump_file && (dump_flags & TDF_DETAILS))
c866976a 1484 vect_verbosity_level = REPORT_DETAILS;
8ca3515f 1485 else if (dump_file && (dump_flags & TDF_STATS))
c866976a
LB
1486 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS;
1487 else
1488 vect_verbosity_level = REPORT_NONE;
8ca3515f
DN
1489
1490 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE);
c866976a
LB
1491}
1492
1493
1494/* Function debug_loop_details.
1495
1496 For vectorization debug dumps. */
1497
f7064d11 1498bool
00518cb1 1499vect_print_dump_info (enum verbosity_levels vl)
c866976a
LB
1500{
1501 if (vl > vect_verbosity_level)
1502 return false;
1503
0be79f24
DN
1504 if (!current_function_decl || !vect_dump)
1505 return false;
1506
00518cb1 1507 if (vect_loop_location == UNKNOWN_LOC)
c866976a 1508 fprintf (vect_dump, "\n%s:%d: note: ",
5fbd2051
MS
1509 DECL_SOURCE_FILE (current_function_decl),
1510 DECL_SOURCE_LINE (current_function_decl));
c866976a 1511 else
00518cb1
DP
1512 fprintf (vect_dump, "\n%s:%d: note: ",
1513 LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location));
c866976a 1514
c866976a
LB
1515 return true;
1516}
1517
1518
f88a8cfa
DN
1519/*************************************************************************
1520 Vectorization Utilities.
1521 *************************************************************************/
1522
79fe1b3b
DN
1523/* Function new_stmt_vec_info.
1524
1525 Create and initialize a new stmt_vec_info struct for STMT. */
1526
1527stmt_vec_info
726a989a 1528new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo)
79fe1b3b
DN
1529{
1530 stmt_vec_info res;
1531 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
1532
1533 STMT_VINFO_TYPE (res) = undef_vec_info_type;
1534 STMT_VINFO_STMT (res) = stmt;
ef302293 1535 STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
89d67cca
DN
1536 STMT_VINFO_RELEVANT (res) = 0;
1537 STMT_VINFO_LIVE_P (res) = false;
79fe1b3b
DN
1538 STMT_VINFO_VECTYPE (res) = NULL;
1539 STMT_VINFO_VEC_STMT (res) = NULL;
20f06221
DN
1540 STMT_VINFO_IN_PATTERN_P (res) = false;
1541 STMT_VINFO_RELATED_STMT (res) = NULL;
79fe1b3b 1542 STMT_VINFO_DATA_REF (res) = NULL;
468c2ac0
DN
1543
1544 STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
1545 STMT_VINFO_DR_OFFSET (res) = NULL;
1546 STMT_VINFO_DR_INIT (res) = NULL;
1547 STMT_VINFO_DR_STEP (res) = NULL;
1548 STMT_VINFO_DR_ALIGNED_TO (res) = NULL;
1549
726a989a
RB
1550 if (gimple_code (stmt) == GIMPLE_PHI
1551 && is_loop_header_bb_p (gimple_bb (stmt)))
88088c03
DN
1552 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
1553 else
1554 STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
61d3cdbb 1555 STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
792ed98b
HJ
1556 STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
1557 STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
805e2059 1558 STMT_SLP_TYPE (res) = 0;
726a989a
RB
1559 DR_GROUP_FIRST_DR (res) = NULL;
1560 DR_GROUP_NEXT_DR (res) = NULL;
98b44b0e
IR
1561 DR_GROUP_SIZE (res) = 0;
1562 DR_GROUP_STORE_COUNT (res) = 0;
1563 DR_GROUP_GAP (res) = 0;
726a989a 1564 DR_GROUP_SAME_DR_STMT (res) = NULL;
6004caaf 1565 DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;
79fe1b3b
DN
1566
1567 return res;
1568}
1569
726a989a
RB
1570/* Create a hash table for stmt_vec_info. */
1571
1572void
1573init_stmt_vec_info_vec (void)
1574{
1575 gcc_assert (!stmt_vec_info_vec);
1576 stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
1577}
1578
1579/* Free hash table for stmt_vec_info. */
1580
1581void
1582free_stmt_vec_info_vec (void)
1583{
1584 gcc_assert (stmt_vec_info_vec);
1585 VEC_free (vec_void_p, heap, stmt_vec_info_vec);
1586}
79fe1b3b 1587
62878103
VK
1588/* Free stmt vectorization related info. */
1589
1590void
726a989a 1591free_stmt_vec_info (gimple stmt)
62878103
VK
1592{
1593 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1594
1595 if (!stmt_info)
1596 return;
1597
1598 VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
726a989a 1599 set_vinfo_for_stmt (stmt, NULL);
62878103 1600 free (stmt_info);
62878103
VK
1601}
1602
1603
d29de1bf
DN
1604/* Function bb_in_loop_p
1605
1606 Used as predicate for dfs order traversal of the loop bbs. */
1607
1608static bool
1609bb_in_loop_p (const_basic_block bb, const void *data)
1610{
586de218 1611 const struct loop *const loop = (const struct loop *)data;
d29de1bf
DN
1612 if (flow_bb_inside_loop_p (loop, bb))
1613 return true;
1614 return false;
1615}
1616
1617
79fe1b3b
DN
1618/* Function new_loop_vec_info.
1619
1620 Create and initialize a new loop_vec_info struct for LOOP, as well as
1621 stmt_vec_info structs for all the stmts in LOOP. */
1622
1623loop_vec_info
1624new_loop_vec_info (struct loop *loop)
1625{
1626 loop_vec_info res;
1627 basic_block *bbs;
726a989a 1628 gimple_stmt_iterator si;
d29de1bf 1629 unsigned int i, nbbs;
79fe1b3b
DN
1630
1631 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
d29de1bf 1632 LOOP_VINFO_LOOP (res) = loop;
79fe1b3b
DN
1633
1634 bbs = get_loop_body (loop);
1635
d29de1bf 1636 /* Create/Update stmt_info for all stmts in the loop. */
79fe1b3b
DN
1637 for (i = 0; i < loop->num_nodes; i++)
1638 {
1639 basic_block bb = bbs[i];
88088c03 1640
d29de1bf
DN
1641 /* BBs in a nested inner-loop will have been already processed (because
1642 we will have called vect_analyze_loop_form for any nested inner-loop).
1643 Therefore, for stmts in an inner-loop we just want to update the
1644 STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
1645 loop_info of the outer-loop we are currently considering to vectorize
1646 (instead of the loop_info of the inner-loop).
1647 For stmts in other BBs we need to create a stmt_info from scratch. */
1648 if (bb->loop_father != loop)
79fe1b3b 1649 {
d29de1bf
DN
1650 /* Inner-loop bb. */
1651 gcc_assert (loop->inner && bb->loop_father == loop->inner);
726a989a 1652 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
d29de1bf 1653 {
726a989a 1654 gimple phi = gsi_stmt (si);
d29de1bf 1655 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
726a989a
RB
1656 loop_vec_info inner_loop_vinfo =
1657 STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf
DN
1658 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
1659 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
1660 }
726a989a 1661 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
d29de1bf 1662 {
726a989a 1663 gimple stmt = gsi_stmt (si);
d29de1bf 1664 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
726a989a
RB
1665 loop_vec_info inner_loop_vinfo =
1666 STMT_VINFO_LOOP_VINFO (stmt_info);
d29de1bf
DN
1667 gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
1668 STMT_VINFO_LOOP_VINFO (stmt_info) = res;
1669 }
1670 }
1671 else
1672 {
1673 /* bb in current nest. */
726a989a 1674 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
d29de1bf 1675 {
726a989a
RB
1676 gimple phi = gsi_stmt (si);
1677 gimple_set_uid (phi, 0);
1678 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
d29de1bf 1679 }
79fe1b3b 1680
726a989a 1681 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
d29de1bf 1682 {
726a989a
RB
1683 gimple stmt = gsi_stmt (si);
1684 gimple_set_uid (stmt, 0);
1685 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
d29de1bf 1686 }
79fe1b3b
DN
1687 }
1688 }
1689
d29de1bf
DN
1690 /* CHECKME: We want to visit all BBs before their successors (except for
1691 latch blocks, for which this assertion wouldn't hold). In the simple
1692 case of the loop forms we allow, a dfs order of the BBs would the same
1693 as reversed postorder traversal, so we are safe. */
1694
1695 free (bbs);
1696 bbs = XCNEWVEC (basic_block, loop->num_nodes);
1697 nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1698 bbs, loop->num_nodes, loop);
1699 gcc_assert (nbbs == loop->num_nodes);
1700
79fe1b3b 1701 LOOP_VINFO_BBS (res) = bbs;
a023975e 1702 LOOP_VINFO_NITERS (res) = NULL;
749cc4b1 1703 LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
3a70f3ef 1704 LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
79fe1b3b 1705 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
5f55a1ba 1706 LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
79fe1b3b 1707 LOOP_VINFO_VECT_FACTOR (res) = 0;
ebf78a47
SP
1708 LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
1709 LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
0dc0a70b 1710 LOOP_VINFO_UNALIGNED_DR (res) = NULL;
bc1edb77 1711 LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
726a989a
RB
1712 VEC_alloc (gimple, heap,
1713 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
bc1edb77 1714 LOOP_VINFO_MAY_ALIAS_DDRS (res) =
726a989a
RB
1715 VEC_alloc (ddr_p, heap,
1716 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
1717 LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
805e2059
IR
1718 LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
1719 LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;
a023975e 1720
79fe1b3b
DN
1721 return res;
1722}
1723
1724
1725/* Function destroy_loop_vec_info.
1726
1727 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
1728 stmts in the loop. */
1729
1730void
d29de1bf 1731destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
79fe1b3b
DN
1732{
1733 struct loop *loop;
1734 basic_block *bbs;
1735 int nbbs;
726a989a 1736 gimple_stmt_iterator si;
79fe1b3b 1737 int j;
805e2059
IR
1738 VEC (slp_instance, heap) *slp_instances;
1739 slp_instance instance;
79fe1b3b
DN
1740
1741 if (!loop_vinfo)
1742 return;
1743
1744 loop = LOOP_VINFO_LOOP (loop_vinfo);
1745
1746 bbs = LOOP_VINFO_BBS (loop_vinfo);
1747 nbbs = loop->num_nodes;
1748
d29de1bf
DN
1749 if (!clean_stmts)
1750 {
1751 free (LOOP_VINFO_BBS (loop_vinfo));
1752 free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1753 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
726a989a 1754 VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
d29de1bf
DN
1755
1756 free (loop_vinfo);
1757 loop->aux = NULL;
1758 return;
1759 }
1760
79fe1b3b
DN
1761 for (j = 0; j < nbbs; j++)
1762 {
1763 basic_block bb = bbs[j];
88088c03 1764
726a989a
RB
1765 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1766 free_stmt_vec_info (gsi_stmt (si));
88088c03 1767
726a989a 1768 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
79fe1b3b 1769 {
726a989a 1770 gimple stmt = gsi_stmt (si);
79fe1b3b 1771 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
bb748329
DN
1772
1773 if (stmt_info)
1774 {
a8b28492
DN
1775 /* Check if this is a "pattern stmt" (introduced by the
1776 vectorizer during the pattern recognition pass). */
1777 bool remove_stmt_p = false;
726a989a 1778 gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
a8b28492
DN
1779 if (orig_stmt)
1780 {
1781 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
1782 if (orig_stmt_info
1783 && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
1784 remove_stmt_p = true;
1785 }
1786
1787 /* Free stmt_vec_info. */
62878103 1788 free_stmt_vec_info (stmt);
a8b28492
DN
1789
1790 /* Remove dead "pattern stmts". */
1791 if (remove_stmt_p)
726a989a 1792 gsi_remove (&si, true);
bb748329 1793 }
726a989a 1794 gsi_next (&si);
79fe1b3b
DN
1795 }
1796 }
1797
1798 free (LOOP_VINFO_BBS (loop_vinfo));
ebf78a47
SP
1799 free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
1800 free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
726a989a 1801 VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
bc1edb77 1802 VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
805e2059
IR
1803 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
1804 for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
0fca40f5
IR
1805 vect_free_slp_instance (instance);
1806
805e2059 1807 VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
726a989a 1808 VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));
79fe1b3b
DN
1809
1810 free (loop_vinfo);
28e44f4f 1811 loop->aux = NULL;
79fe1b3b
DN
1812}
1813
1814
79fe1b3b
DN
1815/* Function vect_force_dr_alignment_p.
1816
1817 Returns whether the alignment of a DECL can be forced to be aligned
1818 on ALIGNMENT bit boundary. */
1819
f7064d11 1820bool
58f9752a 1821vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
79fe1b3b
DN
1822{
1823 if (TREE_CODE (decl) != VAR_DECL)
1824 return false;
1825
1826 if (DECL_EXTERNAL (decl))
1827 return false;
1828
d75bf0ca
DN
1829 if (TREE_ASM_WRITTEN (decl))
1830 return false;
1831
79fe1b3b
DN
1832 if (TREE_STATIC (decl))
1833 return (alignment <= MAX_OFILE_ALIGNMENT);
1834 else
2e3f842f 1835 return (alignment <= MAX_STACK_ALIGNMENT);
79fe1b3b
DN
1836}
1837
1838
79fe1b3b
DN
1839/* Function get_vectype_for_scalar_type.
1840
1841 Returns the vector type corresponding to SCALAR_TYPE as supported
1842 by the target. */
1843
f7064d11 1844tree
79fe1b3b
DN
1845get_vectype_for_scalar_type (tree scalar_type)
1846{
1847 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1848 int nbytes = GET_MODE_SIZE (inner_mode);
1849 int nunits;
6775f1f3 1850 tree vectype;
79fe1b3b 1851
9d3a9de1 1852 if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
79fe1b3b
DN
1853 return NULL_TREE;
1854
9d3a9de1 1855 /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
79fe1b3b 1856 is expected. */
9d3a9de1 1857 nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;
79fe1b3b 1858
6775f1f3 1859 vectype = build_vector_type (scalar_type, nunits);
00518cb1 1860 if (vect_print_dump_info (REPORT_DETAILS))
f0923257 1861 {
c866976a
LB
1862 fprintf (vect_dump, "get vectype with %d units of type ", nunits);
1863 print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
f0923257
DN
1864 }
1865
1866 if (!vectype)
6775f1f3 1867 return NULL_TREE;
f0923257 1868
00518cb1 1869 if (vect_print_dump_info (REPORT_DETAILS))
f0923257 1870 {
c866976a
LB
1871 fprintf (vect_dump, "vectype: ");
1872 print_generic_expr (vect_dump, vectype, TDF_SLIM);
f0923257
DN
1873 }
1874
c4336539
PB
1875 if (!VECTOR_MODE_P (TYPE_MODE (vectype))
1876 && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
f0923257 1877 {
00518cb1 1878 if (vect_print_dump_info (REPORT_DETAILS))
c866976a 1879 fprintf (vect_dump, "mode not supported by target.");
f0923257
DN
1880 return NULL_TREE;
1881 }
1882
6775f1f3 1883 return vectype;
79fe1b3b
DN
1884}
1885
1886
f7064d11 1887/* Function vect_supportable_dr_alignment
79fe1b3b 1888
f7064d11
DN
1889 Return whether the data reference DR is supported with respect to its
1890 alignment. */
79fe1b3b 1891
f7064d11
DN
1892enum dr_alignment_support
1893vect_supportable_dr_alignment (struct data_reference *dr)
79fe1b3b 1894{
726a989a 1895 gimple stmt = DR_STMT (dr);
468c2ac0
DN
1896 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1897 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
f7064d11 1898 enum machine_mode mode = (int) TYPE_MODE (vectype);
468c2ac0
DN
1899 struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
1900 bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
1901 bool invariant_in_outerloop = false;
79fe1b3b 1902
f7064d11
DN
1903 if (aligned_access_p (dr))
1904 return dr_aligned;
79fe1b3b 1905
468c2ac0
DN
1906 if (nested_in_vect_loop)
1907 {
1908 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
1909 invariant_in_outerloop =
1910 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
1911 }
1912
f7064d11 1913 /* Possibly unaligned access. */
468c2ac0
DN
1914
1915 /* We can choose between using the implicit realignment scheme (generating
1916 a misaligned_move stmt) and the explicit realignment scheme (generating
1917 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
1918 realignment scheme: optimized, and unoptimized.
1919 We can optimize the realignment only if the step between consecutive
1920 vector loads is equal to the vector size. Since the vector memory
1921 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
1922 is guaranteed that the misalignment amount remains the same throughout the
1923 execution of the vectorized loop. Therefore, we can create the
1924 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
1925 at the loop preheader.
1926
1927 However, in the case of outer-loop vectorization, when vectorizing a
1928 memory access in the inner-loop nested within the LOOP that is now being
1929 vectorized, while it is guaranteed that the misalignment of the
1930 vectorized memory access will remain the same in different outer-loop
1931 iterations, it is *not* guaranteed that is will remain the same throughout
1932 the execution of the inner-loop. This is because the inner-loop advances
1933 with the original scalar step (and not in steps of VS). If the inner-loop
15dc95cb 1934 step happens to be a multiple of VS, then the misalignment remains fixed
468c2ac0
DN
1935 and we can use the optimized realignment scheme. For example:
1936
1937 for (i=0; i<N; i++)
1938 for (j=0; j<M; j++)
1939 s += a[i+j];
1940
1941 When vectorizing the i-loop in the above example, the step between
1942 consecutive vector loads is 1, and so the misalignment does not remain
1943 fixed across the execution of the inner-loop, and the realignment cannot
1944 be optimized (as illustrated in the following pseudo vectorized loop):
1945
1946 for (i=0; i<N; i+=4)
1947 for (j=0; j<M; j++){
1948 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
1949 // when j is {0,1,2,3,4,5,6,7,...} respectively.
1950 // (assuming that we start from an aligned address).
1951 }
1952
1953 We therefore have to use the unoptimized realignment scheme:
1954
1955 for (i=0; i<N; i+=4)
1956 for (j=k; j<M; j+=4)
1957 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
1958 // that the misalignment of the initial address is
1959 // 0).
1960
1961 The loop can then be vectorized as follows:
1962
1963 for (k=0; k<4; k++){
1964 rt = get_realignment_token (&vp[k]);
1965 for (i=0; i<N; i+=4){
1966 v1 = vp[i+k];
1967 for (j=k; j<M; j+=4){
1968 v2 = vp[i+j+VS-1];
1969 va = REALIGN_LOAD <v1,v2,rt>;
1970 vs += va;
1971 v1 = v2;
1972 }
1973 }
1974 } */
1975
f7064d11
DN
1976 if (DR_IS_READ (dr))
1977 {
468c2ac0
DN
1978 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
1979 CODE_FOR_nothing
f7064d11
DN
1980 && (!targetm.vectorize.builtin_mask_for_load
1981 || targetm.vectorize.builtin_mask_for_load ()))
468c2ac0 1982 {
9d3a9de1
L
1983 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1984 if (nested_in_vect_loop
1985 && (TREE_INT_CST_LOW (DR_STEP (dr))
1986 != GET_MODE_SIZE (TYPE_MODE (vectype))))
1987 return dr_explicit_realign;
1988 else
1989 return dr_explicit_realign_optimized;
468c2ac0 1990 }
7ccf35ed 1991
468c2ac0
DN
1992 if (optab_handler (movmisalign_optab, mode)->insn_code !=
1993 CODE_FOR_nothing)
f7064d11
DN
1994 /* Can't software pipeline the loads, but can at least do them. */
1995 return dr_unaligned_supported;
1996 }
7ccf35ed 1997
f7064d11
DN
1998 /* Unsupported. */
1999 return dr_unaligned_unsupported;
2000}
7ccf35ed 2001
7ccf35ed 2002
f7064d11 2003/* Function vect_is_simple_use.
7ccf35ed 2004
f7064d11
DN
2005 Input:
2006 LOOP - the loop that is being vectorized.
2007 OPERAND - operand of a stmt in LOOP.
2008 DEF - the defining stmt in case OPERAND is an SSA_NAME.
7ccf35ed 2009
f7064d11
DN
2010 Returns whether a stmt with OPERAND can be vectorized.
2011 Supportable operands are constants, loop invariants, and operands that are
2012 defined by the current iteration of the loop. Unsupportable operands are
2013 those that are defined by a previous iteration of the loop (as is the case
2014 in reduction/induction computations). */
7ccf35ed 2015
f7064d11 2016bool
726a989a 2017vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
88088c03 2018 tree *def, enum vect_def_type *dt)
f7064d11 2019{
f7064d11 2020 basic_block bb;
88088c03 2021 stmt_vec_info stmt_vinfo;
f7064d11 2022 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7ccf35ed 2023
726a989a 2024 *def_stmt = NULL;
88088c03
DN
2025 *def = NULL_TREE;
2026
00518cb1 2027 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
2028 {
2029 fprintf (vect_dump, "vect_is_simple_use: operand ");
2030 print_generic_expr (vect_dump, operand, TDF_SLIM);
2031 }
2032
f7064d11 2033 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
88088c03
DN
2034 {
2035 *dt = vect_constant_def;
2036 return true;
2037 }
617428e9 2038 if (is_gimple_min_invariant (operand))
4ee279f2 2039 {
617428e9
JH
2040 *def = operand;
2041 *dt = vect_invariant_def;
2042 return true;
4ee279f2 2043 }
dedd42d5
RG
2044
2045 if (TREE_CODE (operand) == PAREN_EXPR)
2046 {
2047 if (vect_print_dump_info (REPORT_DETAILS))
2048 fprintf (vect_dump, "non-associatable copy.");
2049 operand = TREE_OPERAND (operand, 0);
2050 }
f7064d11 2051 if (TREE_CODE (operand) != SSA_NAME)
88088c03 2052 {
00518cb1 2053 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
2054 fprintf (vect_dump, "not ssa-name.");
2055 return false;
2056 }
2057
2058 *def_stmt = SSA_NAME_DEF_STMT (operand);
726a989a 2059 if (*def_stmt == NULL)
79fe1b3b 2060 {
00518cb1 2061 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11
DN
2062 fprintf (vect_dump, "no def_stmt.");
2063 return false;
79fe1b3b
DN
2064 }
2065
00518cb1 2066 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
2067 {
2068 fprintf (vect_dump, "def_stmt: ");
726a989a 2069 print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
88088c03
DN
2070 }
2071
f7064d11 2072 /* empty stmt is expected only in case of a function argument.
726a989a
RB
2073 (Otherwise - we expect a phi_node or a GIMPLE_ASSIGN). */
2074 if (gimple_nop_p (*def_stmt))
79fe1b3b 2075 {
726a989a
RB
2076 *def = operand;
2077 *dt = vect_invariant_def;
2078 return true;
79fe1b3b 2079 }
f7064d11 2080
726a989a 2081 bb = gimple_bb (*def_stmt);
88088c03
DN
2082 if (!flow_bb_inside_loop_p (loop, bb))
2083 *dt = vect_invariant_def;
2084 else
2085 {
2086 stmt_vinfo = vinfo_for_stmt (*def_stmt);
2087 *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
2088 }
2089
2090 if (*dt == vect_unknown_def_type)
6775f1f3 2091 {
00518cb1 2092 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
2093 fprintf (vect_dump, "Unsupported pattern.");
2094 return false;
6775f1f3 2095 }
f7064d11 2096
00518cb1 2097 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
2098 fprintf (vect_dump, "type of def: %d.",*dt);
2099
726a989a 2100 switch (gimple_code (*def_stmt))
88088c03 2101 {
726a989a
RB
2102 case GIMPLE_PHI:
2103 *def = gimple_phi_result (*def_stmt);
88088c03
DN
2104 break;
2105
726a989a
RB
2106 case GIMPLE_ASSIGN:
2107 *def = gimple_assign_lhs (*def_stmt);
88088c03
DN
2108 break;
2109
726a989a
RB
2110 case GIMPLE_CALL:
2111 *def = gimple_call_lhs (*def_stmt);
2112 if (*def != NULL)
2113 break;
2114 /* FALLTHRU */
88088c03 2115 default:
00518cb1 2116 if (vect_print_dump_info (REPORT_DETAILS))
88088c03
DN
2117 fprintf (vect_dump, "unsupported defining stmt: ");
2118 return false;
6775f1f3 2119 }
79fe1b3b 2120
88088c03
DN
2121 return true;
2122}
2123
2124
89d67cca
DN
2125/* Function supportable_widening_operation
2126
2127 Check whether an operation represented by the code CODE is a
2128 widening operation that is supported by the target platform in
2129 vector form (i.e., when operating on arguments of type VECTYPE).
2130
d9987fb4
UB
2131 Widening operations we currently support are NOP (CONVERT), FLOAT
2132 and WIDEN_MULT. This function checks if these operations are supported
2133 by the target platform either directly (via vector tree-codes), or via
2134 target builtins.
89d67cca
DN
2135
2136 Output:
2137 - CODE1 and CODE2 are codes of vector operations to be used when
2138 vectorizing the operation, if available.
2139 - DECL1 and DECL2 are decls of target builtin functions to be used
2140 when vectorizing the operation, if available. In this case,
ad2dd72a 2141 CODE1 and CODE2 are CALL_EXPR.
5d593372
IR
2142 - MULTI_STEP_CVT determines the number of required intermediate steps in
2143 case of multi-step conversion (like char->short->int - in that case
2144 MULTI_STEP_CVT will be 1).
2145 - INTERM_TYPES contains the intermediate type required to perform the
2146 widening operation (short in the above example). */
89d67cca
DN
2147
2148bool
726a989a 2149supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
89d67cca 2150 tree *decl1, tree *decl2,
ad2dd72a 2151 enum tree_code *code1, enum tree_code *code2,
5d593372
IR
2152 int *multi_step_cvt,
2153 VEC (tree, heap) **interm_types)
89d67cca
DN
2154{
2155 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
d29de1bf
DN
2156 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
2157 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
89d67cca
DN
2158 bool ordered_p;
2159 enum machine_mode vec_mode;
5d593372 2160 enum insn_code icode1 = 0, icode2 = 0;
89d67cca 2161 optab optab1, optab2;
726a989a 2162 tree type = gimple_expr_type (stmt);
89d67cca
DN
2163 tree wide_vectype = get_vectype_for_scalar_type (type);
2164 enum tree_code c1, c2;
2165
8115817b 2166 /* The result of a vectorized widening operation usually requires two vectors
89d67cca
DN
2167 (because the widened results do not fit int one vector). The generated
2168 vector results would normally be expected to be generated in the same
fa10beec 2169 order as in the original scalar computation, i.e. if 8 results are
89d67cca
DN
2170 generated in each vector iteration, they are to be organized as follows:
2171 vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
2172
2173 However, in the special case that the result of the widening operation is
2f8e468b 2174 used in a reduction computation only, the order doesn't matter (because
89d67cca 2175 when vectorizing a reduction we change the order of the computation).
2f8e468b 2176 Some targets can take advantage of this and generate more efficient code.
89d67cca
DN
2177 For example, targets like Altivec, that support widen_mult using a sequence
2178 of {mult_even,mult_odd} generate the following vectors:
d29de1bf
DN
2179 vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
2180
fa10beec 2181 When vectorizing outer-loops, we execute the inner-loop sequentially
d29de1bf
DN
2182 (each vectorized inner-loop iteration contributes to VF outer-loop
2183 iterations in parallel). We therefore don't allow to change the order
2184 of the computation in the inner-loop during outer-loop vectorization. */
89d67cca 2185
d29de1bf
DN
2186 if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
2187 && !nested_in_vect_loop_p (vect_loop, stmt))
89d67cca
DN
2188 ordered_p = false;
2189 else
2190 ordered_p = true;
2191
2192 if (!ordered_p
2193 && code == WIDEN_MULT_EXPR
2194 && targetm.vectorize.builtin_mul_widen_even
2195 && targetm.vectorize.builtin_mul_widen_even (vectype)
2196 && targetm.vectorize.builtin_mul_widen_odd
2197 && targetm.vectorize.builtin_mul_widen_odd (vectype))
2198 {
2199 if (vect_print_dump_info (REPORT_DETAILS))
2200 fprintf (vect_dump, "Unordered widening operation detected.");
2201
2202 *code1 = *code2 = CALL_EXPR;
2203 *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
2204 *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
2205 return true;
2206 }
2207
2208 switch (code)
2209 {
2210 case WIDEN_MULT_EXPR:
2211 if (BYTES_BIG_ENDIAN)
2212 {
2213 c1 = VEC_WIDEN_MULT_HI_EXPR;
2214 c2 = VEC_WIDEN_MULT_LO_EXPR;
2215 }
2216 else
2217 {
2218 c2 = VEC_WIDEN_MULT_HI_EXPR;
2219 c1 = VEC_WIDEN_MULT_LO_EXPR;
2220 }
2221 break;
2222
1043771b 2223 CASE_CONVERT:
89d67cca
DN
2224 if (BYTES_BIG_ENDIAN)
2225 {
2226 c1 = VEC_UNPACK_HI_EXPR;
2227 c2 = VEC_UNPACK_LO_EXPR;
2228 }
2229 else
2230 {
2231 c2 = VEC_UNPACK_HI_EXPR;
2232 c1 = VEC_UNPACK_LO_EXPR;
2233 }
2234 break;
2235
d9987fb4
UB
2236 case FLOAT_EXPR:
2237 if (BYTES_BIG_ENDIAN)
2238 {
2239 c1 = VEC_UNPACK_FLOAT_HI_EXPR;
2240 c2 = VEC_UNPACK_FLOAT_LO_EXPR;
2241 }
2242 else
2243 {
2244 c2 = VEC_UNPACK_FLOAT_HI_EXPR;
2245 c1 = VEC_UNPACK_FLOAT_LO_EXPR;
2246 }
2247 break;
2248
1a5f8b89
UB
2249 case FIX_TRUNC_EXPR:
2250 /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
2251 VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
2252 computing the operation. */
2253 return false;
2254
89d67cca
DN
2255 default:
2256 gcc_unreachable ();
2257 }
2258
9f106823
UB
2259 if (code == FIX_TRUNC_EXPR)
2260 {
2261 /* The signedness is determined from output operand. */
71d46ca5
MM
2262 optab1 = optab_for_tree_code (c1, type, optab_default);
2263 optab2 = optab_for_tree_code (c2, type, optab_default);
9f106823
UB
2264 }
2265 else
2266 {
71d46ca5
MM
2267 optab1 = optab_for_tree_code (c1, vectype, optab_default);
2268 optab2 = optab_for_tree_code (c2, vectype, optab_default);
9f106823 2269 }
89d67cca
DN
2270
2271 if (!optab1 || !optab2)
2272 return false;
2273
2274 vec_mode = TYPE_MODE (vectype);
166cdb08 2275 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code) == CODE_FOR_nothing
5d593372
IR
2276 || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
2277 == CODE_FOR_nothing)
89d67cca
DN
2278 return false;
2279
5d593372
IR
2280 /* Check if it's a multi-step conversion that can be done using intermediate
2281 types. */
ad2dd72a 2282 if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
5d593372 2283 || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
ad2dd72a 2284 {
5d593372
IR
2285 int i;
2286 tree prev_type = vectype, intermediate_type;
2287 enum machine_mode intermediate_mode, prev_mode = vec_mode;
2288 optab optab3, optab4;
ad2dd72a 2289
5d593372
IR
2290 if (!CONVERT_EXPR_CODE_P (code))
2291 return false;
2292
2293 *code1 = c1;
2294 *code2 = c2;
2295
2296 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
2297 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
2298 to get to NARROW_VECTYPE, and fail if we do not. */
2299 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
2300 for (i = 0; i < 3; i++)
2301 {
2302 intermediate_mode = insn_data[icode1].operand[0].mode;
2303 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
2304 TYPE_UNSIGNED (prev_type));
2305 optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
2306 optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);
2307
2308 if (!optab3 || !optab4
2309 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
ad2dd72a
IR
2310 == CODE_FOR_nothing
2311 || insn_data[icode1].operand[0].mode != intermediate_mode
5d593372 2312 || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
ad2dd72a
IR
2313 == CODE_FOR_nothing
2314 || insn_data[icode2].operand[0].mode != intermediate_mode
5d593372 2315 || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
ad2dd72a 2316 == CODE_FOR_nothing
ad2dd72a 2317 || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
5d593372 2318 == CODE_FOR_nothing)
ad2dd72a 2319 return false;
5d593372
IR
2320
2321 VEC_quick_push (tree, *interm_types, intermediate_type);
2322 (*multi_step_cvt)++;
2323
2324 if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
2325 && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
2326 return true;
2327
2328 prev_type = intermediate_type;
2329 prev_mode = intermediate_mode;
ad2dd72a
IR
2330 }
2331
2332 return false;
2333 }
2334
9f106823
UB
2335 *code1 = c1;
2336 *code2 = c2;
89d67cca
DN
2337 return true;
2338}
2339
2340
d9987fb4
UB
2341/* Function supportable_narrowing_operation
2342
2343 Check whether an operation represented by the code CODE is a
2344 narrowing operation that is supported by the target platform in
2345 vector form (i.e., when operating on arguments of type VECTYPE).
2346
2347 Narrowing operations we currently support are NOP (CONVERT) and
2348 FIX_TRUNC. This function checks if these operations are supported by
2349 the target platform directly via vector tree-codes.
2350
2351 Output:
2352 - CODE1 is the code of a vector operation to be used when
ad2dd72a 2353 vectorizing the operation, if available.
5d593372
IR
2354 - MULTI_STEP_CVT determines the number of required intermediate steps in
2355 case of multi-step conversion (like int->short->char - in that case
2356 MULTI_STEP_CVT will be 1).
2357 - INTERM_TYPES contains the intermediate type required to perform the
2358 narrowing operation (short in the above example). */
d9987fb4
UB
2359
2360bool
2361supportable_narrowing_operation (enum tree_code code,
5d593372
IR
2362 const_gimple stmt, tree vectype,
2363 enum tree_code *code1, int *multi_step_cvt,
2364 VEC (tree, heap) **interm_types)
d9987fb4
UB
2365{
2366 enum machine_mode vec_mode;
2367 enum insn_code icode1;
ad2dd72a 2368 optab optab1, interm_optab;
726a989a 2369 tree type = gimple_expr_type (stmt);
d9987fb4
UB
2370 tree narrow_vectype = get_vectype_for_scalar_type (type);
2371 enum tree_code c1;
5d593372
IR
2372 tree intermediate_type, prev_type;
2373 int i;
d9987fb4
UB
2374
2375 switch (code)
2376 {
1043771b 2377 CASE_CONVERT:
d9987fb4
UB
2378 c1 = VEC_PACK_TRUNC_EXPR;
2379 break;
2380
2381 case FIX_TRUNC_EXPR:
2382 c1 = VEC_PACK_FIX_TRUNC_EXPR;
2383 break;
2384
1a5f8b89
UB
2385 case FLOAT_EXPR:
2386 /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
2387 tree code and optabs used for computing the operation. */
2388 return false;
2389
d9987fb4
UB
2390 default:
2391 gcc_unreachable ();
2392 }
2393
9f106823
UB
2394 if (code == FIX_TRUNC_EXPR)
2395 /* The signedness is determined from output operand. */
71d46ca5 2396 optab1 = optab_for_tree_code (c1, type, optab_default);
9f106823 2397 else
71d46ca5 2398 optab1 = optab_for_tree_code (c1, vectype, optab_default);
d9987fb4
UB
2399
2400 if (!optab1)
2401 return false;
2402
2403 vec_mode = TYPE_MODE (vectype);
ad2dd72a
IR
2404 if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
2405 == CODE_FOR_nothing)
d9987fb4
UB
2406 return false;
2407
5d593372
IR
2408 /* Check if it's a multi-step conversion that can be done using intermediate
2409 types. */
ad2dd72a
IR
2410 if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
2411 {
5d593372
IR
2412 enum machine_mode intermediate_mode, prev_mode = vec_mode;
2413
2414 *code1 = c1;
2415 prev_type = vectype;
2416 /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
2417 intermediate steps in promotion sequence. We try MAX_INTERM_CVT_STEPS
2418 to get to NARROW_VECTYPE, and fail if we do not. */
2419 *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
2420 for (i = 0; i < 3; i++)
2421 {
2422 intermediate_mode = insn_data[icode1].operand[0].mode;
2423 intermediate_type = lang_hooks.types.type_for_mode (intermediate_mode,
2424 TYPE_UNSIGNED (prev_type));
2425 interm_optab = optab_for_tree_code (c1, intermediate_type,
2426 optab_default);
2427 if (!interm_optab
2428 || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
2429 == CODE_FOR_nothing
2430 || insn_data[icode1].operand[0].mode != intermediate_mode
2431 || (icode1
2432 = interm_optab->handlers[(int) intermediate_mode].insn_code)
2433 == CODE_FOR_nothing)
2434 return false;
ad2dd72a 2435
5d593372
IR
2436 VEC_quick_push (tree, *interm_types, intermediate_type);
2437 (*multi_step_cvt)++;
2438
2439 if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
2440 return true;
ad2dd72a 2441
5d593372
IR
2442 prev_type = intermediate_type;
2443 prev_mode = intermediate_mode;
2444 }
2445
2446 return false;
ad2dd72a
IR
2447 }
2448
9f106823 2449 *code1 = c1;
d9987fb4
UB
2450 return true;
2451}
2452
2453
61d3cdbb
DN
2454/* Function reduction_code_for_scalar_code
2455
2456 Input:
2457 CODE - tree_code of a reduction operations.
2458
2459 Output:
619519c8 2460 REDUC_CODE - the corresponding tree-code to be used to reduce the
61d3cdbb
DN
2461 vector of partial results into a single scalar result (which
2462 will also reside in a vector).
2463
2464 Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise. */
2465
2466bool
2467reduction_code_for_scalar_code (enum tree_code code,
2468 enum tree_code *reduc_code)
2469{
2470 switch (code)
2471 {
2472 case MAX_EXPR:
2473 *reduc_code = REDUC_MAX_EXPR;
2474 return true;
2475
2476 case MIN_EXPR:
2477 *reduc_code = REDUC_MIN_EXPR;
2478 return true;
2479
2480 case PLUS_EXPR:
2481 *reduc_code = REDUC_PLUS_EXPR;
2482 return true;
2483
2484 default:
2485 return false;
2486 }
2487}
2488
726a989a
RB
2489/* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
2490 STMT is printed with a message MSG. */
2491
2492static void
2493report_vect_op (gimple stmt, const char *msg)
2494{
2495 fprintf (vect_dump, "%s", msg);
2496 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2497}
61d3cdbb 2498
88088c03
DN
2499/* Function vect_is_simple_reduction
2500
89fd06fb 2501 Detect a cross-iteration def-use cycle that represents a simple
607fb860 2502 reduction computation. We look for the following pattern:
88088c03
DN
2503
2504 loop_header:
2505 a1 = phi < a0, a2 >
2506 a3 = ...
2507 a2 = operation (a3, a1)
2508
2509 such that:
61d3cdbb 2510 1. operation is commutative and associative and it is safe to
2e48874f 2511 change the order of the computation.
61d3cdbb
DN
2512 2. no uses for a2 in the loop (a2 is used out of the loop)
2513 3. no uses of a1 in the loop besides the reduction operation.
2514
2515 Condition 1 is tested here.
2516 Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. */
88088c03 2517
726a989a
RB
2518gimple
2519vect_is_simple_reduction (loop_vec_info loop_info, gimple phi)
88088c03 2520{
726a989a 2521 struct loop *loop = (gimple_bb (phi))->loop_father;
d29de1bf 2522 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
61d3cdbb
DN
2523 edge latch_e = loop_latch_edge (loop);
2524 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
726a989a 2525 gimple def_stmt, def1, def2;
61d3cdbb 2526 enum tree_code code;
726a989a 2527 tree op1, op2;
61d3cdbb 2528 tree type;
b3832a9f
DN
2529 int nloop_uses;
2530 tree name;
2531 imm_use_iterator imm_iter;
2532 use_operand_p use_p;
61d3cdbb 2533
d29de1bf
DN
2534 gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop));
2535
b3832a9f
DN
2536 name = PHI_RESULT (phi);
2537 nloop_uses = 0;
2538 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
61d3cdbb 2539 {
726a989a
RB
2540 gimple use_stmt = USE_STMT (use_p);
2541 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
b3832a9f
DN
2542 && vinfo_for_stmt (use_stmt)
2543 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2544 nloop_uses++;
2545 if (nloop_uses > 1)
61d3cdbb 2546 {
b3832a9f
DN
2547 if (vect_print_dump_info (REPORT_DETAILS))
2548 fprintf (vect_dump, "reduction used in loop.");
726a989a 2549 return NULL;
61d3cdbb 2550 }
b3832a9f
DN
2551 }
2552
2553 if (TREE_CODE (loop_arg) != SSA_NAME)
2554 {
2555 if (vect_print_dump_info (REPORT_DETAILS))
2556 {
2557 fprintf (vect_dump, "reduction: not ssa_name: ");
2558 print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
2559 }
726a989a 2560 return NULL;
61d3cdbb 2561 }
88088c03 2562
61d3cdbb
DN
2563 def_stmt = SSA_NAME_DEF_STMT (loop_arg);
2564 if (!def_stmt)
2565 {
00518cb1 2566 if (vect_print_dump_info (REPORT_DETAILS))
b3832a9f 2567 fprintf (vect_dump, "reduction: no def_stmt.");
726a989a 2568 return NULL;
61d3cdbb
DN
2569 }
2570
726a989a 2571 if (!is_gimple_assign (def_stmt))
61d3cdbb 2572 {
00518cb1 2573 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2574 print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
2575 return NULL;
61d3cdbb
DN
2576 }
2577
726a989a 2578 name = gimple_assign_lhs (def_stmt);
b3832a9f
DN
2579 nloop_uses = 0;
2580 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2581 {
726a989a
RB
2582 gimple use_stmt = USE_STMT (use_p);
2583 if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
b3832a9f
DN
2584 && vinfo_for_stmt (use_stmt)
2585 && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
2586 nloop_uses++;
2587 if (nloop_uses > 1)
2588 {
2589 if (vect_print_dump_info (REPORT_DETAILS))
2590 fprintf (vect_dump, "reduction used in loop.");
726a989a 2591 return NULL;
b3832a9f
DN
2592 }
2593 }
2594
726a989a
RB
2595 code = gimple_assign_rhs_code (def_stmt);
2596
61d3cdbb
DN
2597 if (!commutative_tree_code (code) || !associative_tree_code (code))
2598 {
00518cb1 2599 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2600 report_vect_op (def_stmt, "reduction: not commutative/associative: ");
2601 return NULL;
61d3cdbb
DN
2602 }
2603
726a989a 2604 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
61d3cdbb 2605 {
00518cb1 2606 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2607 report_vect_op (def_stmt, "reduction: not binary operation: ");
2608 return NULL;
61d3cdbb
DN
2609 }
2610
726a989a
RB
2611 op1 = gimple_assign_rhs1 (def_stmt);
2612 op2 = gimple_assign_rhs2 (def_stmt);
61d3cdbb
DN
2613 if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
2614 {
00518cb1 2615 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2616 report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
2617 return NULL;
61d3cdbb
DN
2618 }
2619
dcb081fc 2620 /* Check that it's ok to change the order of the computation. */
726a989a 2621 type = TREE_TYPE (gimple_assign_lhs (def_stmt));
dcb081fc
RH
2622 if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
2623 || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
61d3cdbb 2624 {
00518cb1 2625 if (vect_print_dump_info (REPORT_DETAILS))
61d3cdbb
DN
2626 {
2627 fprintf (vect_dump, "reduction: multiple types: operation type: ");
2628 print_generic_expr (vect_dump, type, TDF_SLIM);
2629 fprintf (vect_dump, ", operands types: ");
2630 print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
2631 fprintf (vect_dump, ",");
2632 print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
2633 }
726a989a 2634 return NULL;
61d3cdbb
DN
2635 }
2636
d29de1bf
DN
2637 /* Generally, when vectorizing a reduction we change the order of the
2638 computation. This may change the behavior of the program in some
2639 cases, so we need to check that this is ok. One exception is when
2640 vectorizing an outer-loop: the inner-loop is executed sequentially,
fa10beec 2641 and therefore vectorizing reductions in the inner-loop during
d29de1bf
DN
2642 outer-loop vectorization is safe. */
2643
61d3cdbb 2644 /* CHECKME: check for !flag_finite_math_only too? */
a1a82611 2645 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
d29de1bf 2646 && !nested_in_vect_loop_p (vect_loop, def_stmt))
61d3cdbb 2647 {
c83eecad 2648 /* Changing the order of operations changes the semantics. */
00518cb1 2649 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2650 report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
2651 return NULL;
61d3cdbb 2652 }
d29de1bf
DN
2653 else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
2654 && !nested_in_vect_loop_p (vect_loop, def_stmt))
61d3cdbb 2655 {
c83eecad 2656 /* Changing the order of operations changes the semantics. */
00518cb1 2657 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2658 report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
2659 return NULL;
325217ed
CF
2660 }
2661 else if (SAT_FIXED_POINT_TYPE_P (type))
2662 {
2663 /* Changing the order of operations changes the semantics. */
2664 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2665 report_vect_op (def_stmt,
2666 "reduction: unsafe fixed-point math optimization: ");
2667 return NULL;
61d3cdbb
DN
2668 }
2669
2670 /* reduction is safe. we're dealing with one of the following:
2671 1) integer arithmetic and no trapv
2672 2) floating point arithmetic, and special flags permit this optimization.
2673 */
2674 def1 = SSA_NAME_DEF_STMT (op1);
2675 def2 = SSA_NAME_DEF_STMT (op2);
726a989a 2676 if (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2))
61d3cdbb 2677 {
00518cb1 2678 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2679 report_vect_op (def_stmt, "reduction: no defs for operands: ");
2680 return NULL;
61d3cdbb
DN
2681 }
2682
fbf798fc
DN
2683
2684 /* Check that one def is the reduction def, defined by PHI,
d29de1bf
DN
2685 the other def is either defined in the loop ("vect_loop_def"),
2686 or it's an induction (defined by a loop-header phi-node). */
fbf798fc
DN
2687
2688 if (def2 == phi
726a989a
RB
2689 && flow_bb_inside_loop_p (loop, gimple_bb (def1))
2690 && (is_gimple_assign (def1)
d29de1bf 2691 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
726a989a 2692 || (gimple_code (def1) == GIMPLE_PHI
d29de1bf 2693 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
726a989a 2694 && !is_loop_header_bb_p (gimple_bb (def1)))))
61d3cdbb 2695 {
00518cb1 2696 if (vect_print_dump_info (REPORT_DETAILS))
726a989a 2697 report_vect_op (def_stmt, "detected reduction:");
61d3cdbb
DN
2698 return def_stmt;
2699 }
fbf798fc 2700 else if (def1 == phi
726a989a
RB
2701 && flow_bb_inside_loop_p (loop, gimple_bb (def2))
2702 && (is_gimple_assign (def2)
d29de1bf 2703 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_induction_def
726a989a 2704 || (gimple_code (def2) == GIMPLE_PHI
d29de1bf 2705 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) == vect_loop_def
726a989a 2706 && !is_loop_header_bb_p (gimple_bb (def2)))))
61d3cdbb 2707 {
61d3cdbb
DN
2708 /* Swap operands (just for simplicity - so that the rest of the code
2709 can assume that the reduction variable is always the last (second)
2710 argument). */
00518cb1 2711 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2712 report_vect_op (def_stmt ,
2713 "detected reduction: need to swap operands:");
2714 swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
2715 gimple_assign_rhs2_ptr (def_stmt));
61d3cdbb
DN
2716 return def_stmt;
2717 }
2718 else
2719 {
00518cb1 2720 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
2721 report_vect_op (def_stmt, "reduction: unknown pattern.");
2722 return NULL;
61d3cdbb 2723 }
79fe1b3b
DN
2724}
2725
2726
f7064d11 2727/* Function vect_is_simple_iv_evolution.
79fe1b3b 2728
f7064d11
DN
2729 FORNOW: A simple evolution of an induction variables in the loop is
2730 considered a polynomial evolution with constant step. */
79fe1b3b 2731
f7064d11
DN
2732bool
2733vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2734 tree * step)
79fe1b3b 2735{
f7064d11
DN
2736 tree init_expr;
2737 tree step_expr;
f7064d11 2738 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
79fe1b3b 2739
f7064d11
DN
2740 /* When there is no evolution in this loop, the evolution function
2741 is not "simple". */
2742 if (evolution_part == NULL_TREE)
2743 return false;
2744
2745 /* When the evolution is a polynomial of degree >= 2
2746 the evolution function is not "simple". */
2747 if (tree_is_chrec (evolution_part))
2748 return false;
2749
2750 step_expr = evolution_part;
fbf798fc 2751 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
79fe1b3b 2752
00518cb1 2753 if (vect_print_dump_info (REPORT_DETAILS))
79fe1b3b 2754 {
f7064d11
DN
2755 fprintf (vect_dump, "step: ");
2756 print_generic_expr (vect_dump, step_expr, TDF_SLIM);
2757 fprintf (vect_dump, ", init: ");
2758 print_generic_expr (vect_dump, init_expr, TDF_SLIM);
79fe1b3b
DN
2759 }
2760
f7064d11
DN
2761 *init = init_expr;
2762 *step = step_expr;
79fe1b3b 2763
f7064d11 2764 if (TREE_CODE (step_expr) != INTEGER_CST)
fbf798fc 2765 {
00518cb1 2766 if (vect_print_dump_info (REPORT_DETAILS))
f7064d11 2767 fprintf (vect_dump, "step unknown.");
7ccf35ed
DN
2768 return false;
2769 }
2770
a023975e
OG
2771 return true;
2772}
2773
2774
79fe1b3b
DN
2775/* Function vectorize_loops.
2776
2777 Entry Point to loop vectorization phase. */
2778
4d2280f6 2779unsigned
d73be268 2780vectorize_loops (void)
79fe1b3b 2781{
b52485c6 2782 unsigned int i;
79fe1b3b 2783 unsigned int num_vectorized_loops = 0;
42fd6772
ZD
2784 unsigned int vect_loops_num;
2785 loop_iterator li;
2786 struct loop *loop;
79fe1b3b 2787
f9be04cd
RG
2788 vect_loops_num = number_of_loops ();
2789
2790 /* Bail out if there are no loops. */
2791 if (vect_loops_num <= 1)
2792 return 0;
2793
c866976a
LB
2794 /* Fix the verbosity level if not defined explicitly by the user. */
2795 vect_set_dump_settings ();
2796
90ff949f
DN
2797 /* Allocate the bitmap that records which virtual variables that
2798 need to be renamed. */
38635499 2799 vect_memsyms_to_rename = BITMAP_ALLOC (NULL);
90ff949f 2800
726a989a
RB
2801 init_stmt_vec_info_vec ();
2802
79fe1b3b
DN
2803 /* ----------- Analyze loops. ----------- */
2804
2805 /* If some loop was duplicated, it gets bigger number
2806 than all previously defined loops. This fact allows us to run
2807 only over initial loops skipping newly generated ones. */
677cc14d 2808 FOR_EACH_LOOP (li, loop, 0)
8bcf15f6
JH
2809 if (optimize_loop_nest_for_speed_p (loop))
2810 {
2811 loop_vec_info loop_vinfo;
79fe1b3b 2812
8bcf15f6
JH
2813 vect_loop_location = find_loop_location (loop);
2814 loop_vinfo = vect_analyze_loop (loop);
2815 loop->aux = loop_vinfo;
79fe1b3b 2816
8bcf15f6
JH
2817 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
2818 continue;
79fe1b3b 2819
8bcf15f6
JH
2820 vect_transform_loop (loop_vinfo);
2821 num_vectorized_loops++;
2822 }
0c5e4273 2823 vect_loop_location = UNKNOWN_LOC;
79fe1b3b 2824
01902653 2825 statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
f9be04cd
RG
2826 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)
2827 || (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
2828 && num_vectorized_loops > 0))
c866976a 2829 fprintf (vect_dump, "vectorized %u loops in function.\n",
79fe1b3b
DN
2830 num_vectorized_loops);
2831
2832 /* ----------- Finalize. ----------- */
2833
38635499 2834 BITMAP_FREE (vect_memsyms_to_rename);
90ff949f 2835
b52485c6 2836 for (i = 1; i < vect_loops_num; i++)
79fe1b3b 2837 {
6775f1f3
IR
2838 loop_vec_info loop_vinfo;
2839
42fd6772 2840 loop = get_loop (i);
79fe1b3b 2841 if (!loop)
6775f1f3 2842 continue;
3d9a9f94 2843 loop_vinfo = (loop_vec_info) loop->aux;
d29de1bf 2844 destroy_loop_vec_info (loop_vinfo, true);
79fe1b3b
DN
2845 loop->aux = NULL;
2846 }
4d2280f6 2847
726a989a
RB
2848 free_stmt_vec_info_vec ();
2849
4d2280f6 2850 return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
79fe1b3b 2851}
f4b3ca72
JH
2852
2853/* Increase alignment of global arrays to improve vectorization potential.
2854 TODO:
2855 - Consider also structs that have an array field.
2856 - Use ipa analysis to prune arrays that can't be vectorized?
2857 This should involve global alignment analysis and in the future also
2858 array padding. */
2859
2860static unsigned int
2861increase_alignment (void)
2862{
2863 struct varpool_node *vnode;
2864
2865 /* Increase the alignment of all global arrays for vectorization. */
2866 for (vnode = varpool_nodes_queue;
2867 vnode;
2868 vnode = vnode->next_needed)
2869 {
2870 tree vectype, decl = vnode->decl;
2871 unsigned int alignment;
2872
2873 if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
2874 continue;
2875 vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
2876 if (!vectype)
2877 continue;
2878 alignment = TYPE_ALIGN (vectype);
2879 if (DECL_ALIGN (decl) >= alignment)
2880 continue;
2881
2882 if (vect_can_force_dr_alignment_p (decl, alignment))
2883 {
2884 DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
2885 DECL_USER_ALIGN (decl) = 1;
2886 if (dump_file)
2887 {
2888 fprintf (dump_file, "Increasing alignment of decl: ");
2889 print_generic_expr (dump_file, decl, TDF_SLIM);
2890 }
2891 }
2892 }
2893 return 0;
2894}
2895
fe1c7546 2896static bool
f4b3ca72
JH
2897gate_increase_alignment (void)
2898{
2899 return flag_section_anchors && flag_tree_vectorize;
2900}
2901
8ddbbcae 2902struct simple_ipa_opt_pass pass_ipa_increase_alignment =
f4b3ca72 2903{
8ddbbcae
JH
2904 {
2905 SIMPLE_IPA_PASS,
f4b3ca72
JH
2906 "increase_alignment", /* name */
2907 gate_increase_alignment, /* gate */
2908 increase_alignment, /* execute */
2909 NULL, /* sub */
2910 NULL, /* next */
2911 0, /* static_pass_number */
2912 0, /* tv_id */
2913 0, /* properties_required */
2914 0, /* properties_provided */
2915 0, /* properties_destroyed */
2916 0, /* todo_flags_start */
8ddbbcae
JH
2917 0 /* todo_flags_finish */
2918 }
f4b3ca72 2919};