/* Loop Vectorization
   Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 2, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 59 Temple Place - Suite 330, Boston, MA
02111-1307, USA.  */

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.  This first implementation focuses on
   simple inner-most loops, with no conditional control flow, and a set of
   simple operations whose vector form can be expressed using existing
   tree codes (PLUS, MULT etc).

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it were manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.

   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMES), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop_nest().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of following
   stmts which use the def of stmt S.  Stmt S is removed if it writes to memory;
   otherwise, we rely on dead code elimination for removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;       STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs, are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
   Currently the only target specific information that is used is the
   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD".  Targets that can
   support different vector sizes will, for now, need to specify one value
   for "UNITS_PER_SIMD_WORD".  More flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g., add_optab->handlers[(int) V8HImode].insn_code).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.
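
   For example, a sketch of such a check, for vector addition on V8HImode
   (illustrative only; it mirrors the optab lookup described above):

        optab op = add_optab;
        if (op->handlers[(int) V8HImode].insn_code == CODE_FOR_nothing)
          return false;    (no target support; the stmt can't be vectorized)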

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "errors.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"

#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "tree-pass.h"
#include "langhooks.h"


/*************************************************************************
  Simple Loop Peeling Utilities
 *************************************************************************/

/* Entry point for peeling of simple loops.
   Peel the first/last iterations of a loop.
   It can be used outside of the vectorizer for loops that are simple enough
   (see function documentation).  In the vectorizer it is used to peel the
   last few iterations when the loop bound is unknown or does not evenly
   divide by the vectorization factor, and to peel the first few iterations
   to force the alignment of data references in the loop.  */
struct loop *slpeel_tree_peel_loop_to_edge
  (struct loop *, struct loops *, edge, tree, tree, bool);
static struct loop *slpeel_tree_duplicate_loop_to_edge_cfg
  (struct loop *, struct loops *, edge);
static void slpeel_update_phis_for_duplicate_loop
  (struct loop *, struct loop *, bool after);
static void slpeel_update_phi_nodes_for_guard (edge, struct loop *, bool, bool);
static void slpeel_make_loop_iterate_ntimes (struct loop *, tree);
static edge slpeel_add_loop_guard (basic_block, tree, basic_block, basic_block);
static bool slpeel_can_duplicate_loop_p (struct loop *, edge);
static void allocate_new_names (bitmap);
static void rename_use_op (use_operand_p);
static void rename_def_op (def_operand_p, tree);
static void rename_variables_in_bb (basic_block);
static void free_new_names (bitmap);
static void rename_variables_in_loop (struct loop *);
#ifdef ENABLE_CHECKING
static void slpeel_verify_cfg_after_peeling (struct loop *, struct loop *);
#endif


/*************************************************************************
  Vectorization Utilities.
 *************************************************************************/

/* Main analysis functions.  */
static loop_vec_info vect_analyze_loop (struct loop *);
static loop_vec_info vect_analyze_loop_form (struct loop *);
static bool vect_analyze_data_refs (loop_vec_info);
static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
static bool vect_analyze_scalar_cycles (loop_vec_info);
static bool vect_analyze_data_ref_accesses (loop_vec_info);
static bool vect_analyze_data_refs_alignment (loop_vec_info);
static bool vect_compute_data_refs_alignment (loop_vec_info);
static bool vect_analyze_operations (loop_vec_info);

/* Main code transformation functions.  */
static void vect_transform_loop (loop_vec_info, struct loops *);
static bool vect_transform_stmt (tree, block_stmt_iterator *);
static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
static enum dr_alignment_support vect_supportable_dr_alignment
  (struct data_reference *);
static void vect_align_data_ref (tree);
static void vect_enhance_data_refs_alignment (loop_vec_info);

/* Utility functions for the analyses.  */
static bool vect_is_simple_use (tree, struct loop *, tree *);
static bool exist_non_indexing_operands_for_use_p (tree, tree);
static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
static void vect_mark_relevant (varray_type *, tree);
static bool vect_stmt_relevant_p (tree, loop_vec_info);
static tree vect_get_loop_niters (struct loop *, tree *);
static bool vect_compute_data_ref_alignment (struct data_reference *);
static bool vect_analyze_data_ref_access (struct data_reference *);
static bool vect_can_force_dr_alignment_p (tree, unsigned int);
static struct data_reference * vect_analyze_pointer_ref_access
  (tree, tree, bool);
static bool vect_can_advance_ivs_p (struct loop *);
static tree vect_get_base_and_offset (struct data_reference *, tree, tree,
                                      loop_vec_info, tree *, tree *, tree *,
                                      bool *);
static tree vect_get_ptr_offset (tree, tree, tree *);
static tree vect_get_memtag_and_dr
  (tree, tree, bool, loop_vec_info, tree, struct data_reference **);
static bool vect_analyze_offset_expr (tree, struct loop *, tree, tree *,
                                      tree *, tree *);
static tree vect_strip_conversion (tree);

/* Utility functions for the code transformation.  */
static tree vect_create_destination_var (tree, tree);
static tree vect_create_data_ref_ptr
  (tree, block_stmt_iterator *, tree, tree *, bool);
static tree vect_create_index_for_vector_ref
  (struct loop *, block_stmt_iterator *);
static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
static tree get_vectype_for_scalar_type (tree);
static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
static tree vect_get_vec_def_for_operand (tree, tree);
static tree vect_init_vector (tree, tree);
static void vect_finish_stmt_generation
  (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);

/* Utility functions dealing with loop peeling (not peeling itself).  */
static void vect_generate_tmps_on_preheader
  (loop_vec_info, tree *, tree *, tree *);
static tree vect_build_loop_niters (loop_vec_info);
static void vect_update_ivs_after_vectorizer (struct loop *, tree, edge);
static tree vect_gen_niters_for_prolog_loop (loop_vec_info, tree);
static void vect_update_inits_of_dr (struct data_reference *, tree niters);
static void vect_update_inits_of_drs (loop_vec_info, tree);
static void vect_do_peeling_for_alignment (loop_vec_info, struct loops *);
static void vect_do_peeling_for_loop_bound
  (loop_vec_info, tree *, struct loops *);

/* Utilities for creation and deletion of vec_info structs.  */
loop_vec_info new_loop_vec_info (struct loop *loop);
void destroy_loop_vec_info (loop_vec_info);
stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);

static bool vect_debug_stats (struct loop *loop);
static bool vect_debug_details (struct loop *loop);

\f
/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* For each definition in DEFINITIONS this function allocates
   a new ssa name.  */

static void
allocate_new_names (bitmap definitions)
{
  unsigned ver;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
    {
      tree def = ssa_name (ver);
      tree *new_name_ptr = xmalloc (sizeof (tree));

      bool abnormal = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (def);

      *new_name_ptr = duplicate_ssa_name (def, SSA_NAME_DEF_STMT (def));
      SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*new_name_ptr) = abnormal;

      SSA_NAME_AUX (def) = new_name_ptr;
    }
}


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree *new_name_ptr;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name_ptr = SSA_NAME_AUX (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name_ptr)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, *new_name_ptr);
}


/* Renames the def *OP_P in statement STMT.  */

static void
rename_def_op (def_operand_p op_p, tree stmt)
{
  tree *new_name_ptr;

  if (TREE_CODE (DEF_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name_ptr = SSA_NAME_AUX (DEF_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name_ptr)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_DEF (op_p, *new_name_ptr);
  SSA_NAME_DEF_STMT (DEF_FROM_PTR (op_p)) = stmt;
}


/* Renames the variables in basic block BB.  */

static void
rename_variables_in_bb (basic_block bb)
{
  tree phi;
  block_stmt_iterator bsi;
  tree stmt;
  stmt_ann_t ann;
  use_optype uses;
  vuse_optype vuses;
  def_optype defs;
  v_may_def_optype v_may_defs;
  v_must_def_optype v_must_defs;
  unsigned i;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    rename_def_op (PHI_RESULT_PTR (phi), phi);

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    {
      stmt = bsi_stmt (bsi);
      get_stmt_operands (stmt);
      ann = stmt_ann (stmt);

      uses = USE_OPS (ann);
      for (i = 0; i < NUM_USES (uses); i++)
        rename_use_op (USE_OP_PTR (uses, i));

      defs = DEF_OPS (ann);
      for (i = 0; i < NUM_DEFS (defs); i++)
        rename_def_op (DEF_OP_PTR (defs, i), stmt);

      vuses = VUSE_OPS (ann);
      for (i = 0; i < NUM_VUSES (vuses); i++)
        rename_use_op (VUSE_OP_PTR (vuses, i));

      v_may_defs = V_MAY_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
        {
          rename_use_op (V_MAY_DEF_OP_PTR (v_may_defs, i));
          rename_def_op (V_MAY_DEF_RESULT_PTR (v_may_defs, i), stmt);
        }

      v_must_defs = V_MUST_DEF_OPS (ann);
      for (i = 0; i < NUM_V_MUST_DEFS (v_must_defs); i++)
        {
          rename_use_op (V_MUST_DEF_KILL_PTR (v_must_defs, i));
          rename_def_op (V_MUST_DEF_RESULT_PTR (v_must_defs, i), stmt);
        }
    }

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!flow_bb_inside_loop_p (loop, e->dest))
        continue;
      for (phi = phi_nodes (e->dest); phi; phi = PHI_CHAIN (phi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (phi, e));
    }
}


/* Releases the structures holding the new ssa names.  */

static void
free_new_names (bitmap definitions)
{
  unsigned ver;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (definitions, 0, ver, bi)
    {
      tree def = ssa_name (ver);

      if (SSA_NAME_AUX (def))
        {
          free (SSA_NAME_AUX (def));
          SSA_NAME_AUX (def) = NULL;
        }
    }
}


/* Renames the variables in the newly generated LOOP.  */

static void
rename_variables_in_loop (struct loop *loop)
{
  unsigned i;
  basic_block *bbs;

  bbs = get_loop_body (loop);

  for (i = 0; i < loop->num_nodes; i++)
    rename_variables_in_bb (bbs[i]);

  free (bbs);
}
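
/* The renaming utilities above are used together, as in
   slpeel_tree_peel_loop_to_edge below (a sketch; the CFG manipulation
   performed between these calls is omitted):

       definitions = marked_ssa_names ();
       allocate_new_names (definitions);
       ...
       rename_variables_in_loop (new_loop);
       ...
       free_new_names (definitions);
       BITMAP_XFREE (definitions);  */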


/* Update the PHI nodes of NEW_LOOP.

   NEW_LOOP is a duplicate of ORIG_LOOP.
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
   executes before it.  */

static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
                                       struct loop *new_loop, bool after)
{
  tree *new_name_ptr, new_ssa_name;
  tree phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = new_loop->exit_edges[0];
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
             (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
             (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

        case 1: NEW_LOOP was placed before ORIG_LOOP:
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
                Updating the phis in the successor block can therefore be done
                along with the scanning of the loop header phis, because the
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
                phi nodes, organized in the same order.

        case 2: NEW_LOOP was placed after ORIG_LOOP:
                The successor block of NEW_LOOP is the original exit block of
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
                We postpone updating these phis to a later stage (when
                loop guards are added).
   */


  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (phi_new = phi_nodes (new_loop->header),
       phi_orig = phi_nodes (orig_loop->header);
       phi_new && phi_orig;
       phi_new = PHI_CHAIN (phi_new), phi_orig = PHI_CHAIN (phi_orig))
    {
      /* step 1.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e);

      /* step 2.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      if (TREE_CODE (def) != SSA_NAME)
        continue;

      new_name_ptr = SSA_NAME_AUX (def);
      if (!new_name_ptr)
        /* Something defined outside of the loop.  */
        continue;

      /* An ordinary ssa name defined in the loop.  */
      new_ssa_name = *new_name_ptr;
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));

      /* step 3 (case 1).  */
      if (!after)
        {
          gcc_assert (new_loop_exit_e == orig_entry_e);
          SET_PHI_ARG_DEF (phi_orig,
                           phi_arg_from_edge (phi_orig, new_loop_exit_e),
                           new_ssa_name);
        }
    }
}


/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because it had a single predecessor (the
        LOOP header) before the guard code was added, and has now become a
        merge point of two paths - the path that ends with the LOOP exit-edge,
        and the path that ends with GUARD_EDGE.

   This function creates and updates the relevant phi nodes to account for
   the new incoming edge (GUARD_EDGE) into NEW_MERGE_BB:
   1. Create phi nodes at NEW_MERGE_BB.
   2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
      UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
      was added:

   ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          if (exit_loop) goto update_bb : LOOP_header_bb
        update_bb:

   ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb : LOOP_header_bb
        LOOP_header_bb:
          if (exit_loop_condition) goto new_merge_bb : LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   - ENTRY_PHIS: If ENTRY_PHIS is TRUE, this indicates that the phis in
        UPDATE_BB are loop entry phis, like the phis in the LOOP header,
        organized in the same order.
        If ENTRY_PHIS is FALSE, this indicates that the phis in UPDATE_BB are
        loop exit phis.

   - IS_NEW_LOOP: TRUE if LOOP is a new loop (a duplicated copy of another
        "original" loop).  FALSE if LOOP is an original loop (not a newly
        created copy).  The SSA_NAME_AUX fields of the defs in the original
        loop are the corresponding new ssa-names used in the new duplicated
        loop copy.  IS_NEW_LOOP indicates which of the two args of the phi
        nodes in UPDATE_BB takes the original ssa-name, and which takes the
        new name: If IS_NEW_LOOP is TRUE, the phi-arg that is associated with
        the LOOP-exit-edge takes the new-name, and the phi-arg that is
        associated with GUARD_EDGE takes the original name.  If IS_NEW_LOOP is
        FALSE, it's the other way around.
   */

static void
slpeel_update_phi_nodes_for_guard (edge guard_edge,
                                   struct loop *loop,
                                   bool entry_phis,
                                   bool is_new_loop)
{
  tree orig_phi, new_phi, update_phi;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  basic_block orig_bb = (entry_phis ? loop->header : update_bb);

  for (orig_phi = phi_nodes (orig_bb), update_phi = phi_nodes (update_bb);
       orig_phi && update_phi;
       orig_phi = PHI_CHAIN (orig_phi), update_phi = PHI_CHAIN (update_phi))
    {
      /* 1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP.  Set the two phi args in NEW_PHI for these edges:  */
      if (entry_phis)
        {
          loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi,
                                            EDGE_SUCC (loop->latch, 0));
          guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop->entry_edges[0]);
        }
      else /* exit phis */
        {
          tree orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
          tree *new_name_ptr = SSA_NAME_AUX (orig_def);
          tree new_name;

          if (new_name_ptr)
            new_name = *new_name_ptr;
          else
            /* Something defined outside of the loop.  */
            new_name = orig_def;

          if (is_new_loop)
            {
              guard_arg = orig_def;
              loop_arg = new_name;
            }
          else
            {
              guard_arg = new_name;
              loop_arg = orig_def;
            }
        }
      add_phi_arg (new_phi, loop_arg, loop->exit_edges[0]);
      add_phi_arg (new_phi, guard_arg, guard_edge);

      /* 3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      SET_PHI_ARG_DEF (update_phi, phi_arg_from_edge (update_phi, e),
                       PHI_RESULT (new_phi));
    }

  set_phi_nodes (new_merge_bb, phi_reverse (phi_nodes (new_merge_bb)));
}


/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one in each iteration, and whose limit
   is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */

static void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr, cond_stmt, cond;
  tree orig_cond;
  edge exit_edge = loop->exit_edges[0];
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree begin_label = tree_block_label (loop->latch);
  tree exit_label = tree_block_label (loop->single_exit->dest);
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  tree then_label;
  tree else_label;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  create_iv (init, step, NULL_TREE, loop,
             &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* CREATE_IV uses BSI_INSERT with TSI_NEW_STMT, so we want to get
     back to the exit condition statement.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond);

  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    {
      cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, niters);
      then_label = build1 (GOTO_EXPR, void_type_node, exit_label);
      else_label = build1 (GOTO_EXPR, void_type_node, begin_label);
    }
  else /* 'then' edge loops back.  */
    {
      cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, niters);
      then_label = build1 (GOTO_EXPR, void_type_node, begin_label);
      else_label = build1 (GOTO_EXPR, void_type_node, exit_label);
    }

  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond), cond,
                      then_label, else_label);
  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* Remove old loop exit test:  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_stats (loop) || vect_debug_details (loop))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);

  loop->nb_iterations = niters;
}
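
/* For example (a sketch of the effect): for a loop whose 'then' edge exits,
   after the transformation the loop ends with

       indx_after_incr = indx_before_incr + 1;
       if (indx_after_incr >= niters) goto exit_label; else goto begin_label;

   so the loop executes exactly NITERS iterations (the new IV starts at
   zero).  */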


/* Given LOOP, this function generates a new copy of it and puts the copy
   on edge E, which is either the entry or the exit edge of LOOP.  */

static struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, struct loops *loops,
                                        edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  tree phi, phi_arg;

  at_exit = (e == loop->exit_edges[0]);
  if (!at_exit && e != loop_preheader_edge (loop))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Edge is not an entry nor an exit edge.\n");
      return NULL;
    }

  bbs = get_loop_body (loop);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "Cannot copy basic blocks.\n");
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (loops, loop, loop->outer);
  if (!new_loop)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "duplicate_loop returns NULL.\n");
      free (bbs);
      return NULL;
    }

  exit_dest = loop->exit_edges[0]->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  new_bbs = xmalloc (sizeof (basic_block) * loop->num_nodes);

  copy_bbs (bbs, loop->num_nodes, new_bbs, NULL, 0, NULL, NULL);

  /* Duplicate the phi args at the exit dest, as coming also from the
     exit of the duplicated loop.  */
  for (phi = phi_nodes (exit_dest); phi; phi = PHI_CHAIN (phi))
    {
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, loop->exit_edges[0]);
      if (phi_arg)
        {
          edge new_loop_exit_edge;

          if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
          else
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);

          add_phi_arg (phi, phi_arg, new_loop_exit_edge);
        }
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      redirect_edge_and_branch_force (e, new_loop->header);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
      if (was_imm_dom)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
    }
  else /* Add the copy at entry.  */
    {
      edge new_exit_e;
      edge entry_e = loop_preheader_edge (loop);
      basic_block preheader = entry_e->src;

      if (!flow_bb_inside_loop_p (new_loop,
                                  EDGE_SUCC (new_loop->header, 0)->dest))
        new_exit_e = EDGE_SUCC (new_loop->header, 0);
      else
        new_exit_e = EDGE_SUCC (new_loop->header, 1);

      redirect_edge_and_branch_force (new_exit_e, loop->header);
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
                               new_exit_e->src);

      /* We have to add phi args to the loop->header here as coming
         from the new_exit_e edge.  */
      for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
        {
          phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
          if (phi_arg)
            add_phi_arg (phi, phi_arg, new_exit_e);
        }

      redirect_edge_and_branch_force (entry_e, new_loop->header);
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
    }

  flow_loop_scan (new_loop, LOOP_ALL);
  flow_loop_scan (loop, LOOP_ALL);
  free (new_bbs);
  free (bbs);

  return new_loop;
}


/* Given the condition expression COND, put it as the last statement of
   GUARD_BB.  EXIT_BB is the basic block reached when the guard skips the
   loop (assumed to be the single exit of the guarded loop), and DOM_BB
   becomes the immediate dominator of EXIT_BB.
   Returns the skip edge.  */

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
                       basic_block dom_bb)
{
  block_stmt_iterator bsi;
  edge new_e, enter_e;
  tree cond_stmt, then_label, else_label;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  bsi = bsi_last (guard_bb);

  then_label = build1 (GOTO_EXPR, void_type_node,
                       tree_block_label (exit_bb));
  else_label = build1 (GOTO_EXPR, void_type_node,
                       tree_block_label (enter_e->dest));
  cond_stmt = build3 (COND_EXPR, void_type_node, cond,
                      then_label, else_label);
  bsi_insert_after (&bsi, cond_stmt, BSI_NEW_STMT);
  /* Add new edge to connect entry block to the second loop.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
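
/* Example usage (a sketch, mirroring the first guard generated in
   slpeel_tree_peel_loop_to_edge below):

       pre_condition =
         build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
       skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                       bb_before_second_loop,
                                       bb_before_first_loop);  */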


/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.
 */

static bool
slpeel_can_duplicate_loop_p (struct loop *loop, edge e)
{
  edge exit_e = loop->exit_edges[0];
  edge entry_e = loop_preheader_edge (loop);
  tree orig_cond = get_loop_exit_condition (loop);
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_e->src);

  if (any_marked_for_rewrite_p ())
    return false;

  if (loop->inner
      /* All loops have an outer scope; the only case loop->outer is NULL is for
         the function itself.  */
      || !loop->outer
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || loop->num_exits != 1
      || loop->num_entries != 1
      /* Verify that new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != bsi_stmt (loop_exit_bsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}

#ifdef ENABLE_CHECKING
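/* Function slpeel_verify_cfg_after_peeling.

   Sanity checks on the CFG created by slpeel_tree_peel_loop_to_edge:
   FIRST_LOOP and SECOND_LOOP are the two loops resulting from the peeling,
   in their order of execution.  */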
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = first_loop->exit_edges[0]->dest;
  basic_block loop2_entry_bb = second_loop->pre_header;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or
     skipped is placed in first_loop->exit.  first_loop->exit therefore has
     two successors - one is the preheader of second_loop, and the other is
     a bb after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);


  /* 1. Verify that one of the successors of first_loop->exit is the
        preheader of second_loop.  */

  /* The preheader of second_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* 2. Verify that the other successor of first_loop->exit is after the
        second_loop.  */
  /* TODO */
}
#endif

/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP.  After this transformation
   we have two loops, one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP.  In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP.  In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).

   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/

struct loop *
slpeel_tree_peel_loop_to_edge (struct loop *loop, struct loops *loops,
                               edge e, tree first_niters,
                               tree niters, bool update_first_loop_count)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  edge exit_e = loop->exit_edges[0];

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks.  Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  tree_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, loops, e)))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
      return NULL;
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  definitions = marked_ssa_names ();
  allocate_new_names (definitions);
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);


  /* 2. Add the guard that controls whether the first loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  add_bb_to_loop (bb_before_first_loop, first_loop->outer);
  bb_before_second_loop = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_before_second_loop, first_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition =
    build2 (LE_EXPR, boolean_type_node, first_niters, integer_zero_node);
  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, first_loop, true /* entry-phis */,
                                     first_loop == new_loop);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = split_edge (first_loop->exit_edges[0]);
  add_bb_to_loop (bb_between_loops, first_loop->outer);
  bb_after_second_loop = split_edge (second_loop->exit_edges[0]);
  add_bb_to_loop (bb_after_second_loop, second_loop->outer);
  flow_loop_scan (first_loop, LOOP_ALL);
  flow_loop_scan (second_loop, LOOP_ALL);

  pre_condition = build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard (skip_e, second_loop, false /* exit-phis */,
                                     second_loop == new_loop);

  /* Flow loop scan does not update loop->single_exit field.  */
  first_loop->single_exit = first_loop->exit_edges[0];
  second_loop->single_exit = second_loop->exit_edges[0];

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  free_new_names (definitions);
  BITMAP_XFREE (definitions);
  unmark_all_for_rewrite ();

  return new_loop;
}
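
/* Example usage (a sketch): this is how the vectorizer is expected to peel
   off the last iterations when the loop bound does not divide by the
   vectorization factor - the epilog copy is placed on LOOP's exit edge.
   NI_NAME and RATIO_MULT_VF_NAME are hypothetical SSA names holding the
   loop bound and the largest multiple of the vectorization factor not
   exceeding it:

       new_loop = slpeel_tree_peel_loop_to_edge (loop, loops,
                                                 loop->exit_edges[0],
                                                 ratio_mult_vf_name,
                                                 ni_name, false);  */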

\f
/* Here the vectorizer proper starts.  */

/*************************************************************************
  Vectorization Utilities.
 *************************************************************************/

/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (tree stmt, struct loop *loop)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP (res) = loop;
  STMT_VINFO_RELEVANT_P (res) = 0;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;
  STMT_VINFO_MEMTAG (res) = NULL;
  STMT_VINFO_VECT_DR_BASE (res) = NULL;
  STMT_VINFO_VECT_INIT_OFFSET (res) = NULL_TREE;
  STMT_VINFO_VECT_STEP (res) = NULL_TREE;
  STMT_VINFO_VECT_BASE_ALIGNED_P (res) = false;
  STMT_VINFO_VECT_MISALIGNMENT (res) = NULL_TREE;

  return res;
}


/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  block_stmt_iterator si;
  unsigned int i;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));

  bbs = get_loop_body (loop);

  /* Create stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_ann_t ann;

          get_stmt_operands (stmt);
          ann = stmt_ann (stmt);
          set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
        }
    }

  LOOP_VINFO_LOOP (res) = loop;
  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_EXIT_COND (res) = NULL;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_DO_PEELING_FOR_ALIGNMENT (res) = false;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
                           "loop_write_datarefs");
  VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
                           "loop_read_datarefs");
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;

  return res;
}


/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  block_stmt_iterator si;
  int j;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          tree stmt = bsi_stmt (si);
          stmt_ann_t ann = stmt_ann (stmt);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          free (stmt_info);
          set_stmt_info (ann, NULL);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
  varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));

  free (loop_vinfo);
}
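
/* A sketch of the intended lifecycle of these structs: the analysis code
   creates a loop_vec_info per candidate loop (which in turn creates the
   stmt_vec_infos), and destroys it if the loop turns out not to be
   vectorizable:

       loop_vec_info loop_vinfo = new_loop_vec_info (loop);
       ... run the analyses, recording their results in loop_vinfo ...
       if (!LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
         destroy_loop_vec_info (loop_vinfo);  */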


/* Function vect_debug_stats.

   For vectorization statistics dumps.  */

static bool
vect_debug_stats (struct loop *loop)
{
  basic_block bb;
  block_stmt_iterator si;
  tree node = NULL_TREE;

  if (!dump_file || !(dump_flags & TDF_STATS))
    return false;

  if (!loop)
    {
      fprintf (dump_file, "\n");
      return true;
    }

  if (!loop->header)
    return false;

  bb = loop->header;

  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
    {
      node = bsi_stmt (si);
      if (node && EXPR_P (node) && EXPR_LOCUS (node))
        break;
    }

  if (node && EXPR_P (node) && EXPR_LOCUS (node)
      && EXPR_FILENAME (node) && EXPR_LINENO (node))
    {
      fprintf (dump_file, "\nloop at %s:%d: ",
               EXPR_FILENAME (node), EXPR_LINENO (node));
      return true;
    }

  return false;
}


/* Function vect_debug_details.

   For vectorization debug dumps.  */

static bool
vect_debug_details (struct loop *loop)
{
  basic_block bb;
  block_stmt_iterator si;
  tree node = NULL_TREE;

  if (!dump_file || !(dump_flags & TDF_DETAILS))
    return false;

  if (!loop)
    {
      fprintf (dump_file, "\n");
      return true;
    }

  if (!loop->header)
    return false;

  bb = loop->header;

  for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
    {
      node = bsi_stmt (si);
      if (node && EXPR_P (node) && EXPR_LOCUS (node))
        break;
    }

  if (node && EXPR_P (node) && EXPR_LOCUS (node)
      && EXPR_FILENAME (node) && EXPR_LINENO (node))
    {
      fprintf (dump_file, "\nloop at %s:%d: ",
               EXPR_FILENAME (node), EXPR_LINENO (node));
      return true;
    }

  return false;
}


/* Function vect_get_ptr_offset.

   Compute the OFFSET (in bits) of pointer REF modulo the vector-type
   alignment.  */

static tree
vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
                     tree vectype ATTRIBUTE_UNUSED,
                     tree *offset ATTRIBUTE_UNUSED)
{
  /* TODO: Use alignment information.  */
  return NULL_TREE;
}


/* Function vect_strip_conversion.

   Strip conversions that don't narrow the mode.  E.g., for 'short s', the
   widening cast in '(int) s' is stripped; a narrowing conversion such as
   '(short) i' for 'int i' causes NULL_TREE to be returned instead.  */

static tree
vect_strip_conversion (tree expr)
{
  tree to, ti, oprnd0;

  while (TREE_CODE (expr) == NOP_EXPR || TREE_CODE (expr) == CONVERT_EXPR)
    {
      to = TREE_TYPE (expr);
      oprnd0 = TREE_OPERAND (expr, 0);
      ti = TREE_TYPE (oprnd0);

      if (!INTEGRAL_TYPE_P (to) || !INTEGRAL_TYPE_P (ti))
        return NULL_TREE;
      if (GET_MODE_SIZE (TYPE_MODE (to)) < GET_MODE_SIZE (TYPE_MODE (ti)))
        return NULL_TREE;

      expr = oprnd0;
    }
  return expr;
}


/* Function vect_analyze_offset_expr.

   Given an offset expression EXPR received from get_inner_reference, analyze
   it and create an expression for INITIAL_OFFSET by substituting the variables
   of EXPR with the initial_condition of the corresponding access_fn in the
   loop.  E.g.,
     for i
       for (j = 3; j < N; j++)
         a[j].b[i][j] = 0;

   For a[j].b[i][j], EXPR will be 'i * C_i + j * C_j + C'.  'i' cannot be
   substituted, since its access_fn in the inner loop is i.  'j' will be
   substituted with 3.  The INITIAL_OFFSET will be 'i * C_i + C`', where
   C` = 3 * C_j + C.

   Compute MISALIGN (the misalignment of the data reference initial access from
   its base) if possible.  Misalignment can be calculated only if all the
   variables can be substituted with constants, or if a variable is multiplied
   by a multiple of VECTYPE_ALIGNMENT.  In the above example, since 'i' cannot
   be substituted, MISALIGN will be NULL_TREE in case that C_i is not a multiple
   of VECTYPE_ALIGNMENT, and C` otherwise.  (The MISALIGN modulo
   VECTYPE_ALIGNMENT computation is performed in the caller of this function.)

   STEP is the evolution of the data reference in this loop in bytes.
   In the above example, STEP is C_j.

   Return FALSE if the analysis fails, e.g., there is no access_fn for a
   variable.  In this case, all the outputs (INITIAL_OFFSET, MISALIGN and STEP)
   are NULL_TREEs.  Otherwise, return TRUE.  */
static bool
vect_analyze_offset_expr (tree expr,
                          struct loop *loop,
                          tree vectype_alignment,
                          tree *initial_offset,
                          tree *misalign,
                          tree *step)
{
  tree oprnd0;
  tree oprnd1;
  tree left_offset = size_zero_node;
  tree right_offset = size_zero_node;
  tree left_misalign = size_zero_node;
  tree right_misalign = size_zero_node;
  tree left_step = size_zero_node;
  tree right_step = size_zero_node;
  enum tree_code code;
  tree init, evolution;

  *step = NULL_TREE;
  *misalign = NULL_TREE;
  *initial_offset = NULL_TREE;

  /* Strip conversions that don't narrow the mode.  */
  expr = vect_strip_conversion (expr);
  if (!expr)
    return false;

  /* Stop conditions:
     1. Constant.  */
  if (TREE_CODE (expr) == INTEGER_CST)
    {
      *initial_offset = fold_convert (sizetype, expr);
      *misalign = fold_convert (sizetype, expr);
      *step = size_zero_node;
      return true;
    }

  /* 2. Variable.  Try to substitute with initial_condition of the
     corresponding access_fn in the current loop.  */
  if (SSA_VAR_P (expr))
    {
      tree access_fn = analyze_scalar_evolution (loop, expr);

      if (access_fn == chrec_dont_know)
        /* No access_fn.  */
        return false;

      init = initial_condition_in_loop_num (access_fn, loop->num);
      if (init == expr && !expr_invariant_in_loop_p (loop, init))
        /* Not enough information: may be not loop invariant.
           E.g., for a[b[i]], we get a[D], where D=b[i].  EXPR is D, its
           initial_condition is D, but it depends on i - loop's induction
           variable.  */
        return false;

      evolution = evolution_part_in_loop_num (access_fn, loop->num);
      if (evolution && TREE_CODE (evolution) != INTEGER_CST)
        /* Evolution is not constant.  */
        return false;

      if (TREE_CODE (init) == INTEGER_CST)
        *misalign = fold_convert (sizetype, init);
      else
        /* Not constant, misalignment cannot be calculated.  */
        *misalign = NULL_TREE;

      *initial_offset = fold_convert (sizetype, init);

      *step = evolution ? fold_convert (sizetype, evolution) : size_zero_node;
      return true;
    }

  /* Recursive computation.  */
  if (!BINARY_CLASS_P (expr))
    {
      /* We expect to get binary expressions (PLUS/MINUS and MULT).  */
      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Not binary expression ");
          print_generic_expr (dump_file, expr, TDF_SLIM);
        }
      return false;
    }
  oprnd0 = TREE_OPERAND (expr, 0);
  oprnd1 = TREE_OPERAND (expr, 1);

  if (!vect_analyze_offset_expr (oprnd0, loop, vectype_alignment, &left_offset,
                                 &left_misalign, &left_step)
      || !vect_analyze_offset_expr (oprnd1, loop, vectype_alignment,
                                    &right_offset, &right_misalign,
                                    &right_step))
    return false;

  /* The type of the operation: plus, minus or mult.  */
  code = TREE_CODE (expr);
  switch (code)
    {
    case MULT_EXPR:
      if (TREE_CODE (right_offset) != INTEGER_CST)
        /* RIGHT_OFFSET can be non-constant, e.g., for arrays of
           variable-sized types.
           FORNOW: We don't support such cases.  */
        return false;

      /* Strip conversions that don't narrow the mode.  */
      left_offset = vect_strip_conversion (left_offset);
      if (!left_offset)
        return false;
      /* Misalignment computation.  */
      if (SSA_VAR_P (left_offset))
        {
          /* If the left side contains a variable that cannot be substituted
             with a constant, we check if the right side is a multiple of
             ALIGNMENT.  */
          if (integer_zerop (size_binop (TRUNC_MOD_EXPR, right_offset,
                                         vectype_alignment)))
            *misalign = size_zero_node;
          else
            /* If the remainder is not zero or the right side isn't constant,
               we can't compute misalignment.  */
            *misalign = NULL_TREE;
        }
      else
        {
          /* The left operand was successfully substituted with a constant.  */
          if (left_misalign)
            /* In case of EXPR '(i * C1 + j) * C2', LEFT_MISALIGN is
               NULL_TREE.  */
            *misalign = size_binop (code, left_misalign, right_misalign);
          else
            *misalign = NULL_TREE;
        }

      /* Step calculation.  */
      /* Multiply the step by the right operand.  */
      *step = size_binop (MULT_EXPR, left_step, right_offset);
      break;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Combine the recursive calculations for step and misalignment.  */
      *step = size_binop (code, left_step, right_step);

      if (left_misalign && right_misalign)
        *misalign = size_binop (code, left_misalign, right_misalign);
      else
        *misalign = NULL_TREE;

      break;

    default:
      gcc_unreachable ();
    }

  /* Compute offset.  */
  *initial_offset = fold_convert (sizetype,
                                  fold (build2 (code, TREE_TYPE (left_offset),
                                                left_offset,
                                                right_offset)));
  return true;
}


/* Function vect_get_base_and_offset.

   Return the BASE of the data reference EXPR.
   If VECTYPE is given, also compute the INITIAL_OFFSET from BASE, MISALIGN and
   STEP.
   E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset
   'a.b[i] + 4B' from a (can be an expression), MISALIGN is an OFFSET
   instantiated with initial_conditions of access_functions of variables,
   modulo alignment, and STEP is the evolution of the DR_REF in this loop.

   Function get_inner_reference is used for the above in case of ARRAY_REF and
   COMPONENT_REF.

   Input:
   EXPR - the memory reference that is being analyzed
   DR - the data_reference struct of the _original_ memory reference
        (Note: DR_REF (DR) is not necessarily EXPR)
   VECTYPE - the type that defines the alignment (i.e., we compute
             alignment relative to TYPE_ALIGN(VECTYPE))

   Output:
   BASE (returned value) - the base of the data reference EXPR.
                           E.g., if EXPR is a.b[k].c[i][j] the returned
                           base is a.
   INITIAL_OFFSET - initial offset of EXPR from BASE (an expression)
   MISALIGN - offset of EXPR from BASE in bytes (a constant) or NULL_TREE if
              the computation is impossible
   STEP - evolution of the DR_REF in the loop
   BASE_ALIGNED_P - indicates if BASE is aligned

   If something unexpected is encountered (an unsupported form of data-ref),
   then NULL_TREE is returned.  */
1595
1596 static tree
1597 vect_get_base_and_offset (struct data_reference *dr,
1598 tree expr,
1599 tree vectype,
1600 loop_vec_info loop_vinfo,
1601 tree *initial_offset,
1602 tree *misalign,
1603 tree *step,
1604 bool *base_aligned_p)
1605 {
1606 tree this_offset = size_zero_node;
1607 tree this_misalign = size_zero_node;
1608 tree this_step = size_zero_node;
1609 tree base = NULL_TREE;
1610 tree next_ref;
1611 tree oprnd0, oprnd1;
1612 enum tree_code code = TREE_CODE (expr);
1613 HOST_WIDE_INT pbitsize;
1614 HOST_WIDE_INT pbitpos;
1615 tree poffset;
1616 enum machine_mode pmode;
1617 int punsignedp, pvolatilep;
1618 tree bit_pos_in_bytes;
1619 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1620
1621 *base_aligned_p = false;
1622
1623 switch (code)
1624 {
1625 /* These cases end the recursion: */
1626 case VAR_DECL:
1627 case PARM_DECL:
1628 *initial_offset = size_zero_node;
1629 *step = size_zero_node;
1630 *misalign = size_zero_node;
1631 if (DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
1632 *base_aligned_p = true;
1633 return expr;
1634
1635 case SSA_NAME:
1636 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
1637 return NULL_TREE;
1638
1639 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
1640 {
1641 base = vect_get_ptr_offset (expr, vectype, misalign);
1642 if (base)
1643 *base_aligned_p = true;
1644 }
1645 else
1646 {
1647 *base_aligned_p = true;
1648 *misalign = size_zero_node;
1649 }
1650 *initial_offset = size_zero_node;
1651 *step = size_zero_node;
1652 return expr;
1653
1654 case INTEGER_CST:
1655 *initial_offset = fold_convert (sizetype, expr);
1656 *misalign = fold_convert (sizetype, expr);
1657 *step = size_zero_node;
1658 return expr;
1659
1660 /* These cases continue the recursion: */
1661 case ADDR_EXPR:
1662 oprnd0 = TREE_OPERAND (expr, 0);
1663 next_ref = oprnd0;
1664 break;
1665
1666 case INDIRECT_REF:
1667 oprnd0 = TREE_OPERAND (expr, 0);
1668 next_ref = oprnd0;
1669 break;
1670
1671 case PLUS_EXPR:
1672 case MINUS_EXPR:
1673 oprnd0 = TREE_OPERAND (expr, 0);
1674 oprnd1 = TREE_OPERAND (expr, 1);
1675
1676 /* In case we have a PLUS_EXPR of the form
1677 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
1678 This is verified in vect_get_memtag_and_dr. */
1679 base = vect_get_base_and_offset (dr, oprnd1, vectype, loop_vinfo,
1680 &this_offset, &this_misalign,
1681 &this_step, base_aligned_p);
1682 /* Offset was already computed in vect_analyze_pointer_ref_access. */
1683 this_offset = size_zero_node;
1684
1685 if (!base)
1686 this_misalign = NULL_TREE;
1687
1688 next_ref = oprnd0;
1689 break;
1690
1691 default:
1692 if (!handled_component_p (expr))
1693 /* Unsupported expression. */
1694 return NULL_TREE;
1695
1696 /* Find the base and the offset from it. */
1697 next_ref = get_inner_reference (expr, &pbitsize, &pbitpos, &poffset,
1698 &pmode, &punsignedp, &pvolatilep, false);
1699 if (!next_ref)
1700 return NULL_TREE;
1701
1702 if (poffset
1703 && !vect_analyze_offset_expr (poffset, loop, TYPE_SIZE_UNIT (vectype),
1704 &this_offset, &this_misalign,
1705 &this_step))
1706 {
1707 /* Failed to compute offset or step. */
1708 *step = NULL_TREE;
1709 *initial_offset = NULL_TREE;
1710 *misalign = NULL_TREE;
1711 return NULL_TREE;
1712 }
1713
1714 /* Add bit position to OFFSET and MISALIGN. */
1715
1716 bit_pos_in_bytes = size_int (pbitpos / BITS_PER_UNIT);
1717 /* Check that there is no remainder in bits. */
1718 if (pbitpos % BITS_PER_UNIT)
1719 {
1720 if (vect_debug_details (NULL))
1721 fprintf (dump_file, "bit offset not byte-aligned.");
1722 return NULL_TREE;
1723 }
1724 this_offset = fold (size_binop (PLUS_EXPR, bit_pos_in_bytes,
1725 fold_convert (sizetype, this_offset)));
1726 if (this_misalign)
1727 this_misalign = size_binop (PLUS_EXPR, this_misalign, bit_pos_in_bytes);
1728
1729 /* Continue the recursion to refine the base (get_inner_reference returns
1730 &a for &a[i], and not a). */
1731 break;
1732 }
1733
1734 base = vect_get_base_and_offset (dr, next_ref, vectype, loop_vinfo,
1735 initial_offset, misalign, step,
1736 base_aligned_p);
1737 if (base)
1738 {
1739 /* Combine the results. */
1740 if (this_misalign && *misalign)
1741 *misalign = size_binop (PLUS_EXPR, *misalign, this_misalign);
1742 else
1743 *misalign = NULL_TREE;
1744
1745 *step = size_binop (PLUS_EXPR, *step, this_step);
1746
1747 *initial_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (*initial_offset),
1748 *initial_offset, this_offset));
1749
1750 if (vect_debug_details (NULL))
1751 {
1752 print_generic_expr (dump_file, expr, TDF_SLIM);
1753 fprintf (dump_file, "\n --> total offset for ref: ");
1754 print_generic_expr (dump_file, *initial_offset, TDF_SLIM);
1755 fprintf (dump_file, "\n --> total misalign for ref: ");
1756 print_generic_expr (dump_file, *misalign, TDF_SLIM);
1757 fprintf (dump_file, "\n --> total step for ref: ");
1758 print_generic_expr (dump_file, *step, TDF_SLIM);
1759 }
1760 }
1761 return base;
1762 }
1763
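/* Illustrative trace (hypothetical values): for EXPR = a[i] where 'a' is
   a 16-byte-aligned array of ints vectorized as V4SI and 'i' starts at 0
   with step 1, the default case peels the ARRAY_REF via
   get_inner_reference and vect_analyze_offset_expr, contributing
   this_offset = 0, this_misalign = 0 and this_step = 4 (bytes per
   iteration); the recursion then bottoms out at the VAR_DECL 'a', so the
   combined result is BASE = a, INITIAL_OFFSET = 0, MISALIGN = 0,
   STEP = 4 and BASE_ALIGNED_P = true.  */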
1764
1765 /* Function vect_can_force_dr_alignment_p.
1766
1767 Returns whether the alignment of DECL can be forced to an ALIGNMENT-bit
1768 boundary. */
1769
1770 static bool
1771 vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
1772 {
1773 if (TREE_CODE (decl) != VAR_DECL)
1774 return false;
1775
1776 if (DECL_EXTERNAL (decl))
1777 return false;
1778
1779 if (TREE_ASM_WRITTEN (decl))
1780 return false;
1781
1782 if (TREE_STATIC (decl))
1783 return (alignment <= MAX_OFILE_ALIGNMENT);
1784 else
1785 /* This is not 100% correct. The absolute correct stack alignment
1786 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
1787 PREFERRED_STACK_BOUNDARY is honored by all translation units.
1788 However, until someone implements forced stack alignment, SSE
1789 isn't really usable without this. */
1790 return (alignment <= PREFERRED_STACK_BOUNDARY);
1791 }
1792
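/* For illustration: a 'static short x[256]' that this translation unit
   still owns (not DECL_EXTERNAL, assembly not yet written) can have its
   DECL_ALIGN raised to, say, 128 bits so that V8HI accesses to it become
   aligned; an extern array, or one already emitted, cannot be
   retrofitted this way and must instead go through the misalignment
   handling (peeling, or unaligned-access support).  */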
1793
1794 /* Function vect_get_new_vect_var.
1795
1796 Returns a name for a new variable. The current naming scheme prepends
1797 the prefix "vect_" or "vect_p" (depending on the value of VAR_KIND)
1798 to NAME, if NAME is provided; otherwise the prefix alone is used. */
1800
1801 static tree
1802 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1803 {
1804 const char *prefix;
1805 int prefix_len;
1806 tree new_vect_var;
1807
1808 if (var_kind == vect_simple_var)
1809 prefix = "vect_";
1810 else
1811 prefix = "vect_p";
1812
1813 prefix_len = strlen (prefix);
1814
1815 if (name)
1816 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
1817 else
1818 new_vect_var = create_tmp_var (type, prefix);
1819
1820 return new_vect_var;
1821 }
1822
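/* E.g. (illustration): with NAME = "b", vect_simple_var yields a
   temporary named "vect_b" and vect_pointer_var yields "vect_pb"
   (create_tmp_var may further uniquify the name).  */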
1823
1824 /* Function vect_create_index_for_vector_ref.
1825
1826 Create (and return) an index variable, along with its update chain in the
1827 loop. This variable will be used to access a memory location in a vector
1828 operation.
1829
1830 Input:
1831 LOOP: The loop being vectorized.
1832 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
1833 function can be added here, or in the loop pre-header.
1834
1835 Output:
1836 Return an index that will be used to index a vector array. It is expected
1837 that a pointer to the first vector will be used as the base address for the
1838 indexed reference.
1839
1840 FORNOW: we are not trying to be efficient, just creating a new index each
1841 time from scratch. At this time all vector references could use the same
1842 index.
1843
1844 TODO: create only one index to be used by all vector references. Record
1845 the index in the LOOP_VINFO the first time this procedure is called and
1846 return it on subsequent calls. The increment of this index must be placed
1847 just before the conditional expression that ends the single block loop. */
1848
1849 static tree
1850 vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
1851 {
1852 tree init, step;
1853 tree indx_before_incr, indx_after_incr;
1854
1855 /* It is assumed that the base pointer used for vectorized access contains
1856 the address of the first vector. Therefore the index used for vectorized
1857 access must be initialized to zero and incremented by 1. */
1858
1859 init = integer_zero_node;
1860 step = integer_one_node;
1861
1862 /* Assuming that bsi_insert is used with BSI_NEW_STMT. */
1863 create_iv (init, step, NULL_TREE, loop, bsi, false,
1864 &indx_before_incr, &indx_after_incr);
1865
1866 return indx_before_incr;
1867 }
1868
1869
1870 /* Function vect_create_addr_base_for_vector_ref.
1871
1872 Create an expression that computes the address of the first memory location
1873 that will be accessed for a data reference.
1874
1875 Input:
1876 STMT: The statement containing the data reference.
1877 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
1878 OFFSET: Optional. If supplied, it is added to the initial address.
1879
1880 Output:
1881 1. Return an SSA_NAME whose value is the address of the memory location of
1882 the first vector of the data reference.
1883 2. If NEW_STMT_LIST is not NULL_TREE on return, the caller must insert
1884 the statement(s) that define the returned SSA_NAME.
1885
1886 FORNOW: We are only handling array accesses with step 1. */
1887
1888 static tree
1889 vect_create_addr_base_for_vector_ref (tree stmt,
1890 tree *new_stmt_list,
1891 tree offset)
1892 {
1893 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1894 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1895 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
1896 tree base_name = unshare_expr (DR_BASE_NAME (dr));
1897 tree ref = DR_REF (dr);
1898 tree scalar_type = TREE_TYPE (ref);
1899 tree scalar_ptr_type = build_pointer_type (scalar_type);
1900 tree vec_stmt;
1901 tree new_temp;
1902 tree addr_base, addr_expr;
1903 tree dest, new_stmt;
1904 tree base_offset = unshare_expr (STMT_VINFO_VECT_INIT_OFFSET (stmt_info));
1905
1906 if (TREE_CODE (TREE_TYPE (data_ref_base)) != POINTER_TYPE)
1907 /* After the analysis stage, we expect to get here only with RECORD_TYPE
1908 and ARRAY_TYPE. */
1909 /* Add '&' to ref_base. */
1910 data_ref_base = build_fold_addr_expr (data_ref_base);
1911 else
1912 {
1913 /* Create '(scalar_type*) base' for pointers. */
1914 tree dest, new_stmt, new_temp, vec_stmt, tmp_base;
1915 tree scalar_array_type = build_array_type (scalar_type, 0);
1916 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
1917 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
1918 add_referenced_tmp_var (array_ptr);
1919
1920 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
1921 add_referenced_tmp_var (dest);
1922 tmp_base = force_gimple_operand (data_ref_base, &new_stmt, false, dest);
1923 append_to_statement_list_force (new_stmt, new_stmt_list);
1924
1925 vec_stmt = fold_convert (scalar_array_ptr_type, tmp_base);
1926 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
1927 new_temp = make_ssa_name (array_ptr, vec_stmt);
1928 TREE_OPERAND (vec_stmt, 0) = new_temp;
1929 append_to_statement_list_force (vec_stmt, new_stmt_list);
1930 data_ref_base = new_temp;
1931 }
1932
1933 /* Create base_offset */
1934 dest = create_tmp_var (TREE_TYPE (base_offset), "base_off");
1935 add_referenced_tmp_var (dest);
1936 base_offset = force_gimple_operand (base_offset, &new_stmt, false, dest);
1937 append_to_statement_list_force (new_stmt, new_stmt_list);
1938
1939 if (offset)
1940 {
1941 tree tmp = create_tmp_var (TREE_TYPE (base_offset), "offset");
1942 add_referenced_tmp_var (tmp);
1943 offset = fold (build2 (MULT_EXPR, TREE_TYPE (offset), offset,
1944 STMT_VINFO_VECT_STEP (stmt_info)));
1945 base_offset = fold (build2 (PLUS_EXPR, TREE_TYPE (base_offset), base_offset,
1946 offset));
1947 base_offset = force_gimple_operand (base_offset, &new_stmt, false, tmp);
1948 append_to_statement_list_force (new_stmt, new_stmt_list);
1949 }
1950
1951 /* base + base_offset */
1952 addr_base = fold (build2 (PLUS_EXPR, TREE_TYPE (data_ref_base), data_ref_base,
1953 base_offset));
1954
1955 /* addr_expr = addr_base */
1956 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
1957 get_name (base_name));
1958 add_referenced_tmp_var (addr_expr);
1959 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
1960 new_temp = make_ssa_name (addr_expr, vec_stmt);
1961 TREE_OPERAND (vec_stmt, 0) = new_temp;
1962 append_to_statement_list_force (vec_stmt, new_stmt_list);
1963
1964 if (vect_debug_details (NULL))
1965 {
1966 fprintf (dump_file, "created ");
1967 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1968 fprintf (dump_file, "\n");
1969 }
1970 return new_temp;
1971 }
1972
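/* Sketch of the generated preheader code (hypothetical GIMPLE, names
   invented) for an array reference a[i] with a zero initial offset and
   OFFSET = NULL_TREE:

     base_off.0 = 0;
     vect_pa.1 = &a + base_off.0;

   vect_pa.1 is the SSA_NAME returned; both statements end up in
   NEW_STMT_LIST for the caller to insert.  */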
1973
1974 /* Function get_vectype_for_scalar_type.
1975
1976 Returns the vector type corresponding to SCALAR_TYPE as supported
1977 by the target. */
1978
1979 static tree
1980 get_vectype_for_scalar_type (tree scalar_type)
1981 {
1982 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
1983 int nbytes = GET_MODE_SIZE (inner_mode);
1984 int nunits;
1985 tree vectype;
1986
1987 if (nbytes == 0)
1988 return NULL_TREE;
1989
1990 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
1991 is expected. */
1992 nunits = UNITS_PER_SIMD_WORD / nbytes;
1993
1994 vectype = build_vector_type (scalar_type, nunits);
1995 if (vect_debug_details (NULL))
1996 {
1997 fprintf (dump_file, "get vectype with %d units of type ", nunits);
1998 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
1999 }
2000
2001 if (!vectype)
2002 return NULL_TREE;
2003
2004 if (vect_debug_details (NULL))
2005 {
2006 fprintf (dump_file, "vectype: ");
2007 print_generic_expr (dump_file, vectype, TDF_SLIM);
2008 }
2009
2010 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2011 {
2012 /* TODO: tree-complex.c sometimes can parallelize operations
2013 on generic vectors. We can vectorize the loop in that case,
2014 but then we should re-run the lowering pass. */
2015 if (vect_debug_details (NULL))
2016 fprintf (dump_file, "mode not supported by target.");
2017 return NULL_TREE;
2018 }
2019
2020 return vectype;
2021 }
2022
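/* Worked example (assuming UNITS_PER_SIMD_WORD == 16, as on typical
   128-bit SIMD targets): for 'short' (2 bytes) nunits = 16/2 = 8,
   giving V8HI; for 'int' (4 bytes) nunits = 4, giving V4SI.  If the
   target has no matching vector mode, the VECTOR_MODE_P check above
   rejects the type and NULL_TREE is returned.  */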
2023
2024 /* Function vect_align_data_ref.
2025
2026 Handle misalignment of memory accesses.
2027
2028 FORNOW: Can't handle misaligned accesses.
2029 Make sure that the dataref is aligned. */
2030
2031 static void
2032 vect_align_data_ref (tree stmt)
2033 {
2034 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2035 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2036
2037 /* FORNOW: can't handle misaligned accesses;
2038 all accesses expected to be aligned. */
2039 gcc_assert (aligned_access_p (dr));
2040 }
2041
2042
2043 /* Function vect_create_data_ref_ptr.
2044
2045 Create a memory reference expression for vector access, to be used in a
2046 vector load/store stmt. The reference is based on a new pointer to vector
2047 type (vp).
2048
2049 Input:
2050 1. STMT: a stmt that references memory. Expected to be of the form
2051 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
2052 2. BSI: block_stmt_iterator where new stmts can be added.
2053 3. OFFSET (optional): an offset to be added to the initial address accessed
2054 by the data-ref in STMT.
2055 4. ONLY_INIT: indicates whether vp is to be updated inside the loop, or
2056 is to remain pointing to the initial address.
2057
2058 Output:
2059 1. Declare a new ptr to vector_type, and have it point to the base of the
2060 data reference (initial address accessed by the data reference).
2061 For example, for a vector of type V8HI, the following code is generated:
2062
2063 v8hi *vp;
2064 vp = (v8hi *)initial_address;
2065
2066 if OFFSET is not supplied:
2067 initial_address = &a[init];
2068 if OFFSET is supplied:
2069 initial_address = &a[init + OFFSET];
2070
2071 Return the initial_address in INITIAL_ADDRESS.
2072
2073 2. Create a data-reference in the loop based on the new vector pointer vp,
2074 and using a new index variable 'idx' as follows:
2075
2076 vp' = vp + update
2077
2078 where if ONLY_INIT is true:
2079 update = zero
2080 and otherwise
2081 update = idx + vector_type_size
2082
2083 Return the pointer vp'.
2084
2085
2086 FORNOW: handle only aligned and consecutive accesses. */
2087
2088 static tree
2089 vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
2090 tree *initial_address, bool only_init)
2091 {
2092 tree base_name;
2093 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2094 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2095 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2096 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2097 tree vect_ptr_type;
2098 tree vect_ptr;
2099 tree tag;
2100 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
2101 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
2102 vuse_optype vuses = STMT_VUSE_OPS (stmt);
2103 int nvuses, nv_may_defs, nv_must_defs;
2104 int i;
2105 tree new_temp;
2106 tree vec_stmt;
2107 tree new_stmt_list = NULL_TREE;
2108 tree idx;
2109 edge pe = loop_preheader_edge (loop);
2110 basic_block new_bb;
2111 tree vect_ptr_init;
2112 tree vectype_size;
2113 tree ptr_update;
2114 tree data_ref_ptr;
2115 tree type, tmp, size;
2116
2117 base_name = unshare_expr (DR_BASE_NAME (dr));
2118 if (vect_debug_details (NULL))
2119 {
2120 tree data_ref_base = base_name;
2121 fprintf (dump_file, "create array_ref of type: ");
2122 print_generic_expr (dump_file, vectype, TDF_SLIM);
2123 if (TREE_CODE (data_ref_base) == VAR_DECL)
2124 fprintf (dump_file, "\nvectorizing a one dimensional array ref: ");
2125 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2126 fprintf (dump_file, "\nvectorizing a multidimensional array ref: ");
2127 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2128 fprintf (dump_file, "\nvectorizing a record based array ref: ");
2129 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2130 fprintf (dump_file, "\nvectorizing a pointer ref: ");
2131 print_generic_expr (dump_file, base_name, TDF_SLIM);
2132 }
2133
2134 /** (1) Create the new vector-pointer variable: **/
2135
2136 vect_ptr_type = build_pointer_type (vectype);
2137 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2138 get_name (base_name));
2139 add_referenced_tmp_var (vect_ptr);
2140
2141
2142 /** (2) Handle aliasing information of the new vector-pointer: **/
2143
2144 tag = STMT_VINFO_MEMTAG (stmt_info);
2145 gcc_assert (tag);
2146 get_var_ann (vect_ptr)->type_mem_tag = tag;
2147
2148 /* Mark for renaming all aliased variables
2149 (i.e., the may-aliases of the type-mem-tag). */
2150 nvuses = NUM_VUSES (vuses);
2151 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
2152 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
2153 for (i = 0; i < nvuses; i++)
2154 {
2155 tree use = VUSE_OP (vuses, i);
2156 if (TREE_CODE (use) == SSA_NAME)
2157 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
2158 }
2159 for (i = 0; i < nv_may_defs; i++)
2160 {
2161 tree def = V_MAY_DEF_RESULT (v_may_defs, i);
2162 if (TREE_CODE (def) == SSA_NAME)
2163 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2164 }
2165 for (i = 0; i < nv_must_defs; i++)
2166 {
2167 tree def = V_MUST_DEF_RESULT (v_must_defs, i);
2168 if (TREE_CODE (def) == SSA_NAME)
2169 bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
2170 }
2171
2172
2173 /** (3) Calculate the initial address of the vector-pointer, and set
2174 the vector-pointer to point to it before the loop: **/
2175
2176 /* Create: &(base[init_val + offset]) in the loop preheader. */
2177 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2178 offset);
2179 pe = loop_preheader_edge (loop);
2180 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
2181 gcc_assert (!new_bb);
2182 *initial_address = new_temp;
2183
2184 /* Create: p = (vectype *) initial_base */
2185 vec_stmt = fold_convert (vect_ptr_type, new_temp);
2186 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2187 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2188 TREE_OPERAND (vec_stmt, 0) = new_temp;
2189 new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
2190 gcc_assert (!new_bb);
2191 vect_ptr_init = TREE_OPERAND (vec_stmt, 0);
2192
2193
2194 /** (4) Handle the updating of the vector-pointer inside the loop: **/
2195
2196 if (only_init) /* No update in loop is required. */
2197 return vect_ptr_init;
2198
2199 idx = vect_create_index_for_vector_ref (loop, bsi);
2200
2201 /* Create: update = idx * vectype_size */
2202 tmp = create_tmp_var (integer_type_node, "update");
2203 add_referenced_tmp_var (tmp);
2204 size = TYPE_SIZE (vect_ptr_type);
2205 type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
2206 ptr_update = create_tmp_var (type, "update");
2207 add_referenced_tmp_var (ptr_update);
2208 vectype_size = TYPE_SIZE_UNIT (vectype);
2209 vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
2210 vec_stmt = build2 (MODIFY_EXPR, void_type_node, tmp, vec_stmt);
2211 new_temp = make_ssa_name (tmp, vec_stmt);
2212 TREE_OPERAND (vec_stmt, 0) = new_temp;
2213 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2214 vec_stmt = fold_convert (type, new_temp);
2215 vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
2216 new_temp = make_ssa_name (ptr_update, vec_stmt);
2217 TREE_OPERAND (vec_stmt, 0) = new_temp;
2218 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2219
2220 /* Create: data_ref_ptr = vect_ptr_init + update */
2221 vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
2222 vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
2223 new_temp = make_ssa_name (vect_ptr, vec_stmt);
2224 TREE_OPERAND (vec_stmt, 0) = new_temp;
2225 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2226 data_ref_ptr = TREE_OPERAND (vec_stmt, 0);
2227
2228 return data_ref_ptr;
2229 }
2230
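/* Concretely, the in-loop update above amounts to (hypothetical GIMPLE
   for a V8HI access, names invented):

     update.0 = idx * 16;                 # idx * TYPE_SIZE_UNIT (vectype)
     update.1 = (unsigned) update.0;
     vect_pb.2 = vect_pb_init + update.1;

   i.e. each iteration re-derives the pointer from the loop-invariant
   initial pointer instead of incrementing the pointer in place.  */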
2231
2232 /* Function vect_create_destination_var.
2233
2234 Create a new temporary of type VECTYPE. */
2235
2236 static tree
2237 vect_create_destination_var (tree scalar_dest, tree vectype)
2238 {
2239 tree vec_dest;
2240 const char *new_name;
2241
2242 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2243
2244 new_name = get_name (scalar_dest);
2245 if (!new_name)
2246 new_name = "var_";
2247 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
2248 add_referenced_tmp_var (vec_dest);
2249
2250 return vec_dest;
2251 }
2252
2253
2254 /* Function vect_init_vector.
2255
2256 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
2257 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
2258 used in the vectorization of STMT. */
2259
2260 static tree
2261 vect_init_vector (tree stmt, tree vector_var)
2262 {
2263 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2264 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2265 tree new_var;
2266 tree init_stmt;
2267 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2268 tree vec_oprnd;
2269 edge pe;
2270 tree new_temp;
2271 basic_block new_bb;
2272
2273 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
2274 add_referenced_tmp_var (new_var);
2275
2276 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
2277 new_temp = make_ssa_name (new_var, init_stmt);
2278 TREE_OPERAND (init_stmt, 0) = new_temp;
2279
2280 pe = loop_preheader_edge (loop);
2281 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
2282 gcc_assert (!new_bb);
2283
2284 if (vect_debug_details (NULL))
2285 {
2286 fprintf (dump_file, "created new init_stmt: ");
2287 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
2288 }
2289
2290 vec_oprnd = TREE_OPERAND (init_stmt, 0);
2291 return vec_oprnd;
2292 }
2293
2294
2295 /* Function vect_get_vec_def_for_operand.
2296
2297 OP is an operand in STMT. This function returns a (vector) def that will be
2298 used in the vectorized stmt for STMT.
2299
2300 In the case that OP is an SSA_NAME which is defined in the loop, then
2301 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
2302
2303 In case OP is an invariant or constant, a new stmt that creates a vector def
2304 needs to be introduced. */
2305
2306 static tree
2307 vect_get_vec_def_for_operand (tree op, tree stmt)
2308 {
2309 tree vec_oprnd;
2310 tree vec_stmt;
2311 tree def_stmt;
2312 stmt_vec_info def_stmt_info = NULL;
2313 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2314 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2315 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2316 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
2317 basic_block bb;
2318 tree vec_inv;
2319 tree t = NULL_TREE;
2320 tree def;
2321 int i;
2322
2323 if (vect_debug_details (NULL))
2324 {
2325 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
2326 print_generic_expr (dump_file, op, TDF_SLIM);
2327 }
2328
2329 /** ===> Case 1: operand is a constant. **/
2330
2331 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
2332 {
2333 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
2334
2335 tree vec_cst;
2336
2337 /* Build a tree with vector elements. */
2338 if (vect_debug_details (NULL))
2339 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
2340
2341 for (i = nunits - 1; i >= 0; --i)
2342 {
2343 t = tree_cons (NULL_TREE, op, t);
2344 }
2345 vec_cst = build_vector (vectype, t);
2346 return vect_init_vector (stmt, vec_cst);
2347 }
2348
2349 gcc_assert (TREE_CODE (op) == SSA_NAME);
2350
2351 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
2352
2353 def_stmt = SSA_NAME_DEF_STMT (op);
2354 def_stmt_info = vinfo_for_stmt (def_stmt);
2355
2356 if (vect_debug_details (NULL))
2357 {
2358 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
2359 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2360 }
2361
2362
2363 /** ==> Case 2.1: operand is defined inside the loop. **/
2364
2365 if (def_stmt_info)
2366 {
2367 /* Get the def from the vectorized stmt. */
2368
2369 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
2370 gcc_assert (vec_stmt);
2371 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
2372 return vec_oprnd;
2373 }
2374
2375
2376 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
2377 it is a reduction/induction. **/
2378
2379 bb = bb_for_stmt (def_stmt);
2380 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2381 {
2382 if (vect_debug_details (NULL))
2383 fprintf (dump_file, "reduction/induction - unsupported.");
2384 internal_error ("no support for reduction/induction"); /* FORNOW */
2385 }
2386
2387
2388 /** ==> Case 2.3: operand is defined outside the loop -
2389 it is a loop invariant. */
2390
2391 switch (TREE_CODE (def_stmt))
2392 {
2393 case PHI_NODE:
2394 def = PHI_RESULT (def_stmt);
2395 break;
2396 case MODIFY_EXPR:
2397 def = TREE_OPERAND (def_stmt, 0);
2398 break;
2399 case NOP_EXPR:
2400 def = TREE_OPERAND (def_stmt, 0);
2401 gcc_assert (IS_EMPTY_STMT (def_stmt));
2402 def = op;
2403 break;
2404 default:
2405 if (vect_debug_details (NULL))
2406 {
2407 fprintf (dump_file, "unsupported defining stmt: ");
2408 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
2409 }
2410 internal_error ("unsupported defining stmt");
2411 }
2412
2413 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
2414
2415 if (vect_debug_details (NULL))
2416 fprintf (dump_file, "Create vector_inv.");
2417
2418 for (i = nunits - 1; i >= 0; --i)
2419 {
2420 t = tree_cons (NULL_TREE, def, t);
2421 }
2422
2423 vec_inv = build_constructor (vectype, t);
2424 return vect_init_vector (stmt, vec_inv);
2425 }
2426
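/* Putting the cases together (illustration, V4SI assumed): in
   'x = a[i] + 5', the operand 5 is case 1 and becomes the preheader
   constant {5,5,5,5}; the operand a[i] is case 2.1 and its vector def
   is taken from STMT_VINFO_VEC_STMT of the vectorized load; a
   loop-invariant scalar 'n' defined before the loop would be case 2.3
   and be broadcast via the constructor {n,n,n,n}.  */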
2427
2428 /* Function vect_finish_stmt_generation.
2429
2430 Insert a new stmt. */
2431
2432 static void
2433 vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
2434 {
2435 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
2436
2437 if (vect_debug_details (NULL))
2438 {
2439 fprintf (dump_file, "add new stmt: ");
2440 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
2441 }
2442
2443 /* Make sure bsi points to the stmt that is being vectorized. */
2444
2445 /* Assumption: any stmts created for the vectorization of stmt S were
2446 inserted before S. BSI is expected to point to S or to some new stmt
2447 before S. */
2448
2449 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
2450 bsi_next (bsi);
2451 gcc_assert (stmt == bsi_stmt (*bsi));
2452 }
2453
2454
2455 /* Function vectorizable_assignment.
2456
2457 Check if STMT performs an assignment (copy) that can be vectorized.
2458 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2459 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2460 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2461
2462 static bool
2463 vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2464 {
2465 tree vec_dest;
2466 tree scalar_dest;
2467 tree op;
2468 tree vec_oprnd;
2469 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2470 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2471 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2472 tree new_temp;
2473
2474 /* Is vectorizable assignment? */
2475
2476 if (TREE_CODE (stmt) != MODIFY_EXPR)
2477 return false;
2478
2479 scalar_dest = TREE_OPERAND (stmt, 0);
2480 if (TREE_CODE (scalar_dest) != SSA_NAME)
2481 return false;
2482
2483 op = TREE_OPERAND (stmt, 1);
2484 if (!vect_is_simple_use (op, loop, NULL))
2485 {
2486 if (vect_debug_details (NULL))
2487 fprintf (dump_file, "use not simple.");
2488 return false;
2489 }
2490
2491 if (!vec_stmt) /* transformation not required. */
2492 {
2493 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
2494 return true;
2495 }
2496
2497 /** Transform. **/
2498 if (vect_debug_details (NULL))
2499 fprintf (dump_file, "transform assignment.");
2500
2501 /* Handle def. */
2502 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2503
2504 /* Handle use. */
2505 op = TREE_OPERAND (stmt, 1);
2506 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
2507
2508 /* Arguments are ready. Create the new vector stmt. */
2509 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
2510 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2511 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2512 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2513
2514 return true;
2515 }
2516
2517
2518 /* Function vectorizable_operation.
2519
2520 Check if STMT performs a binary or unary operation that can be vectorized.
2521 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2522 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2523 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2524
2525 static bool
2526 vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2527 {
2528 tree vec_dest;
2529 tree scalar_dest;
2530 tree operation;
2531 tree op0, op1 = NULL;
2532 tree vec_oprnd0, vec_oprnd1=NULL;
2533 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2534 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2535 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2536 int i;
2537 enum tree_code code;
2538 enum machine_mode vec_mode;
2539 tree new_temp;
2540 int op_type;
2541 tree op;
2542 optab optab;
2543
2544 /* Is STMT a vectorizable binary/unary operation? */
2545 if (TREE_CODE (stmt) != MODIFY_EXPR)
2546 return false;
2547
2548 if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
2549 return false;
2550
2551 operation = TREE_OPERAND (stmt, 1);
2552 code = TREE_CODE (operation);
2553 optab = optab_for_tree_code (code, vectype);
2554
2555 /* Support only unary or binary operations. */
2556 op_type = TREE_CODE_LENGTH (code);
2557 if (op_type != unary_op && op_type != binary_op)
2558 {
2559 if (vect_debug_details (NULL))
2560 fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
2561 return false;
2562 }
2563
2564 for (i = 0; i < op_type; i++)
2565 {
2566 op = TREE_OPERAND (operation, i);
2567 if (!vect_is_simple_use (op, loop, NULL))
2568 {
2569 if (vect_debug_details (NULL))
2570 fprintf (dump_file, "use not simple.");
2571 return false;
2572 }
2573 }
2574
2575 /* Supportable by target? */
2576 if (!optab)
2577 {
2578 if (vect_debug_details (NULL))
2579 fprintf (dump_file, "no optab.");
2580 return false;
2581 }
2582 vec_mode = TYPE_MODE (vectype);
2583 if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
2584 {
2585 if (vect_debug_details (NULL))
2586 fprintf (dump_file, "op not supported by target.");
2587 return false;
2588 }
2589
2590 if (!vec_stmt) /* transformation not required. */
2591 {
2592 STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
2593 return true;
2594 }
2595
2596 /** Transform. **/
2597
2598 if (vect_debug_details (NULL))
2599 fprintf (dump_file, "transform binary/unary operation.");
2600
2601 /* Handle def. */
2602 scalar_dest = TREE_OPERAND (stmt, 0);
2603 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2604
2605 /* Handle uses. */
2606 op0 = TREE_OPERAND (operation, 0);
2607 vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);
2608
2609 if (op_type == binary_op)
2610 {
2611 op1 = TREE_OPERAND (operation, 1);
2612 vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
2613 }
2614
2615 /* Arguments are ready. Create the new vector stmt. */
2616
2617 if (op_type == binary_op)
2618 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2619 build2 (code, vectype, vec_oprnd0, vec_oprnd1));
2620 else
2621 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
2622 build1 (code, vectype, vec_oprnd0));
2623 new_temp = make_ssa_name (vec_dest, *vec_stmt);
2624 TREE_OPERAND (*vec_stmt, 0) = new_temp;
2625 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2626
2627 return true;
2628 }
2629
2630
2631 /* Function vectorizable_store.
2632
2633 Check if STMT defines a non-scalar data-ref (array/pointer/structure) that
2634 can be vectorized.
2635 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2636 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2637 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2638
2639 static bool
2640 vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2641 {
2642 tree scalar_dest;
2643 tree data_ref;
2644 tree op;
2645 tree vec_oprnd1;
2646 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2647 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2648 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2649 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2650 enum machine_mode vec_mode;
2651 tree dummy;
2652 enum dr_alignment_support alignment_support_scheme;
2653
2654 /* Is vectorizable store? */
2655
2656 if (TREE_CODE (stmt) != MODIFY_EXPR)
2657 return false;
2658
2659 scalar_dest = TREE_OPERAND (stmt, 0);
2660 if (TREE_CODE (scalar_dest) != ARRAY_REF
2661 && TREE_CODE (scalar_dest) != INDIRECT_REF)
2662 return false;
2663
2664 op = TREE_OPERAND (stmt, 1);
2665 if (!vect_is_simple_use (op, loop, NULL))
2666 {
2667 if (vect_debug_details (NULL))
2668 fprintf (dump_file, "use not simple.");
2669 return false;
2670 }
2671
2672 vec_mode = TYPE_MODE (vectype);
2673 /* FORNOW. In some cases can vectorize even if data-type not supported
2674 (e.g. - array initialization with 0). */
2675 if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
2676 return false;
2677
2678 if (!STMT_VINFO_DATA_REF (stmt_info))
2679 return false;
2680
2681
2682 if (!vec_stmt) /* transformation not required. */
2683 {
2684 STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
2685 return true;
2686 }
2687
2688 /** Transform. **/
2689
2690 if (vect_debug_details (NULL))
2691 fprintf (dump_file, "transform store");
2692
2693 alignment_support_scheme = vect_supportable_dr_alignment (dr);
2694 gcc_assert (alignment_support_scheme);
2695 gcc_assert (alignment_support_scheme == dr_aligned); /* FORNOW */
2696
2697 /* Handle use - get the vectorized def from the defining stmt. */
2698 vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);
2699
2700 /* Handle def. */
2701 /* FORNOW: make sure the data reference is aligned. */
2702 vect_align_data_ref (stmt);
2703 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2704 data_ref = build_fold_indirect_ref (data_ref);
2705
2706 /* Arguments are ready. Create the new vector stmt. */
2707 *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
2708 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
2709
2710 return true;
2711 }
2712
2713
2714 /* Function vectorizable_load.
2715
2716 Check if STMT reads a non-scalar data-ref (array/pointer/structure) that
2717 can be vectorized.
2718 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
2719 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
2720 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
2721
2722 static bool
2723 vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
2724 {
2725 tree scalar_dest;
2726 tree vec_dest = NULL;
2727 tree data_ref = NULL;
2728 tree op;
2729 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2730 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2731 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2732 tree new_temp;
2733 int mode;
2734 tree init_addr;
2735 tree new_stmt;
2736 tree dummy;
2737 basic_block new_bb;
2738 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
2739 edge pe = loop_preheader_edge (loop);
2740 enum dr_alignment_support alignment_support_scheme;
2741
2742 /* Is vectorizable load? */
2743
2744 if (TREE_CODE (stmt) != MODIFY_EXPR)
2745 return false;
2746
2747 scalar_dest = TREE_OPERAND (stmt, 0);
2748 if (TREE_CODE (scalar_dest) != SSA_NAME)
2749 return false;
2750
2751 op = TREE_OPERAND (stmt, 1);
2752 if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
2753 return false;
2754
2755 if (!STMT_VINFO_DATA_REF (stmt_info))
2756 return false;
2757
2758 mode = (int) TYPE_MODE (vectype);
2759
2760 /* FORNOW. In some cases can vectorize even if data-type not supported
2761 (e.g. - data copies). */
2762 if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
2763 {
2764 if (vect_debug_details (loop))
2765 fprintf (dump_file, "Aligned load, but unsupported type.");
2766 return false;
2767 }
2768
2769 if (!vec_stmt) /* transformation not required. */
2770 {
2771 STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
2772 return true;
2773 }
2774
2775 /** Transform. **/
2776
2777 if (vect_debug_details (NULL))
2778 fprintf (dump_file, "transform load.");
2779
2780 alignment_support_scheme = vect_supportable_dr_alignment (dr);
2781 gcc_assert (alignment_support_scheme);
2782
2783 if (alignment_support_scheme == dr_aligned
2784 || alignment_support_scheme == dr_unaligned_supported)
2785 {
2786 /* Create:
2787 p = initial_addr;
2788 indx = 0;
2789 loop {
2790 vec_dest = *(p);
2791 indx = indx + 1;
2792 }
2793 */
2794
2795 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2796 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
2797 if (aligned_access_p (dr))
2798 data_ref = build_fold_indirect_ref (data_ref);
2799 else
2800 {
2801 int mis = DR_MISALIGNMENT (dr);
2802 tree tmis = (mis == -1 ? size_zero_node : size_int (mis));
2803 tmis = size_binop (MULT_EXPR, tmis, size_int (BITS_PER_UNIT));
2804 data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
2805 }
2806 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2807 new_temp = make_ssa_name (vec_dest, new_stmt);
2808 TREE_OPERAND (new_stmt, 0) = new_temp;
2809 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2810 }
2811 else if (alignment_support_scheme == dr_unaligned_software_pipeline)
2812 {
2813 /* Create:
2814 p1 = initial_addr;
2815 msq_init = *(floor(p1))
2816 p2 = initial_addr + VS - 1;
2817 magic = have_builtin ? builtin_result : initial_address;
2818 indx = 0;
2819 loop {
2820 p2' = p2 + indx * vectype_size
2821 lsq = *(floor(p2'))
2822 vec_dest = realign_load (msq, lsq, magic)
2823 indx = indx + 1;
2824 msq = lsq;
2825 }
2826 */
2827
2828 tree offset;
2829 tree magic;
2830 tree phi_stmt;
2831 tree msq_init;
2832 tree msq, lsq;
2833 tree dataref_ptr;
2834 tree params;
2835
2836 /* <1> Create msq_init = *(floor(p1)) in the loop preheader */
2837 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2838 data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
2839 &init_addr, true);
2840 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
2841 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2842 new_temp = make_ssa_name (vec_dest, new_stmt);
2843 TREE_OPERAND (new_stmt, 0) = new_temp;
2844 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2845 gcc_assert (!new_bb);
2846 msq_init = TREE_OPERAND (new_stmt, 0);
2847
2848
2849 /* <2> Create lsq = *(floor(p2')) in the loop */
2850 offset = build_int_cst (integer_type_node,
2851 GET_MODE_NUNITS (TYPE_MODE (vectype)));
2852 offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
2853 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2854 dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
2855 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
2856 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
2857 new_temp = make_ssa_name (vec_dest, new_stmt);
2858 TREE_OPERAND (new_stmt, 0) = new_temp;
2859 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2860 lsq = TREE_OPERAND (new_stmt, 0);
2861
2862
2863 /* <3> */
2864 if (targetm.vectorize.builtin_mask_for_load)
2865 {
2866 /* Create permutation mask, if required, in loop preheader. */
2867 tree builtin_decl;
2868 params = build_tree_list (NULL_TREE, init_addr);
2869 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2870 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2871 new_stmt = build_function_call_expr (builtin_decl, params);
2872 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2873 new_temp = make_ssa_name (vec_dest, new_stmt);
2874 TREE_OPERAND (new_stmt, 0) = new_temp;
2875 new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
2876 gcc_assert (!new_bb);
2877 magic = TREE_OPERAND (new_stmt, 0);
2878
2879 /* Since we have just created a CALL_EXPR, we may need to
2880 rename call-clobbered variables. */
2881 mark_call_clobbered_vars_to_rename ();
2882 }
2883 else
2884 {
2885 /* Use the current address instead of init_addr, for reduced register
2886 pressure. */
2887 magic = dataref_ptr;
2888 }
2889
2890
2891 /* <4> Create msq = phi <msq_init, lsq> in loop */
2892 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2893 msq = make_ssa_name (vec_dest, NULL_TREE);
2894 phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
2895 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2896 add_phi_arg (phi_stmt, msq_init, loop_preheader_edge (loop));
2897 add_phi_arg (phi_stmt, lsq, loop_latch_edge (loop));
2898
2899
2900 /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop */
2901 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2902 new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
2903 new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
2904 new_temp = make_ssa_name (vec_dest, new_stmt);
2905 TREE_OPERAND (new_stmt, 0) = new_temp;
2906 vect_finish_stmt_generation (stmt, new_stmt, bsi);
2907 }
2908 else
2909 gcc_unreachable ();
2910
2911 *vec_stmt = new_stmt;
2912 return true;
2913 }
2914
2915
2916 /* Function vect_supportable_dr_alignment.
2917
2918 Return whether the data reference DR is supported with respect to its
2919 alignment. */
2920
2921 static enum dr_alignment_support
2922 vect_supportable_dr_alignment (struct data_reference *dr)
2923 {
2924 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)));
2925 enum machine_mode mode = (int) TYPE_MODE (vectype);
2926
2927 if (aligned_access_p (dr))
2928 return dr_aligned;
2929
2930 /* Possibly unaligned access. */
2931
2932 if (DR_IS_READ (dr))
2933 {
2934 if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
2935 && (!targetm.vectorize.builtin_mask_for_load
2936 || targetm.vectorize.builtin_mask_for_load ()))
2937 return dr_unaligned_software_pipeline;
2938
2939 if (movmisalign_optab->handlers[mode].insn_code != CODE_FOR_nothing)
2940 /* Can't software pipeline the loads, but can at least do them. */
2941 return dr_unaligned_supported;
2942 }
2943
2944 /* Unsupported. */
2945 return dr_unaligned_unsupported;
2946 }
2947
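/* In short (FORNOW): an aligned ref is always fine; an unaligned read
   can use the realign-load scheme when the target provides
   vec_realign_load (optionally with a mask_for_load builtin), or plain
   misaligned moves via movmisalign; an unaligned write has no fallback
   yet and is reported as unsupported.  */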
2948
2949 /* Function vect_transform_stmt.
2950
2951 Create a vectorized stmt to replace STMT, and insert it at BSI. */
2952
2953 static bool
2954 vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
2955 {
2956 bool is_store = false;
2957 tree vec_stmt = NULL_TREE;
2958 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2959 bool done;
2960
2961 switch (STMT_VINFO_TYPE (stmt_info))
2962 {
2963 case op_vec_info_type:
2964 done = vectorizable_operation (stmt, bsi, &vec_stmt);
2965 gcc_assert (done);
2966 break;
2967
2968 case assignment_vec_info_type:
2969 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
2970 gcc_assert (done);
2971 break;
2972
2973 case load_vec_info_type:
2974 done = vectorizable_load (stmt, bsi, &vec_stmt);
2975 gcc_assert (done);
2976 break;
2977
2978 case store_vec_info_type:
2979 done = vectorizable_store (stmt, bsi, &vec_stmt);
2980 gcc_assert (done);
2981 is_store = true;
2982 break;
2983 default:
2984 if (vect_debug_details (NULL))
2985 fprintf (dump_file, "stmt not supported.");
2986 gcc_unreachable ();
2987 }
2988
2989 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
2990
2991 return is_store;
2992 }
2993
2994
2995 /* This function builds ni_name = the number of iterations the loop
2996 executes, and inserts the computation on the loop preheader edge. */
2997
2998 static tree
2999 vect_build_loop_niters (loop_vec_info loop_vinfo)
3000 {
3001 tree ni_name, stmt, var;
3002 edge pe;
3003 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3004 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
3005
3006 var = create_tmp_var (TREE_TYPE (ni), "niters");
3007 add_referenced_tmp_var (var);
3008 ni_name = force_gimple_operand (ni, &stmt, false, var);
3009
3010 pe = loop_preheader_edge (loop);
3011 if (stmt)
3012 {
3013 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3014 gcc_assert (!new_bb);
3015 }
3016
3017 return ni_name;
3018 }
3019
3020
3021 /* This function generates the following statements:
3022
3023 ni_name = number of iterations loop executes
3024 ratio = ni_name / vf
3025 ratio_mult_vf_name = ratio * vf
3026
3027 and places them at the loop preheader edge. */
3028
3029 static void
3030 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo,
3031 tree *ni_name_ptr,
3032 tree *ratio_mult_vf_name_ptr,
3033 tree *ratio_name_ptr)
3034 {
3035
3036 edge pe;
3037 basic_block new_bb;
3038 tree stmt, ni_name;
3039 tree var;
3040 tree ratio_name;
3041 tree ratio_mult_vf_name;
3042 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3043 tree ni = LOOP_VINFO_NITERS (loop_vinfo);
3044 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3045 tree log_vf = build_int_cst (unsigned_type_node, exact_log2 (vf));
3046
3047 pe = loop_preheader_edge (loop);
3048
3049 /* Generate a temporary variable that contains
3050 the number of iterations the loop executes. */
3051
3052 ni_name = vect_build_loop_niters (loop_vinfo);
3053
3054 /* Create: ratio = ni >> log2(vf) */
3055
3056 var = create_tmp_var (TREE_TYPE (ni), "bnd");
3057 add_referenced_tmp_var (var);
3058 ratio_name = make_ssa_name (var, NULL_TREE);
3059 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_name,
3060 build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf));
3061 SSA_NAME_DEF_STMT (ratio_name) = stmt;
3062
3063 pe = loop_preheader_edge (loop);
3064 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3065 gcc_assert (!new_bb);
3066
3067 /* Create: ratio_mult_vf = ratio << log2 (vf). */
3068
3069 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf");
3070 add_referenced_tmp_var (var);
3071 ratio_mult_vf_name = make_ssa_name (var, NULL_TREE);
3072 stmt = build2 (MODIFY_EXPR, void_type_node, ratio_mult_vf_name,
3073 build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), ratio_name, log_vf));
3074 SSA_NAME_DEF_STMT (ratio_mult_vf_name) = stmt;
3075
3076 pe = loop_preheader_edge (loop);
3077 new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3078 gcc_assert (!new_bb);
3079
3080 *ni_name_ptr = ni_name;
3081 *ratio_mult_vf_name_ptr = ratio_mult_vf_name;
3082 *ratio_name_ptr = ratio_name;
3083
3084 return;
3085 }
3086
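/* Worked example (illustrative numbers): with ni = 103 and vf = 8,
   log_vf = 3, so ratio = 103 >> 3 = 12 and ratio_mult_vf = 12 << 3 = 96;
   the vectorized loop will run 12 times, covering 96 scalar iterations,
   and the remaining 103 - 96 = 7 iterations fall to the epilog loop.  */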
3087
3088 /* Function vect_update_ivs_after_vectorizer.
3089
3090 "Advance" the induction variables of LOOP to the value they should take
3091 after the execution of LOOP. This is currently necessary because the
3092 vectorizer does not handle induction variables that are used after the
3093 loop. Such a situation occurs when the last iterations of LOOP are
3094 peeled, because:
3095 1. We introduced new uses after LOOP for IVs that were not originally used
3096 after LOOP: the IVs of LOOP are now used by an epilog loop.
3097 2. LOOP is going to be vectorized; this means that it will iterate N/VF
3098 times, whereas the loop IVs should be bumped N times.
3099
3100 Input:
3101 - LOOP - a loop that is going to be vectorized. The last few iterations
3102 of LOOP were peeled.
3103 - NITERS - the number of iterations that LOOP executes (before it is
3104 vectorized), i.e., the number of times the ivs should be bumped.
3105 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
3106 coming out from LOOP on which there are uses of the LOOP ivs
3107 (this is the path from LOOP->exit to epilog_loop->preheader).
3108
3109 The new definitions of the ivs are placed in LOOP->exit.
3110 The phi args associated with the edge UPDATE_E in the bb
3111 UPDATE_E->dest are updated accordingly.
3112
3113 Assumption 1: Like the rest of the vectorizer, this function assumes
3114 a single loop exit that has a single predecessor.
3115
3116 Assumption 2: The phi nodes in the LOOP header and in update_bb are
3117 organized in the same order.
3118
3119 Assumption 3: The access function of the ivs is simple enough (see
3120 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
3121
3122 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
3123 coming out of LOOP on which the ivs of LOOP are used (this is the path
3124 that leads to the epilog loop; other paths skip the epilog loop). This
3125 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
3126 needs to have its phis updated.
3127 */
3128
3129 static void
3130 vect_update_ivs_after_vectorizer (struct loop *loop, tree niters, edge update_e)
3131 {
3132 basic_block exit_bb = loop->exit_edges[0]->dest;
3133 tree phi, phi1;
3134 basic_block update_bb = update_e->dest;
3135
3136 /* gcc_assert (vect_can_advance_ivs_p (loop)); */
3137
3138 /* Make sure there exists a single-predecessor exit bb: */
3139 gcc_assert (EDGE_COUNT (exit_bb->preds) == 1);
3140
3141 for (phi = phi_nodes (loop->header), phi1 = phi_nodes (update_bb);
3142 phi && phi1;
3143 phi = PHI_CHAIN (phi), phi1 = PHI_CHAIN (phi1))
3144 {
3145 tree access_fn = NULL;
3146 tree evolution_part;
3147 tree init_expr;
3148 tree step_expr;
3149 tree var, stmt, ni, ni_name;
3150 block_stmt_iterator last_bsi;
3151
3152 /* Skip virtual phi's. */
3153 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3154 {
3155 if (vect_debug_details (NULL))
3156 fprintf (dump_file, "virtual phi. skip.");
3157 continue;
3158 }
3159
3160 access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
3161 gcc_assert (access_fn);
3162 evolution_part =
3163 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
3164 gcc_assert (evolution_part != NULL_TREE);
3165
3166 /* FORNOW: We do not support IVs whose evolution function is a polynomial
3167 of degree >= 2 or exponential. */
3168 gcc_assert (!tree_is_chrec (evolution_part));
3169
3170 step_expr = evolution_part;
3171 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
3172 loop->num));
3173
3174 ni = build2 (PLUS_EXPR, TREE_TYPE (init_expr),
3175 build2 (MULT_EXPR, TREE_TYPE (niters),
3176 niters, step_expr), init_expr);
3177
3178 var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
3179 add_referenced_tmp_var (var);
3180
3181 ni_name = force_gimple_operand (ni, &stmt, false, var);
3182
3183 /* Insert stmt into exit_bb. */
3184 last_bsi = bsi_last (exit_bb);
3185 if (stmt)
3186 bsi_insert_before (&last_bsi, stmt, BSI_SAME_STMT);
3187
3188 /* Fix phi expressions in the successor bb. */
3189 gcc_assert (PHI_ARG_DEF_FROM_EDGE (phi1, update_e) ==
3190 PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0)));
3191 SET_PHI_ARG_DEF (phi1, phi_arg_from_edge (phi1, update_e), ni_name);
3192 }
3193 }
3194
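/* Example (hypothetical iv): for an iv with init_expr = 0 and
   step_expr = 4, and NITERS = n, the code above computes
   ni = n * 4 + 0 in the exit bb and redirects the corresponding phi
   arg in update_bb to that value, so the epilog loop resumes the iv
   where the scalar loop would have left it.  */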
3195
3196 /* Function vect_do_peeling_for_loop_bound.
3197
3198 Peel the last iterations of the loop represented by LOOP_VINFO.
3199 The peeled iterations form a new epilog loop. Given that the loop now
3200 iterates NITERS times, the new epilog loop iterates
3201 NITERS % VECTORIZATION_FACTOR times.
3202
3203 The original loop will later be made to iterate
3204 NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO). */
3205
3206 static void
3207 vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
3208 struct loops *loops)
3209 {
3210
3211 tree ni_name, ratio_mult_vf_name;
3212 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3213 struct loop *new_loop;
3214 edge update_e;
3215 #ifdef ENABLE_CHECKING
3216 int loop_num;
3217 #endif
3218
3219 if (vect_debug_details (NULL))
3220 fprintf (dump_file, "\n<<vect_do_peeling_for_loop_bound>>\n");
3221
3222 /* Generate the following variables on the preheader of original loop:
3223
3224 ni_name = number of iteration the original loop executes
3225 ratio = ni_name / vf
3226 ratio_mult_vf_name = ratio * vf */
3227 vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
3228 &ratio_mult_vf_name, ratio);
3229
3230 /* Update loop info. */
3231 loop->pre_header = loop_preheader_edge (loop)->src;
3232 loop->pre_header_edges[0] = loop_preheader_edge (loop);
3233
3234 #ifdef ENABLE_CHECKING
3235 loop_num = loop->num;
3236 #endif
3237 new_loop = slpeel_tree_peel_loop_to_edge (loop, loops, loop->exit_edges[0],
3238 ratio_mult_vf_name, ni_name, false);
3239 #ifdef ENABLE_CHECKING
3240 gcc_assert (new_loop);
3241 gcc_assert (loop_num == loop->num);
3242 slpeel_verify_cfg_after_peeling (loop, new_loop);
3243 #endif
3244
3245 /* A guard that controls whether the new_loop is to be executed or skipped
3246 is placed in LOOP->exit. LOOP->exit therefore has two successors - one
3247 is the preheader of NEW_LOOP, where the IVs from LOOP are used. The other
3248 is a bb after NEW_LOOP, where these IVs are not used. Find the edge that
3249 is on the path where the LOOP IVs are used and need to be updated. */
3250
3251 if (EDGE_PRED (new_loop->pre_header, 0)->src == loop->exit_edges[0]->dest)
3252 update_e = EDGE_PRED (new_loop->pre_header, 0);
3253 else
3254 update_e = EDGE_PRED (new_loop->pre_header, 1);
3255
3256 /* Update IVs of original loop as if they were advanced
3257 by ratio_mult_vf_name steps. */
3258 vect_update_ivs_after_vectorizer (loop, ratio_mult_vf_name, update_e);
3259
3260 /* After peeling we have to reset scalar evolution analyzer. */
3261 scev_reset ();
3262
3263 return;
3264 }
3265
3266
3267 /* Function vect_gen_niters_for_prolog_loop.
3268
3269 Set the number of iterations for the loop represented by LOOP_VINFO
3270 to the minimum between LOOP_NITERS (the original iteration count of the loop)
3271 and the misalignment of DR - the first data reference recorded in
3272 LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO). As a result, after the execution of
3273 this loop, the data reference DR will refer to an aligned location.
3274
3275 The following computation is generated:
3276
3277 compute address misalignment in bytes:
3278 addr_mis = addr & (vectype_size - 1)
3279
3280 prolog_niters = min ( LOOP_NITERS , (VF - addr_mis/elem_size)&(VF-1) )
3281
3282 (elem_size = element type size; an element is the scalar element
3283 whose type is the inner type of the vectype) */
3284
3285 static tree
3286 vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
3287 {
3288 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
3289 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3290 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3291 tree var, stmt;
3292 tree iters, iters_name;
3293 edge pe;
3294 basic_block new_bb;
3295 tree dr_stmt = DR_STMT (dr);
3296 stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
3297 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3298 int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
3299 tree elem_misalign;
3300 tree byte_misalign;
3301 tree new_stmts = NULL_TREE;
3302 tree start_addr =
3303 vect_create_addr_base_for_vector_ref (dr_stmt, &new_stmts, NULL_TREE);
3304 tree ptr_type = TREE_TYPE (start_addr);
3305 tree size = TYPE_SIZE (ptr_type);
3306 tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
3307 tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
3308 tree vf_minus_1 = build_int_cst (unsigned_type_node, vf - 1);
3309 tree niters_type = TREE_TYPE (loop_niters);
3310 tree elem_size_log =
3311 build_int_cst (unsigned_type_node, exact_log2 (vectype_align/vf));
3312 tree vf_tree = build_int_cst (unsigned_type_node, vf);
3313
3314 pe = loop_preheader_edge (loop);
3315 new_bb = bsi_insert_on_edge_immediate (pe, new_stmts);
3316 gcc_assert (!new_bb);
3317
3318 /* Create: byte_misalign = addr & (vectype_size - 1) */
3319 byte_misalign = build2 (BIT_AND_EXPR, type, start_addr, vectype_size_minus_1);
3320
3321 /* Create: elem_misalign = byte_misalign / element_size */
3322 elem_misalign =
3323 build2 (RSHIFT_EXPR, unsigned_type_node, byte_misalign, elem_size_log);
3324
3325 /* Create: (niters_type) (VF - elem_misalign)&(VF - 1) */
3326 iters = build2 (MINUS_EXPR, unsigned_type_node, vf_tree, elem_misalign);
3327 iters = build2 (BIT_AND_EXPR, unsigned_type_node, iters, vf_minus_1);
3328 iters = fold_convert (niters_type, iters);
3329
3330 /* Create: prolog_loop_niters = min (iters, loop_niters) */
3331 /* If the loop bound is known at compile time we already verified that it is
3332 greater than vf; since the misalignment ('iters') is at most vf, there's
3333 no need to generate the MIN_EXPR in this case. */
3334 if (TREE_CODE (loop_niters) != INTEGER_CST)
3335 iters = build2 (MIN_EXPR, niters_type, iters, loop_niters);
3336
3337 var = create_tmp_var (niters_type, "prolog_loop_niters");
3338 add_referenced_tmp_var (var);
3339 iters_name = force_gimple_operand (iters, &stmt, false, var);
3340
3341 /* Insert stmt on loop preheader edge. */
3342 pe = loop_preheader_edge (loop);
3343 if (stmt)
3344 {
3345 basic_block new_bb = bsi_insert_on_edge_immediate (pe, stmt);
3346 gcc_assert (!new_bb);
3347 }
3348
3349 return iters_name;
3350 }
3351
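/* Worked example (numbers are assumptions): for V4SI we have
   vectype_align = 16, vf = 4 and elem_size = 4, so elem_size_log = 2.
   A start address ending in ...8 gives byte_misalign = 8 and
   elem_misalign = 8 >> 2 = 2, hence the prolog loop runs
   (4 - 2) & 3 = 2 iterations, after which the access is 16-byte
   aligned.  */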
3352
3353 /* Function vect_update_inits_of_dr.
3354
3355 NITERS iterations were peeled from LOOP. DR represents a data reference
3356 in LOOP. This function updates the information recorded in DR to
3357 account for the fact that the first NITERS iterations had already been
3358 executed. Specifically, it updates the OFFSET field of stmt_info. */
3359
3360 static void
3361 vect_update_inits_of_dr (struct data_reference *dr, tree niters)
3362 {
3363 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
3364 tree offset = STMT_VINFO_VECT_INIT_OFFSET (stmt_info);
3365
3366 niters = fold (build2 (MULT_EXPR, TREE_TYPE (niters), niters,
3367 STMT_VINFO_VECT_STEP (stmt_info)));
3368 offset = fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset, niters));
3369 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
3370 }
3371
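/* E.g. (illustration): if NITERS = 2 iterations were peeled and the
   step of DR is 4 bytes, the recorded init offset grows by
   2 * 4 = 8 bytes.  */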
3372
3373 /* Function vect_update_inits_of_drs.
3374
3375 NITERS iterations were peeled from the loop represented by LOOP_VINFO.
3376 This function updates the information recorded for the data references in
3377 the loop to account for the fact that the first NITERS iterations had
3378 already been executed. Specifically, it updates the initial_condition of the
3379 access_function of all the data_references in the loop. */
3380
3381 static void
3382 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters)
3383 {
3384 unsigned int i;
3385 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3386 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3387
3388 if (dump_file && (dump_flags & TDF_DETAILS))
3389 fprintf (dump_file, "\n<<vect_update_inits_of_drs>>\n");
3390
3391 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3392 {
3393 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3394 vect_update_inits_of_dr (dr, niters);
3395 }
3396
3397 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3398 {
3399 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3400 vect_update_inits_of_dr (dr, niters);
3401 }
3402 }
3403
3404
3405 /* Function vect_do_peeling_for_alignment
3406
3407 Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
3408 'niters' is set to the misalignment of one of the data references in the
3409 loop, thereby forcing it to refer to an aligned location at the beginning
3410 of the execution of this loop. The data reference for which we are
3411 peeling is recorded in LOOP_VINFO_UNALIGNED_DR. */
3412
3413 static void
3414 vect_do_peeling_for_alignment (loop_vec_info loop_vinfo, struct loops *loops)
3415 {
3416 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3417 tree niters_of_prolog_loop, ni_name;
3418 tree n_iters;
3419 struct loop *new_loop;
3420
3421 if (vect_debug_details (NULL))
3422 fprintf (dump_file, "\n<<vect_do_peeling_for_alignment>>\n");
3423
3424 ni_name = vect_build_loop_niters (loop_vinfo);
3425 niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
3426
3427 /* Peel off a prolog loop and make it iterate niters_of_prolog_loop times. */
3428 new_loop =
3429 slpeel_tree_peel_loop_to_edge (loop, loops, loop_preheader_edge (loop),
3430 niters_of_prolog_loop, ni_name, true);
3431 #ifdef ENABLE_CHECKING
3432 gcc_assert (new_loop);
3433 slpeel_verify_cfg_after_peeling (new_loop, loop);
3434 #endif
3435
3436 /* Update number of times loop executes. */
3437 n_iters = LOOP_VINFO_NITERS (loop_vinfo);
3438 LOOP_VINFO_NITERS (loop_vinfo) =
3439 build2 (MINUS_EXPR, TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
3440
3441 /* Update the init conditions of the access functions of all data refs. */
3442 vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
3443
3444 /* After peeling we have to reset scalar evolution analyzer. */
3445 scev_reset ();
3446
3447 return;
3448 }
3449
3450
3451 /* Function vect_transform_loop.
3452
3453 The analysis phase has determined that the loop is vectorizable.
3454 Vectorize the loop - create vectorized stmts to replace the scalar
3455 stmts in the loop, and update the loop exit condition. */
3456
3457 static void
3458 vect_transform_loop (loop_vec_info loop_vinfo,
3459 struct loops *loops ATTRIBUTE_UNUSED)
3460 {
3461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3462 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3463 int nbbs = loop->num_nodes;
3464 block_stmt_iterator si;
3465 int i;
3466 tree ratio = NULL;
3467 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
3468
3469 if (vect_debug_details (NULL))
3470 fprintf (dump_file, "\n<<vect_transform_loop>>\n");
3471
3472
3473 /* Peel the loop if there are data refs with unknown alignment.
3474 Only one store with unknown alignment - the one we peel for - is allowed. */
3475
3476 if (LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo))
3477 vect_do_peeling_for_alignment (loop_vinfo, loops);
3478
3479 /* If the loop has a symbolic number of iterations 'n' (i.e. it's not a
3480 compile time constant), or it is a constant that is not divisible by the
3481 vectorization factor, then an epilog loop needs to be created.
3482 We therefore duplicate the loop: the original loop will be vectorized,
3483 and will compute the first (n/VF) iterations. The second copy of the loop
3484 will remain scalar and will compute the remaining (n%VF) iterations.
3485 (VF is the vectorization factor). */
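/* Illustrative example (hypothetical values): with n = 10 and VF = 4,
   the vectorized loop executes n/VF = 2 iterations (covering 8
   elements) and the scalar epilog loop executes the remaining
   n%VF = 2 iterations. When n is symbolic the same split is performed
   with a ratio computed at runtime.  */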
3486
3487 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3488 || (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3489 && LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0))
3490 vect_do_peeling_for_loop_bound (loop_vinfo, &ratio, loops);
3491 else
3492 ratio = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
3493 LOOP_VINFO_INT_NITERS (loop_vinfo) / vectorization_factor);
3494
3495 /* 1) Make sure the loop header has exactly two entries
3496 2) Make sure we have a preheader basic block. */
3497
3498 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
3499
3500 loop_split_edge_with (loop_preheader_edge (loop), NULL);
3501
3502
3503 /* FORNOW: the vectorizer supports only loops whose body consists
3504 of one basic block (header + empty latch). When the vectorizer
3505 supports more involved loop forms, the order in which the BBs are
3506 traversed will need to be reconsidered. */
3507
3508 for (i = 0; i < nbbs; i++)
3509 {
3510 basic_block bb = bbs[i];
3511
3512 for (si = bsi_start (bb); !bsi_end_p (si);)
3513 {
3514 tree stmt = bsi_stmt (si);
3515 stmt_vec_info stmt_info;
3516 bool is_store;
3517
3518 if (vect_debug_details (NULL))
3519 {
3520 fprintf (dump_file, "------>vectorizing statement: ");
3521 print_generic_expr (dump_file, stmt, TDF_SLIM);
3522 }
3523 stmt_info = vinfo_for_stmt (stmt);
3524 gcc_assert (stmt_info);
3525 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3526 {
3527 bsi_next (&si);
3528 continue;
3529 }
3530 #ifdef ENABLE_CHECKING
3531 /* FORNOW: Verify that all stmts operate on the same number of
3532 units and no inner unrolling is necessary. */
3533 gcc_assert
3534 (GET_MODE_NUNITS (TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info)))
3535 == vectorization_factor);
3536 #endif
3537 /* -------- vectorize statement ------------ */
3538 if (vect_debug_details (NULL))
3539 fprintf (dump_file, "transform statement.");
3540
3541 is_store = vect_transform_stmt (stmt, &si);
3542 if (is_store)
3543 {
3544 /* free the attached stmt_vec_info and remove the stmt. */
3545 stmt_ann_t ann = stmt_ann (stmt);
3546 free (stmt_info);
3547 set_stmt_info (ann, NULL);
3548 bsi_remove (&si);
3549 continue;
3550 }
3551
3552 bsi_next (&si);
3553 } /* stmts in BB */
3554 } /* BBs in loop */
3555
3556 slpeel_make_loop_iterate_ntimes (loop, ratio);
3557
3558 if (vect_debug_details (loop))
3559 fprintf (dump_file, "Success! loop vectorized.");
3560 if (vect_debug_stats (loop))
3561 fprintf (dump_file, "LOOP VECTORIZED.");
3562 }
3563
3564
3565 /* Function vect_is_simple_use.
3566
3567 Input:
3568 LOOP - the loop that is being vectorized.
3569 OPERAND - operand of a stmt in LOOP.
3570 DEF - the defining stmt in case OPERAND is an SSA_NAME.
3571
3572 Returns whether a stmt with OPERAND can be vectorized.
3573 Supportable operands are constants, loop invariants, and operands that are
3574 defined by the current iteration of the loop. Unsupportable operands are
3575 those that are defined by a previous iteration of the loop (as is the case
3576 in reduction/induction computations). */
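/* Illustrative examples (not from the original sources): in
     a[i] = b[i] + x;
   a constant or loop-invariant x, and the SSA_NAME holding the value
   loaded from b[i] (defined by a MODIFY_EXPR in the current iteration),
   are both simple uses. In
     sum = sum + a[i];
   the use of sum is defined by a PHI_NODE inside the loop (a reduction
   cycle), so FORNOW it is rejected.  */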
3577
3578 static bool
3579 vect_is_simple_use (tree operand, struct loop *loop, tree *def)
3580 {
3581 tree def_stmt;
3582 basic_block bb;
3583
3584 if (def)
3585 *def = NULL_TREE;
3586
3587 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
3588 return true;
3589
3590 if (TREE_CODE (operand) != SSA_NAME)
3591 return false;
3592
3593 def_stmt = SSA_NAME_DEF_STMT (operand);
3594 if (def_stmt == NULL_TREE )
3595 {
3596 if (vect_debug_details (NULL))
3597 fprintf (dump_file, "no def_stmt.");
3598 return false;
3599 }
3600
3601 /* An empty stmt is expected only in case of a function argument.
3602 (Otherwise we expect a phi_node or a modify_expr.) */
3603 if (IS_EMPTY_STMT (def_stmt))
3604 {
3605 tree arg = TREE_OPERAND (def_stmt, 0);
3606 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
3607 return true;
3608 if (vect_debug_details (NULL))
3609 {
3610 fprintf (dump_file, "Unexpected empty stmt: ");
3611 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
3612 }
3613 return false;
3614 }
3615
3616 /* phi_node inside the loop indicates an induction/reduction pattern.
3617 This is not supported yet. */
3618 bb = bb_for_stmt (def_stmt);
3619 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
3620 {
3621 if (vect_debug_details (NULL))
3622 fprintf (dump_file, "reduction/induction - unsupported.");
3623 return false; /* FORNOW: not supported yet. */
3624 }
3625
3626 /* Expecting a modify_expr or a phi_node. */
3627 if (TREE_CODE (def_stmt) == MODIFY_EXPR
3628 || TREE_CODE (def_stmt) == PHI_NODE)
3629 {
3630 if (def)
3631 *def = def_stmt;
3632 return true;
3633 }
3634
3635 return false;
3636 }
3637
3638
3639 /* Function vect_analyze_operations.
3640
3641 Scan the loop stmts and make sure they are all vectorizable. */
3642
3643 static bool
3644 vect_analyze_operations (loop_vec_info loop_vinfo)
3645 {
3646 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3647 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3648 int nbbs = loop->num_nodes;
3649 block_stmt_iterator si;
3650 unsigned int vectorization_factor = 0;
3651 int i;
3652 bool ok;
3653 tree scalar_type;
3654
3655 if (vect_debug_details (NULL))
3656 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
3657
3658 for (i = 0; i < nbbs; i++)
3659 {
3660 basic_block bb = bbs[i];
3661
3662 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
3663 {
3664 tree stmt = bsi_stmt (si);
3665 unsigned int nunits;
3666 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3667 tree vectype;
3668
3669 if (vect_debug_details (NULL))
3670 {
3671 fprintf (dump_file, "==> examining statement: ");
3672 print_generic_expr (dump_file, stmt, TDF_SLIM);
3673 }
3674
3675 gcc_assert (stmt_info);
3676
3677 /* Skip stmts which do not need to be vectorized.
3678 This is expected to include:
3679 - the COND_EXPR which is the loop exit condition
3680 - any LABEL_EXPRs in the loop
3681 - computations that are used only for array indexing or loop
3682 control */
3683
3684 if (!STMT_VINFO_RELEVANT_P (stmt_info))
3685 {
3686 if (vect_debug_details (NULL))
3687 fprintf (dump_file, "irrelevant.");
3688 continue;
3689 }
3690
3691 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
3692 {
3693 if (vect_debug_stats (loop) || vect_debug_details (loop))
3694 {
3695 fprintf (dump_file, "not vectorized: vector stmt in loop:");
3696 print_generic_expr (dump_file, stmt, TDF_SLIM);
3697 }
3698 return false;
3699 }
3700
3701 if (STMT_VINFO_DATA_REF (stmt_info))
3702 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
3703 else if (TREE_CODE (stmt) == MODIFY_EXPR)
3704 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
3705 else
3706 scalar_type = TREE_TYPE (stmt);
3707
3708 if (vect_debug_details (NULL))
3709 {
3710 fprintf (dump_file, "get vectype for scalar type: ");
3711 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3712 }
3713
3714 vectype = get_vectype_for_scalar_type (scalar_type);
3715 if (!vectype)
3716 {
3717 if (vect_debug_stats (loop) || vect_debug_details (loop))
3718 {
3719 fprintf (dump_file, "not vectorized: unsupported data-type ");
3720 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
3721 }
3722 return false;
3723 }
3724
3725 if (vect_debug_details (NULL))
3726 {
3727 fprintf (dump_file, "vectype: ");
3728 print_generic_expr (dump_file, vectype, TDF_SLIM);
3729 }
3730 STMT_VINFO_VECTYPE (stmt_info) = vectype;
3731
3732 ok = (vectorizable_operation (stmt, NULL, NULL)
3733 || vectorizable_assignment (stmt, NULL, NULL)
3734 || vectorizable_load (stmt, NULL, NULL)
3735 || vectorizable_store (stmt, NULL, NULL));
3736
3737 if (!ok)
3738 {
3739 if (vect_debug_stats (loop) || vect_debug_details (loop))
3740 {
3741 fprintf (dump_file, "not vectorized: stmt not supported: ");
3742 print_generic_expr (dump_file, stmt, TDF_SLIM);
3743 }
3744 return false;
3745 }
3746
3747 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
3748 if (vect_debug_details (NULL))
3749 fprintf (dump_file, "nunits = %d", nunits);
3750
3751 if (vectorization_factor)
3752 {
3753 /* FORNOW: don't allow mixed units.
3754 This restriction will be relaxed in the future. */
3755 if (nunits != vectorization_factor)
3756 {
3757 if (vect_debug_stats (loop) || vect_debug_details (loop))
3758 fprintf (dump_file, "not vectorized: mixed data-types");
3759 return false;
3760 }
3761 }
3762 else
3763 vectorization_factor = nunits;
3764
3765 #ifdef ENABLE_CHECKING
3766 gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
3767 * vectorization_factor == UNITS_PER_SIMD_WORD);
3768 #endif
3769 }
3770 }
3771
3772 /* TODO: Analyze cost. Decide if worth while to vectorize. */
3773
3774 if (vectorization_factor <= 1)
3775 {
3776 if (vect_debug_stats (loop) || vect_debug_details (loop))
3777 fprintf (dump_file, "not vectorized: unsupported data-type");
3778 return false;
3779 }
3780 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
3781
3782 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && vect_debug_details (NULL))
3783 fprintf (dump_file,
3784 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
3785 vectorization_factor, LOOP_VINFO_INT_NITERS (loop_vinfo));
3786
3787 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3788 && LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)
3789 {
3790 if (vect_debug_stats (loop) || vect_debug_details (loop))
3791 fprintf (dump_file, "not vectorized: iteration count too small.");
3792 return false;
3793 }
3794
3795 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
3796 || LOOP_VINFO_INT_NITERS (loop_vinfo) % vectorization_factor != 0)
3797 {
3798 if (vect_debug_stats (loop) || vect_debug_details (loop))
3799 fprintf (dump_file, "epilog loop required.");
3800 if (!vect_can_advance_ivs_p (loop))
3801 {
3802 if (vect_debug_stats (loop) || vect_debug_details (loop))
3803 fprintf (dump_file, "not vectorized: can't create epilog loop 1.");
3804 return false;
3805 }
3806 if (!slpeel_can_duplicate_loop_p (loop, loop->exit_edges[0]))
3807 {
3808 if (vect_debug_stats (loop) || vect_debug_details (loop))
3809 fprintf (dump_file, "not vectorized: can't create epilog loop 2.");
3810 return false;
3811 }
3812 }
3813
3814 return true;
3815 }
3816
3817
3818 /* Function exist_non_indexing_operands_for_use_p
3819
3820 USE is one of the uses attached to STMT. Check if USE is
3821 used in STMT for anything other than indexing an array. */
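/* Illustrative example (not from the original sources): in the stmt
     a[i] = x;
   the use 'x' is the stored value, so the function returns true for
   it, whereas the use 'i' only indexes the array, so the function
   returns false for it.  */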
3822
3823 static bool
3824 exist_non_indexing_operands_for_use_p (tree use, tree stmt)
3825 {
3826 tree operand;
3827 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3828
3829 /* USE corresponds to some operand in STMT. If there is no data
3830 reference in STMT, then any operand that corresponds to USE
3831 is not indexing an array. */
3832 if (!STMT_VINFO_DATA_REF (stmt_info))
3833 return true;
3834
3835 /* STMT has a data_ref. FORNOW this means that it's of one of
3836 the following forms:
3837 -1- ARRAY_REF = var
3838 -2- var = ARRAY_REF
3839 (This should have been verified in analyze_data_refs).
3840
3841 'var' in the second case corresponds to a def, not a use,
3842 so USE cannot correspond to any operands that are not used
3843 for array indexing.
3844
3845 Therefore, all we need to check is if STMT falls into the
3846 first case, and whether var corresponds to USE. */
3847
3848 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
3849 return false;
3850
3851 operand = TREE_OPERAND (stmt, 1);
3852
3853 if (TREE_CODE (operand) != SSA_NAME)
3854 return false;
3855
3856 if (operand == use)
3857 return true;
3858
3859 return false;
3860 }
3861
3862
3863 /* Function vect_is_simple_iv_evolution.
3864
3865 FORNOW: A simple evolution of an induction variable in the loop is
3866 considered a polynomial evolution with constant step. */
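/* Illustrative examples (hypothetical access functions): an evolution
   such as {0, +, 1}_loop yields init 0 and step 1, which is simple
   (and acceptable even under STRICT). An access function like
   {0, +, {0, +, 1}_loop}_loop (a polynomial of degree 2), or one whose
   step is not an INTEGER_CST, is rejected.  */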
3867
3868 static bool
3869 vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
3870 tree * step, bool strict)
3871 {
3872 tree init_expr;
3873 tree step_expr;
3874
3875 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
3876
3877 /* When there is no evolution in this loop, the evolution function
3878 is not "simple". */
3879 if (evolution_part == NULL_TREE)
3880 return false;
3881
3882 /* When the evolution is a polynomial of degree >= 2
3883 the evolution function is not "simple". */
3884 if (tree_is_chrec (evolution_part))
3885 return false;
3886
3887 step_expr = evolution_part;
3888 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
3889
3890 if (vect_debug_details (NULL))
3891 {
3892 fprintf (dump_file, "step: ");
3893 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3894 fprintf (dump_file, ", init: ");
3895 print_generic_expr (dump_file, init_expr, TDF_SLIM);
3896 }
3897
3898 *init = init_expr;
3899 *step = step_expr;
3900
3901 if (TREE_CODE (step_expr) != INTEGER_CST)
3902 {
3903 if (vect_debug_details (NULL))
3904 fprintf (dump_file, "step unknown.");
3905 return false;
3906 }
3907
3908 if (strict)
3909 if (!integer_onep (step_expr))
3910 {
3911 if (vect_debug_details (NULL))
3912 print_generic_expr (dump_file, step_expr, TDF_SLIM);
3913 return false;
3914 }
3915
3916 return true;
3917 }
3918
3919
3920 /* Function vect_analyze_scalar_cycles.
3921
3922 Examine the cross iteration def-use cycles of scalar variables, by
3923 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
3924 cycles that they represent do not impede vectorization.
3925
3926 FORNOW: Reduction, as in the following loop, is not supported yet:
3927 loop1:
3928 for (i=0; i<N; i++)
3929 sum += a[i];
3930 The cross-iteration cycle corresponding to variable 'sum' will be
3931 considered too complicated and will impede vectorization.
3932
3933 FORNOW: Induction, as in the following loop, is not supported yet:
3934 loop2:
3935 for (i=0; i<N; i++)
3936 a[i] = i;
3937
3938 However, the following loop *is* vectorizable:
3939 loop3:
3940 for (i=0; i<N; i++)
3941 a[i] = b[i];
3942
3943 In both loops there exists a def-use cycle for the variable i:
3944 loop: i_2 = PHI (i_0, i_1)
3945 a[i_2] = ...;
3946 i_1 = i_2 + 1;
3947 GOTO loop;
3948
3949 The evolution of the above cycle is considered simple enough;
3950 however, we also check that the cycle does not need to be
3951 vectorized, i.e., we check that the variable that this cycle
3952 defines is only used for array indexing or in stmts that do not
3953 need to be vectorized. This is not the case in loop2, but it
3954 *is* the case in loop3. */
3955
3956 static bool
3957 vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
3958 {
3959 tree phi;
3960 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3961 basic_block bb = loop->header;
3962 tree dummy;
3963
3964 if (vect_debug_details (NULL))
3965 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
3966
3967 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
3968 {
3969 tree access_fn = NULL;
3970
3971 if (vect_debug_details (NULL))
3972 {
3973 fprintf (dump_file, "Analyze phi: ");
3974 print_generic_expr (dump_file, phi, TDF_SLIM);
3975 }
3976
3977 /* Skip virtual phi's. The data dependences that are associated with
3978 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
3979
3980 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
3981 {
3982 if (vect_debug_details (NULL))
3983 fprintf (dump_file, "virtual phi. skip.");
3984 continue;
3985 }
3986
3987 /* Analyze the evolution function. */
3988
3989 /* FORNOW: The only scalar cross-iteration cycles that we allow are
3990 those of loop induction variables; This property is verified here.
3991
3992 Furthermore, if that induction variable is used in an operation
3993 that needs to be vectorized (i.e, is not solely used to index
3994 arrays and check the exit condition) - we do not support its
3995 vectorization yet. This property is verified in vect_is_simple_use,
3996 during vect_analyze_operations. */
3997
3998 access_fn = /* instantiate_parameters
3999 (loop,*/
4000 analyze_scalar_evolution (loop, PHI_RESULT (phi));
4001
4002 if (!access_fn)
4003 {
4004 if (vect_debug_stats (loop) || vect_debug_details (loop))
4005 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4006 return false;
4007 }
4008
4009 if (vect_debug_details (NULL))
4010 {
4011 fprintf (dump_file, "Access function of PHI: ");
4012 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4013 }
4014
4015 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
4016 &dummy, false))
4017 {
4018 if (vect_debug_stats (loop) || vect_debug_details (loop))
4019 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
4020 return false;
4021 }
4022 }
4023
4024 return true;
4025 }
4026
4027
4028 /* Function vect_analyze_data_ref_dependence.
4029
4030 Return TRUE if there (might) exist a dependence between a memory-reference
4031 DRA and a memory-reference DRB. */
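/* Illustrative examples (not from the original sources): for
     a[i] = b[i];
   the bases 'a' and 'b' are known to differ, so no dependence exists
   and false is returned. For
     a[i+1] = a[i] + x;
   the affine dependence test finds a distance-1 dependence between the
   read and the write, so true is returned and vectorization of the
   loop is abandoned.  */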
4032
4033 static bool
4034 vect_analyze_data_ref_dependence (struct data_reference *dra,
4035 struct data_reference *drb,
4036 struct loop *loop)
4037 {
4038 bool differ_p;
4039 struct data_dependence_relation *ddr;
4040
4041 if (!array_base_name_differ_p (dra, drb, &differ_p))
4042 {
4043 if (vect_debug_stats (loop) || vect_debug_details (loop))
4044 {
4045 fprintf (dump_file,
4046 "not vectorized: can't determine dependence between: ");
4047 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4048 fprintf (dump_file, " and ");
4049 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4050 }
4051 return true;
4052 }
4053
4054 if (differ_p)
4055 return false;
4056
4057 ddr = initialize_data_dependence_relation (dra, drb);
4058 compute_affine_dependence (ddr);
4059
4060 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4061 return false;
4062
4063 if (vect_debug_stats (loop) || vect_debug_details (loop))
4064 {
4065 fprintf (dump_file,
4066 "not vectorized: possible dependence between data-refs ");
4067 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
4068 fprintf (dump_file, " and ");
4069 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
4070 }
4071
4072 return true;
4073 }
4074
4075
4076 /* Function vect_analyze_data_ref_dependences.
4077
4078 Examine all the data references in the loop, and make sure there do not
4079 exist any data dependences between them.
4080
4081 TODO: dependences whose distance is greater than the vectorization factor
4082 can be ignored. */
4083
4084 static bool
4085 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
4086 {
4087 unsigned int i, j;
4088 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4089 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4090 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4091
4092 /* Examine store-store (output) dependences. */
4093
4094 if (vect_debug_details (NULL))
4095 fprintf (dump_file, "\n<<vect_analyze_data_ref_dependences>>\n");
4096
4097 if (vect_debug_details (NULL))
4098 fprintf (dump_file, "compare all store-store pairs.");
4099
4100 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
4101 {
4102 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4103 {
4104 struct data_reference *dra =
4105 VARRAY_GENERIC_PTR (loop_write_refs, i);
4106 struct data_reference *drb =
4107 VARRAY_GENERIC_PTR (loop_write_refs, j);
4108 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4109 return false;
4110 }
4111 }
4112
4113 /* Examine load-store (true/anti) dependences. */
4114
4115 if (vect_debug_details (NULL))
4116 fprintf (dump_file, "compare all load-store pairs.");
4117
4118 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
4119 {
4120 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
4121 {
4122 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
4123 struct data_reference *drb =
4124 VARRAY_GENERIC_PTR (loop_write_refs, j);
4125 if (vect_analyze_data_ref_dependence (dra, drb, loop))
4126 return false;
4127 }
4128 }
4129
4130 return true;
4131 }
4132
4133
4134 /* Function vect_compute_data_ref_alignment
4135
4136 Compute the misalignment of the data reference DR.
4137
4138 Output:
4139 1. If during the misalignment computation it is found that the data reference
4140 cannot be vectorized then false is returned.
4141 2. DR_MISALIGNMENT (DR) is defined.
4142
4143 FOR NOW: No analysis is actually performed. Misalignment is calculated
4144 only for trivial cases. TODO. */
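/* Illustrative example (hypothetical values): for 'short a[N]' whose
   base is (or can be forced to be) 16-byte aligned, and a 16-byte
   vectype, an access whose initial offset from the base is 4 bytes
   (e.g. starting at a[2]) gets DR_MISALIGNMENT = 4 mod 16 = 4. If the
   offset is not known at compile time, DR_MISALIGNMENT stays -1.  */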
4145
4146 static bool
4147 vect_compute_data_ref_alignment (struct data_reference *dr)
4148 {
4149 tree stmt = DR_STMT (dr);
4150 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4151 tree ref = DR_REF (dr);
4152 tree vectype;
4153 tree base, alignment;
4154 bool base_aligned_p;
4155 tree misalign;
4156
4157 if (vect_debug_details (NULL))
4158 fprintf (dump_file, "vect_compute_data_ref_alignment:");
4159
4160 /* Initialize misalignment to unknown. */
4161 DR_MISALIGNMENT (dr) = -1;
4162
4163 misalign = STMT_VINFO_VECT_MISALIGNMENT (stmt_info);
4164 base_aligned_p = STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info);
4165 base = STMT_VINFO_VECT_DR_BASE (stmt_info);
4166 vectype = STMT_VINFO_VECTYPE (stmt_info);
4167
4168 if (!misalign)
4169 {
4170 if (vect_debug_details (NULL))
4171 {
4172 fprintf (dump_file, "Unknown alignment for access: ");
4173 print_generic_expr (dump_file, base, TDF_SLIM);
4174 }
4175 return true;
4176 }
4177
4178 if (!base_aligned_p)
4179 {
4180 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
4181 {
4182 if (vect_debug_details (NULL))
4183 {
4184 fprintf (dump_file, "can't force alignment of ref: ");
4185 print_generic_expr (dump_file, ref, TDF_SLIM);
4186 }
4187 return true;
4188 }
4189
4190 /* Force the alignment of the decl.
4191 NOTE: This is the only change to the code we make during
4192 the analysis phase, before deciding to vectorize the loop. */
4193 if (vect_debug_details (NULL))
4194 fprintf (dump_file, "force alignment");
4195 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
4196 DECL_USER_ALIGN (base) = 1;
4197 }
4198
4199 /* At this point we assume that the base is aligned. */
4200 gcc_assert (base_aligned_p
4201 || (TREE_CODE (base) == VAR_DECL
4202 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
4203
4204 /* Alignment required, in bytes: */
4205 alignment = size_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
4206
4207 /* Modulo alignment. */
4208 misalign = size_binop (TRUNC_MOD_EXPR, misalign, alignment);
4209 if (tree_int_cst_sgn (misalign) < 0)
4210 {
4211 /* Negative misalignment value. */
4212 if (vect_debug_details (NULL))
4213 fprintf (dump_file, "unexpected misalign value");
4214 return false;
4215 }
4216
4217 DR_MISALIGNMENT (dr) = tree_low_cst (misalign, 1);
4218
4219 if (vect_debug_details (NULL))
4220 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
4221
4222 return true;
4223 }
4224
4225
4226 /* Function vect_compute_data_refs_alignment
4227
4228 Compute the misalignment of data references in the loop.
4229 This pass may take place at function granularity instead of at loop
4230 granularity.
4231
4232 FOR NOW: No analysis is actually performed. Misalignment is calculated
4233 only for trivial cases. TODO. */
4234
4235 static bool
4236 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
4237 {
4238 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4239 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4240 unsigned int i;
4241
4242 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4243 {
4244 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4245 if (!vect_compute_data_ref_alignment (dr))
4246 return false;
4247 }
4248
4249 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4250 {
4251 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4252 if (!vect_compute_data_ref_alignment (dr))
4253 return false;
4254 }
4255
4256 return true;
4257 }
4258
4259
4260 /* Function vect_enhance_data_refs_alignment
4261
4262 This pass will use loop versioning and loop peeling in order to enhance
4263 the alignment of data references in the loop.
4264
4265 FOR NOW: we assume that whatever versioning/peeling takes place, only the
4266 original loop is to be vectorized; any other loops that are created by
4267 the transformations performed in this pass are not supposed to be
4268 vectorized. This restriction will be relaxed. */
4269
4270 static void
4271 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
4272 {
4273 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4274 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4275 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4276 unsigned int i;
4277
4278 /*
4279 This pass will require a cost model to decide whether to apply peeling
4280 or versioning or a combination of the two. For example, the scheme that
4281 Intel uses when given a loop with several memory accesses, is as follows:
4282 choose one memory access ('p') whose alignment you want to force by doing
4283 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
4284 other accesses are not necessarily aligned, or (2) use loop versioning to
4285 generate one loop in which all accesses are aligned, and another loop in
4286 which only 'p' is necessarily aligned.
4287
4288 ("Automatic Intra-Register Vectorization for the Intel Architecture",
4289 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
4290 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
4291
4292 Devising a cost model is the most critical aspect of this work. It will
4293 guide us on which access to peel for, whether to use loop versioning, how
4294 many versions to create, etc. The cost model will probably consist of
4295 generic considerations as well as target specific considerations (on
4296 powerpc for example, misaligned stores are more painful than misaligned
4297 loads).
4298
4299 Here are the general steps involved in alignment enhancements:
4300
4301 -- original loop, before alignment analysis:
4302 for (i=0; i<N; i++){
4303 x = q[i]; # DR_MISALIGNMENT(q) = unknown
4304 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4305 }
4306
4307 -- After vect_compute_data_refs_alignment:
4308 for (i=0; i<N; i++){
4309 x = q[i]; # DR_MISALIGNMENT(q) = 3
4310 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4311 }
4312
4313 -- Possibility 1: we do loop versioning:
4314 if (p is aligned) {
4315 for (i=0; i<N; i++){ # loop 1A
4316 x = q[i]; # DR_MISALIGNMENT(q) = 3
4317 p[i] = y; # DR_MISALIGNMENT(p) = 0
4318 }
4319 }
4320 else {
4321 for (i=0; i<N; i++){ # loop 1B
4322 x = q[i]; # DR_MISALIGNMENT(q) = 3
4323 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4324 }
4325 }
4326
4327 -- Possibility 2: we do loop peeling:
4328 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4329 x = q[i];
4330 p[i] = y;
4331 }
4332 for (i = 3; i < N; i++){ # loop 2A
4333 x = q[i]; # DR_MISALIGNMENT(q) = 0
4334 p[i] = y; # DR_MISALIGNMENT(p) = unknown
4335 }
4336
4337 -- Possibility 3: combination of loop peeling and versioning:
4338 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
4339 x = q[i];
4340 p[i] = y;
4341 }
4342 if (p is aligned) {
4343 for (i = 3; i<N; i++){ # loop 3A
4344 x = q[i]; # DR_MISALIGNMENT(q) = 0
4345 p[i] = y; # DR_MISALIGNMENT(p) = 0
4346 }
4347 }
4348 else {
4349 for (i = 3; i<N; i++){ # loop 3B
4350 x = q[i]; # DR_MISALIGNMENT(q) = 0
4351 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
4352 }
4353 }
4354
4355 These loops are later passed to loop_transform to be vectorized. The
4356 vectorizer will use the alignment information to guide the transformation
4357 (whether to generate regular loads/stores, or with special handling for
4358 misalignment).
4359 */
4360
4361 /* (1) Peeling to force alignment. */
4362
4363 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
4364 Considerations:
4365 + How many accesses will become aligned due to the peeling
4366 - How many accesses will become unaligned due to the peeling,
4367 and the cost of misaligned accesses.
4368 - The cost of peeling (the extra runtime checks, the increase
4369 in code size).
4370
4371 The scheme we use FORNOW: peel to force the alignment of the first
4372 misaligned store in the loop.
4373 Rationale: misaligned stores are not yet supported.
4374
4375 TODO: Use a better cost model. */
4376
4377 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4378 {
4379 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4380 if (!aligned_access_p (dr))
4381 {
4382 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr;
4383 LOOP_DO_PEELING_FOR_ALIGNMENT (loop_vinfo) = true;
4384 break;
4385 }
4386 }
4387
4388 if (!LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4389 {
4390 if (vect_debug_details (loop))
4391 fprintf (dump_file, "Peeling for alignment will not be applied.");
4392 return;
4393 }
4394 else
4395 if (vect_debug_details (loop))
4396 fprintf (dump_file, "Peeling for alignment will be applied.");
4397
4398
4399 /* (1.2) Update the alignment info according to the peeling factor.
4400 If the misalignment of the DR we peel for is M, then the
4401 peeling factor is VF - M, and the misalignment of each access DR_i
4402 in the loop is DR_MISALIGNMENT (DR_i) + VF - M.
4403 If the misalignment of the DR we peel for is unknown, then the
4404 misalignment of each access DR_i in the loop is also unknown.
4405
4406 FORNOW: set the misalignment of the accesses to unknown even
4407 if the peeling factor is known at compile time.
4408
4409 TODO: - if the peeling factor is known at compile time, use that
4410 when updating the misalignment info of the loop DRs.
4411 - consider accesses that are known to have the same
4412 alignment, even if that alignment is unknown. */
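/* Illustrative example (hypothetical values): with VF = 4 and a peeled
   DR whose misalignment is M = 1 element, the peeling factor is
   VF - M = 3 iterations, and another DR_i with misalignment 2 would
   end up with misalignment (2 + 3) mod 4 = 1. FORNOW, as noted above,
   all DRs other than the one peeled for are simply reset to
   unknown (-1).  */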
4413
4414 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4415 {
4416 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4417 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4418 {
4419 DR_MISALIGNMENT (dr) = 0;
4420 if (vect_debug_details (loop) || vect_debug_stats (loop))
4421 fprintf (dump_file, "Alignment of access forced using peeling.");
4422 }
4423 else
4424 DR_MISALIGNMENT (dr) = -1;
4425 }
4426 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4427 {
4428 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4429 if (dr == LOOP_VINFO_UNALIGNED_DR (loop_vinfo))
4430 {
4431 DR_MISALIGNMENT (dr) = 0;
4432 if (vect_debug_details (loop) || vect_debug_stats (loop))
4433 fprintf (dump_file, "Alignment of access forced using peeling.");
4434 }
4435 else
4436 DR_MISALIGNMENT (dr) = -1;
4437 }
4438 }
4439
4440
4441 /* Function vect_analyze_data_refs_alignment
4442
4443 Analyze the alignment of the data-references in the loop.
4444 FOR NOW: Until support for misaligned accesses is in place, only if all
4445 accesses are aligned can the loop be vectorized. This restriction will be
4446 relaxed. */
4447
4448 static bool
4449 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
4450 {
4451 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4452 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4453 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4454 enum dr_alignment_support supportable_dr_alignment;
4455 unsigned int i;
4456
4457 if (vect_debug_details (NULL))
4458 fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");
4459
4460
4461 /* This pass may take place at function granularity instead of at loop
4462 granularity. */
4463
4464 if (!vect_compute_data_refs_alignment (loop_vinfo))
4465 {
4466 if (vect_debug_details (loop) || vect_debug_stats (loop))
4467 fprintf (dump_file,
4468 "not vectorized: can't calculate alignment for data ref.");
4469 return false;
4470 }
4471
4472
4473 /* This pass will decide on using loop versioning and/or loop peeling in
4474 order to enhance the alignment of data references in the loop. */
4475
4476 vect_enhance_data_refs_alignment (loop_vinfo);
4477
4478
4479 /* Finally, check that all the data references in the loop can be
4480 handled with respect to their alignment. */
4481
4482 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4483 {
4484 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4485 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4486 if (!supportable_dr_alignment)
4487 {
4488 if (vect_debug_details (loop) || vect_debug_stats (loop))
4489 fprintf (dump_file, "not vectorized: unsupported unaligned load.");
4490 return false;
4491 }
4492 if (supportable_dr_alignment != dr_aligned
4493 && (vect_debug_details (loop) || vect_debug_stats (loop)))
4494 fprintf (dump_file, "Vectorizing an unaligned access.");
4495 }
4496 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4497 {
4498 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4499 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
4500 if (!supportable_dr_alignment)
4501 {
4502 if (vect_debug_details (loop) || vect_debug_stats (loop))
4503 fprintf (dump_file, "not vectorized: unsupported unaligned store.");
4504 return false;
4505 }
4506 if (supportable_dr_alignment != dr_aligned
4507 && (vect_debug_details (loop) || vect_debug_stats (loop)))
4508 fprintf (dump_file, "Vectorizing an unaligned access.");
4509 }
4510
4511 return true;
4512 }
4513
4514
4515 /* Function vect_analyze_data_ref_access.
4516
4517 Analyze the access pattern of the data-reference DR. For now, a data access
4518 has to be consecutive to be considered vectorizable. */
4519
4520 static bool
4521 vect_analyze_data_ref_access (struct data_reference *dr)
4522 {
4523 tree stmt = DR_STMT (dr);
4524 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4525 tree step = STMT_VINFO_VECT_STEP (stmt_info);
4526 tree scalar_type = TREE_TYPE (DR_REF (dr));
4527
4528 if (!step || tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
4529 {
4530 if (vect_debug_details (NULL))
4531 fprintf (dump_file, "not consecutive access");
4532 return false;
4533 }
4534 return true;
4535 }
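/* Illustrative examples (not from the original sources): for
   'int a[N]', the access a[i] has a step of 4 bytes per iteration,
   which equals TYPE_SIZE_UNIT of 'int', so it is consecutive. An
   access such as a[2*i] has a step of 8 bytes and is rejected.  */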
4536
4537
4538 /* Function vect_analyze_data_ref_accesses.
4539
4540 Analyze the access pattern of all the data references in the loop.
4541
4542 FORNOW: the only access pattern that is considered vectorizable is a
4543 simple step 1 (consecutive) access.
4544
4545 FORNOW: handle only arrays and pointer accesses. */
4546
4547 static bool
4548 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
4549 {
4550 unsigned int i;
4551 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
4552 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
4553
4554 if (vect_debug_details (NULL))
4555 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
4556
4557 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
4558 {
4559 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
4560 bool ok = vect_analyze_data_ref_access (dr);
4561 if (!ok)
4562 {
4563 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4564 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4565 fprintf (dump_file, "not vectorized: complicated access pattern.");
4566 return false;
4567 }
4568 }
4569
4570 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
4571 {
4572 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
4573 bool ok = vect_analyze_data_ref_access (dr);
4574 if (!ok)
4575 {
4576 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
4577 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
4578 fprintf (dump_file, "not vectorized: complicated access pattern.");
4579 return false;
4580 }
4581 }
4582
4583 return true;
4584 }
4585
4586
4587 /* Function vect_analyze_pointer_ref_access.
4588
4589 Input:
4590 STMT - a stmt that contains a data-ref
4591 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
4592
4593 If the data-ref access is vectorizable, return a data_reference structure
4594 that represents it (DR). Otherwise - return NULL. */
4595
4596 static struct data_reference *
4597 vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
4598 {
4599 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4600 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
4601 tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
4602 tree init, step;
4603 tree reftype, innertype;
4604 tree indx_access_fn;
4605 int loopnum = loop->num;
4606 struct data_reference *dr;
4607
4608 if (!access_fn)
4609 {
4610 if (vect_debug_stats (loop) || vect_debug_details (loop))
4611 fprintf (dump_file, "not vectorized: complicated pointer access.");
4612 return NULL;
4613 }
4614
4615 if (vect_debug_details (NULL))
4616 {
4617 fprintf (dump_file, "Access function of ptr: ");
4618 print_generic_expr (dump_file, access_fn, TDF_SLIM);
4619 }
4620
4621 if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
4622 {
4623 if (vect_debug_stats (loop) || vect_debug_details (loop))
4624 fprintf (dump_file, "not vectorized: pointer access is not simple.");
4625 return NULL;
4626 }
4627
4628 STRIP_NOPS (init);
4629
4630 if (!expr_invariant_in_loop_p (loop, init))
4631 {
4632 if (vect_debug_stats (loop) || vect_debug_details (loop))
4633 fprintf (dump_file,
4634 "not vectorized: initial condition is not loop invariant.");
4635 return NULL;
4636 }
4637
4638 if (TREE_CODE (step) != INTEGER_CST)
4639 {
4640 if (vect_debug_stats (loop) || vect_debug_details (loop))
4641 fprintf (dump_file,
4642 "not vectorized: non constant step for pointer access.");
4643 return NULL;
4644 }
4645
4646 reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
4647 if (TREE_CODE (reftype) != POINTER_TYPE)
4648 {
4649 if (vect_debug_stats (loop) || vect_debug_details (loop))
4650 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4651 return NULL;
4652 }
4653
4654 reftype = TREE_TYPE (init);
4655 if (TREE_CODE (reftype) != POINTER_TYPE)
4656 {
4657 if (vect_debug_stats (loop) || vect_debug_details (loop))
4658 fprintf (dump_file, "not vectorized: unexpected pointer access form.");
4659 return NULL;
4660 }
4661
4662 innertype = TREE_TYPE (reftype);
4663 if (tree_int_cst_compare (TYPE_SIZE_UNIT (innertype), step))
4664 {
4665 /* FORNOW: support only consecutive access */
4666 if (vect_debug_stats (loop) || vect_debug_details (loop))
4667 fprintf (dump_file, "not vectorized: non consecutive access.");
4668 return NULL;
4669 }
4670
4671 STMT_VINFO_VECT_STEP (stmt_info) = fold_convert (sizetype, step);
4672 if (TREE_CODE (init) == PLUS_EXPR
4673 || TREE_CODE (init) == MINUS_EXPR)
4674 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4675 fold (size_binop (TREE_CODE (init), size_zero_node,
4676 fold_convert (sizetype, TREE_OPERAND (init, 1))));
4677 else
4678 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = size_zero_node;
4679
4680 indx_access_fn =
4681 build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
4682 if (vect_debug_details (NULL))
4683 {
4684 fprintf (dump_file, "Access function of ptr indx: ");
4685 print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
4686 }
4687 dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
4688 return dr;
4689 }
4690
4691
4692 /* Function vect_get_memtag_and_dr.
4693
4694 The function returns the relevant variable for memory tag (for aliasing
4695 purposes). Also, the data reference structure DR is created.
4696
4697 This function handles three kinds of MEMREF:
4698
4699 It is called from vect_analyze_data_refs with a MEMREF that is either an
4700 ARRAY_REF or an INDIRECT_REF (this is category 1 - "recursion begins").
4701 It builds a DR for them using vect_get_base_and_offset, and calls itself
4702 recursively to retrieve the relevant memtag for the MEMREF, "peeling" the
4703 MEMREF along the way. During the recursive calls, the function may be called
4704 with a MEMREF for which the recursion has to continue - PLUS_EXPR,
4705 MINUS_EXPR, INDIRECT_REF (category 2 - "recursion continues"),
4706 and/or with a MEMREF for which a memtag can be trivially obtained - VAR_DECL
4707 and SSA_NAME (this is category 3 - "recursion stop condition").
4708
4709 When the MEMREF falls into category 1 there is still no data reference struct
4710 (DR) available. It is created by this function, and then, along the recursion,
4711 MEMREF will fall into category 2 or 3, in which case a DR will have already
4712 been created, but the analysis continues to retrieve the MEMTAG.
4713
4714 Input:
4715 MEMREF - data reference in STMT
4716 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
4717
4718 Output:
4719 DR - data_reference struct for MEMREF
4720 return value - the relevant variable for memory tag (for aliasing purposes).
4721
4722 */
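/* Illustrative walk-through (hypothetical names): for a stmt
     *p_1 = x;
   the first call (category 1) builds a DR for the INDIRECT_REF via
   vect_analyze_pointer_ref_access and recurses on the pointer's base
   (say, the SSA_NAME p_0 holding its initial value). The recursive
   call hits the SSA_NAME stop condition (category 3) and returns the
   type memtag of 'p', to be used for aliasing purposes.  */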
4723
4724 static tree
4725 vect_get_memtag_and_dr (tree memref, tree stmt, bool is_read,
4726 loop_vec_info loop_vinfo,
4727 tree vectype, struct data_reference **dr)
4728 {
4729 tree symbl, oprnd0, oprnd1;
4730 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4731 tree offset, misalign, step;
4732 tree ref_to_be_analyzed, tag, dr_base;
4733 struct data_reference *new_dr;
4734 bool base_aligned_p;
4735
4736 if (*dr)
4737 {
4738 /* Category 3: recursion stop condition. */
4739 /* (1) A DR already exists. We only need to get the relevant memtag for
4740 MEMREF, the rest of the data was already initialized. */
4741
4742 switch (TREE_CODE (memref))
4743 {
4744 /* (1.1) Stop condition: find the relevant memtag and return. */
4745 case SSA_NAME:
4746 symbl = SSA_NAME_VAR (memref);
4747 tag = get_var_ann (symbl)->type_mem_tag;
4748 if (!tag)
4749 {
4750 tree ptr = TREE_OPERAND (DR_REF ((*dr)), 0);
4751 if (TREE_CODE (ptr) == SSA_NAME)
4752 tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
4753 }
4754 if (!tag)
4755 {
4756 if (vect_debug_details (NULL))
4757 fprintf (dump_file, "not vectorized: no memtag for ref.");
4758 return NULL_TREE;
4759 }
4760 return tag;
4761
4762 case VAR_DECL:
4763 case PARM_DECL:
4764 return memref;
4765
4766 /* Category 2: recursion continues. */
4767 /* (1.2) A recursive call to find the relevant memtag is required. */
4768 case INDIRECT_REF:
4769 symbl = TREE_OPERAND (memref, 0);
4770 break; /* For recursive call. */
4771
4772 case COMPONENT_REF:
4773 /* Could have recorded more accurate information -
4774 i.e., the actual FIELD_DECL that is being referenced -
4775 but later passes expect VAR_DECL as the nmt. */
4776 /* Fall through. */
4777
4778 case ADDR_EXPR:
4779 symbl = STMT_VINFO_VECT_DR_BASE (stmt_info);
4780 break; /* For recursive call. */
4781
4782 case PLUS_EXPR:
4783 case MINUS_EXPR:
4784 /* Although DR exists, we have to call the function recursively to
4785 build MEMTAG for such expression. This is handled below. */
4786 oprnd0 = TREE_OPERAND (memref, 0);
4787 oprnd1 = TREE_OPERAND (memref, 1);
4788
4789 STRIP_NOPS (oprnd1);
4790 /* Supported plus/minus expressions are of the form
4791 {address_base + offset}, such that address_base is of type
4792 POINTER/ARRAY, and offset is either an INTEGER_CST of type POINTER,
4793 or it's not of type POINTER/ARRAY.
4794 TODO: swap operands if {offset + address_base}. */
4795 if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
4796 && TREE_CODE (oprnd1) != INTEGER_CST)
4797 || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
4798 return NULL_TREE;
4799
4800 symbl = oprnd0;
4801 break; /* For recursive call. */
4802
4803 default:
4804 return NULL_TREE;
4805 }
4806 }
4807 else
4808 {
4809 /* Category 1: recursion begins. */
4810 /* (2) A DR does not exist yet and must be built, followed by a
4811 recursive call to get the relevant memtag for MEMREF. */
4812
4813 switch (TREE_CODE (memref))
4814 {
4815 case INDIRECT_REF:
4816 new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
4817 if (!new_dr)
4818 return NULL_TREE;
4819 *dr = new_dr;
4820 symbl = DR_BASE_NAME (new_dr);
4821 ref_to_be_analyzed = DR_BASE_NAME (new_dr);
4822 break;
4823
4824 case ARRAY_REF:
4825 new_dr = analyze_array (stmt, memref, is_read);
4826 *dr = new_dr;
4827 symbl = DR_BASE_NAME (new_dr);
4828 ref_to_be_analyzed = memref;
4829 break;
4830
4831 default:
4832 /* TODO: Support data-refs of form a[i].p for unions and single
4833 field structures. */
4834 return NULL_TREE;
4835 }
4836
4837 offset = size_zero_node;
4838 misalign = size_zero_node;
4839 step = size_zero_node;
4840
4841 /* Analyze data-ref, find its base, initial offset from the base, step,
4842 and alignment. */
4843 dr_base = vect_get_base_and_offset (new_dr, ref_to_be_analyzed,
4844 vectype, loop_vinfo, &offset,
4845 &misalign, &step, &base_aligned_p);
4846 if (!dr_base)
4847 return NULL_TREE;
4848
4849 /* Initialize information according to above analysis. */
4850 /* Since offset and step of a pointer can be also set in
4851 vect_analyze_pointer_ref_access, we combine the values here. */
4852 if (STMT_VINFO_VECT_INIT_OFFSET (stmt_info))
4853 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) =
4854 fold (build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
4855 STMT_VINFO_VECT_INIT_OFFSET (stmt_info)));
4856 else
4857 STMT_VINFO_VECT_INIT_OFFSET (stmt_info) = offset;
4858
4859 if (step && STMT_VINFO_VECT_STEP (stmt_info))
4860 STMT_VINFO_VECT_STEP (stmt_info) =
4861 size_binop (PLUS_EXPR, step, STMT_VINFO_VECT_STEP (stmt_info));
4862 else
4863 STMT_VINFO_VECT_STEP (stmt_info) = step;
4864
4865 STMT_VINFO_VECT_BASE_ALIGNED_P (stmt_info) = base_aligned_p;
4866 STMT_VINFO_VECT_MISALIGNMENT (stmt_info) = misalign;
4867 STMT_VINFO_VECT_DR_BASE (stmt_info) = dr_base;
4868 }
4869
4870 if (!symbl)
4871 return NULL_TREE;
4872 /* Recursive call to retrieve the relevant memtag. */
4873 tag = vect_get_memtag_and_dr (symbl, stmt, is_read, loop_vinfo, vectype, dr);
4874 return tag;
4875 }
4876
4877
4878
4879 /* Function vect_analyze_data_refs.
4880
4881 Find all the data references in the loop.
4882
4883 The general structure of the analysis of data refs in the vectorizer is as
4884 follows:
4885 1- vect_analyze_data_refs(loop):
4886 Find and analyze all data-refs in the loop:
4887 foreach ref
4888 ref_stmt.memtag = vect_get_memtag_and_dr (ref)
4889 1.1- vect_get_memtag_and_dr(ref):
4890 Analyze ref, and build a DR (data_reference struct) for it;
4891 call vect_get_base_and_offset to compute base, initial_offset,
4892 step and alignment. Set ref_stmt.base, ref_stmt.initial_offset,
4893 ref_stmt.alignment, and ref_stmt.step accordingly.
4894 1.1.1- vect_get_base_and_offset():
4895 Calculate base, initial_offset, step and alignment.
4896 For ARRAY_REFs and COMPONENT_REFs use call get_inner_reference.
4897 2- vect_analyze_dependences(): apply dependence testing using ref_stmt.DR
4898 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4899 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4900
4901 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
4902 whose base is really an array (not a pointer) and whose alignment
4903 can be forced. This restriction will be relaxed. */
4904
4905 static bool
4906 vect_analyze_data_refs (loop_vec_info loop_vinfo)
4907 {
4908 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4909 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4910 int nbbs = loop->num_nodes;
4911 block_stmt_iterator si;
4912 int j;
4913 struct data_reference *dr;
4914
4915 if (vect_debug_details (NULL))
4916 fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");
4917
4918 for (j = 0; j < nbbs; j++)
4919 {
4920 basic_block bb = bbs[j];
4921 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
4922 {
4923 bool is_read = false;
4924 tree stmt = bsi_stmt (si);
4925 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4926 v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
4927 v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
4928 vuse_optype vuses = STMT_VUSE_OPS (stmt);
4929 varray_type *datarefs = NULL;
4930 int nvuses, nv_may_defs, nv_must_defs;
4931 tree memref = NULL;
4932 tree symbl;
4933 tree scalar_type, vectype;
4934
4935 /* Assumption: there exists a data-ref in stmt if and only if
4936 it has vuses/vdefs. */
4937
4938 if (!vuses && !v_may_defs && !v_must_defs)
4939 continue;
4940
4941 nvuses = NUM_VUSES (vuses);
4942 nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
4943 nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
4944
4945 if (nvuses && (nv_may_defs || nv_must_defs))
4946 {
4947 if (vect_debug_details (NULL))
4948 {
4949 fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
4950 print_generic_expr (dump_file, stmt, TDF_SLIM);
4951 }
4952 return false;
4953 }
4954
4955 if (TREE_CODE (stmt) != MODIFY_EXPR)
4956 {
4957 if (vect_debug_details (NULL))
4958 {
4959 fprintf (dump_file, "unexpected vops in stmt: ");
4960 print_generic_expr (dump_file, stmt, TDF_SLIM);
4961 }
4962 return false;
4963 }
4964
4965 if (vuses)
4966 {
4967 memref = TREE_OPERAND (stmt, 1);
4968 datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
4969 is_read = true;
4970 }
4971 else /* vdefs */
4972 {
4973 memref = TREE_OPERAND (stmt, 0);
4974 datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
4975 is_read = false;
4976 }
4977
4978 scalar_type = TREE_TYPE (memref);
4979 vectype = get_vectype_for_scalar_type (scalar_type);
4980 if (!vectype)
4981 {
4982 if (vect_debug_details (NULL))
4983 {
4984 fprintf (dump_file, "no vectype for stmt: ");
4985 print_generic_expr (dump_file, stmt, TDF_SLIM);
4986 fprintf (dump_file, " scalar_type: ");
4987 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
4988 }
4989 /* It is not possible to vectorize this data reference. */
4990 return false;
4991 }
4992 /* Analyze MEMREF. If it is of a supported form, build data_reference
4993 struct for it (DR) and find memtag for aliasing purposes. */
4994 dr = NULL;
4995 symbl = vect_get_memtag_and_dr (memref, stmt, is_read, loop_vinfo,
4996 vectype, &dr);
4997 if (!symbl)
4998 {
4999 if (vect_debug_stats (loop) || vect_debug_details (loop))
5000 {
5001 fprintf (dump_file, "not vectorized: unhandled data ref: ");
5002 print_generic_expr (dump_file, stmt, TDF_SLIM);
5003 }
5004 return false;
5005 }
5006 STMT_VINFO_MEMTAG (stmt_info) = symbl;
5007 STMT_VINFO_VECTYPE (stmt_info) = vectype;
5008 VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
5009 STMT_VINFO_DATA_REF (stmt_info) = dr;
5010 }
5011 }
5012
5013 return true;
5014 }
5015
5016
5017 /* Utility functions used by vect_mark_stmts_to_be_vectorized. */
5018
5019 /* Function vect_mark_relevant.
5020
5021 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
5022
5023 static void
5024 vect_mark_relevant (varray_type *worklist, tree stmt)
5025 {
5026 stmt_vec_info stmt_info;
5027
5028 if (vect_debug_details (NULL))
5029 fprintf (dump_file, "mark relevant.");
5030
5031 if (TREE_CODE (stmt) == PHI_NODE)
5032 {
5033 VARRAY_PUSH_TREE (*worklist, stmt);
5034 return;
5035 }
5036
5037 stmt_info = vinfo_for_stmt (stmt);
5038
5039 if (!stmt_info)
5040 {
5041 if (vect_debug_details (NULL))
5042 {
5043 fprintf (dump_file, "mark relevant: no stmt info!");
5044 print_generic_expr (dump_file, stmt, TDF_SLIM);
5045 }
5046 return;
5047 }
5048
5049 if (STMT_VINFO_RELEVANT_P (stmt_info))
5050 {
5051 if (vect_debug_details (NULL))
5052 fprintf (dump_file, "already marked relevant.");
5053 return;
5054 }
5055
5056 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
5057 VARRAY_PUSH_TREE (*worklist, stmt);
5058 }
5059
5060
5061 /* Function vect_stmt_relevant_p.
5062
5063 Return true if STMT in loop that is represented by LOOP_VINFO is
5064 "relevant for vectorization".
5065
5066 A stmt is considered "relevant for vectorization" if:
5067 - it has uses outside the loop.
5068 - it has vdefs (it alters memory).
5069 - it is a control stmt in the loop (other than the exit condition).
5070
5071 CHECKME: what other side effects would the vectorizer allow? */
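/* Illustrative examples (not from the original sources):
     a[i] = b[i] + x;  # has vdefs (alters memory)    -> relevant
     t_1 = n_2 * 4;    # used only for array indexing -> not relevant
   (unless t_1 is also used outside the loop).  */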

static bool
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
{
  v_may_def_optype v_may_defs;
  v_must_def_optype v_must_defs;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int i;
  dataflow_t df;
  int num_uses;

  /* A cond stmt other than the loop exit cond.  */
  if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
    return true;

  /* Changing memory.  */
  v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  if (v_may_defs || v_must_defs)
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "vect_stmt_relevant_p: stmt has vdefs.");
      return true;
    }

  /* Uses outside the loop.  */
  df = get_immediate_uses (stmt);
  num_uses = num_immediate_uses (df);
  for (i = 0; i < num_uses; i++)
    {
      tree use = immediate_use (df, i);
      basic_block bb = bb_for_stmt (use);
      if (!flow_bb_inside_loop_p (loop, bb))
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "vect_stmt_relevant_p: used out of loop.");
          return true;
        }
    }

  return false;
}


/* Function vect_mark_stmts_to_be_vectorized.

   Not all stmts in the loop need to be vectorized.  For example:

     for i...
       for j...
   1.    T0 = i + j
   2.    T1 = a[T0]

   3.    j = j + 1

   Stmts 1 and 3 do not need to be vectorized, because loop control and
   addressing of vectorized data-refs are handled differently.

   This pass detects such stmts.  */
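
/* A sketch of the propagation performed below (illustrative pseudocode,
   not the exact control flow):

        worklist = all stmts that are relevant by themselves
                   (see vect_stmt_relevant_p)
        while worklist is not empty:
          S = pop (worklist)
          for each use U in S that is needed for more than array indexing:
            D = the stmt that defines U
            if D is inside the loop:
              mark D relevant and push it onto the worklist  */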

static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  varray_type worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  int j;
  use_optype use_ops;
  stmt_vec_info stmt_info;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");

  VARRAY_TREE_INIT (worklist, 64, "work list");

  /* 1. Init worklist.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
        {
          stmt = bsi_stmt (si);

          if (vect_debug_details (NULL))
            {
              fprintf (dump_file, "init: stmt relevant? ");
              print_generic_expr (dump_file, stmt, TDF_SLIM);
            }

          stmt_info = vinfo_for_stmt (stmt);
          STMT_VINFO_RELEVANT_P (stmt_info) = 0;

          if (vect_stmt_relevant_p (stmt, loop_vinfo))
            vect_mark_relevant (&worklist, stmt);
        }
    }

  /* 2. Process worklist.  */

  while (VARRAY_ACTIVE_SIZE (worklist) > 0)
    {
      stmt = VARRAY_TOP_TREE (worklist);
      VARRAY_POP (worklist);

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "worklist: examine stmt: ");
          print_generic_expr (dump_file, stmt, TDF_SLIM);
        }

      /* Examine the USES in this statement.  Mark all the statements
         which feed this statement's uses as "relevant", unless the USE
         is used as an array index.  */

      if (TREE_CODE (stmt) == PHI_NODE)
        {
          /* Follow the def-use chain inside the loop.  */
          for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
            {
              tree arg = PHI_ARG_DEF (stmt, j);
              tree def_stmt = NULL_TREE;
              basic_block bb;
              if (!vect_is_simple_use (arg, loop, &def_stmt))
                {
                  if (vect_debug_details (NULL))
                    fprintf (dump_file, "worklist: unsupported use.");
                  varray_clear (worklist);
                  return false;
                }
              if (!def_stmt)
                continue;

              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "worklist: def_stmt: ");
                  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
                }

              bb = bb_for_stmt (def_stmt);
              if (flow_bb_inside_loop_p (loop, bb))
                vect_mark_relevant (&worklist, def_stmt);
            }
        }

      ann = stmt_ann (stmt);
      use_ops = USE_OPS (ann);

      for (i = 0; i < NUM_USES (use_ops); i++)
        {
          tree use = USE_OP (use_ops, i);

          /* We are only interested in uses that need to be vectorized.
             Uses that are used for address computation are not
             considered relevant.  */
          if (exist_non_indexing_operands_for_use_p (use, stmt))
            {
              tree def_stmt = NULL_TREE;
              basic_block bb;
              if (!vect_is_simple_use (use, loop, &def_stmt))
                {
                  if (vect_debug_details (NULL))
                    fprintf (dump_file, "worklist: unsupported use.");
                  varray_clear (worklist);
                  return false;
                }

              if (!def_stmt)
                continue;

              if (vect_debug_details (NULL))
                {
                  fprintf (dump_file, "worklist: examine use %d: ", i);
                  print_generic_expr (dump_file, use, TDF_SLIM);
                }

              bb = bb_for_stmt (def_stmt);
              if (flow_bb_inside_loop_p (loop, bb))
                vect_mark_relevant (&worklist, def_stmt);
            }
        }
    } /* while worklist */

  varray_clear (worklist);
  return true;
}


/* Function vect_can_advance_ivs_p.

   In case the number of iterations that LOOP iterates is unknown at
   compile time, an epilog loop will be generated, and the loop
   induction variables (IVs) will be "advanced" to the value they are
   supposed to take just before the epilog loop.  Here we check that
   the access functions of the loop IVs and the expression that
   represents the loop bound are simple enough.
   These restrictions will be relaxed in the future.  */
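
/* For example (illustrative), an IV of the form

        i_1 = PHI <0 (preheader), i_2 (latch)>;    i_2 = i_1 + 1;

   has the access function {0, +, 1}_loop; its evolution part in the
   loop is the constant 1, which is not itself a chrec, so the IV can
   be advanced.  An IV whose evolution part is again a chrec, e.g.
   {0, +, {1, +, 1}_loop}_loop (a polynomial of degree 2), is rejected
   FORNOW by the check below.  */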

static bool
vect_can_advance_ivs_p (struct loop *loop)
{
  basic_block bb = loop->header;
  tree phi;

  /* Analyze phi functions of the loop header.  */

  for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
    {
      tree access_fn = NULL;
      tree evolution_part;

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Analyze phi: ");
          print_generic_expr (dump_file, phi, TDF_SLIM);
        }

      /* Skip virtual phis.  The data dependences that are associated
         with virtual defs/uses (i.e., memory accesses) are analyzed
         elsewhere.  */

      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "virtual phi. skip.");
          continue;
        }

      /* Analyze the evolution function.  */

      access_fn = instantiate_parameters
        (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi)));

      if (!access_fn)
        {
          if (vect_debug_details (NULL))
            fprintf (dump_file, "No access function.");
          return false;
        }

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "Access function of PHI: ");
          print_generic_expr (dump_file, access_fn, TDF_SLIM);
        }

      evolution_part = evolution_part_in_loop_num (access_fn, loop->num);

      if (evolution_part == NULL_TREE)
        return false;

      /* FORNOW: We do not transform initial conditions of IVs
         whose evolution functions are a polynomial of degree >= 2.  */

      if (tree_is_chrec (evolution_part))
        return false;
    }

  return true;
}


/* Function vect_get_loop_niters.

   Determine the number of iterations the loop executes.
   If an expression that represents the number of iterations
   can be constructed, place it in NUMBER_OF_ITERATIONS.
   Return the loop exit condition.  */
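
/* For example (illustrative), for a loop like

        for (i = 0; i < n; i++)
          ...

   the expression placed in NUMBER_OF_ITERATIONS would be given in
   terms of n (or folded to a constant when n is known at compile
   time), and the returned exit condition would be the COND_EXPR that
   tests i against the bound.  */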

static tree
vect_get_loop_niters (struct loop *loop, tree *number_of_iterations)
{
  tree niters;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<get_loop_niters>>\n");

  niters = number_of_iterations_in_loop (loop);

  if (niters != NULL_TREE
      && niters != chrec_dont_know)
    {
      *number_of_iterations = niters;

      if (vect_debug_details (NULL))
        {
          fprintf (dump_file, "==> get_loop_niters:");
          print_generic_expr (dump_file, *number_of_iterations, TDF_SLIM);
        }
    }

  return get_loop_exit_condition (loop);
}


/* Function vect_analyze_loop_form.

   Verify the following restrictions (some may be relaxed in the future):
   - it is an innermost loop
   - the number of BBs is 2 (the loop header and the latch)
   - the loop has a pre-header
   - the loop has a single entry and a single exit
   - the loop exit condition is simple enough, and the number of
     iterations can be analyzed (a countable loop).  */
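
/* The expected shape (an illustrative sketch) is essentially a
   do-while loop whose header holds all the executable stmts and whose
   latch is empty:

        preheader:
          ...
        header:
          i_1 = PHI <0 (preheader), i_2 (latch)>
          ... loop body stmts ...
          i_2 = i_1 + 1;
          if (i_2 < n) goto latch; else goto exit_bb;
        latch:
          goto header;
        exit_bb:                <-- single predecessor  */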

static loop_vec_info
vect_analyze_loop_form (struct loop *loop)
{
  loop_vec_info loop_vinfo;
  tree loop_cond;
  tree number_of_iterations = NULL;
  bool rescan = false;

  if (vect_debug_details (loop))
    fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");

  if (loop->inner
      || !loop->single_exit
      || loop->num_nodes != 2
      || EDGE_COUNT (loop->header->preds) != 2
      || loop->num_entries != 1)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        {
          fprintf (dump_file, "not vectorized: bad loop form. ");
          if (loop->inner)
            fprintf (dump_file, "nested loop.");
          else if (!loop->single_exit)
            fprintf (dump_file, "multiple exits.");
          else if (loop->num_nodes != 2)
            fprintf (dump_file, "too many BBs in loop.");
          else if (EDGE_COUNT (loop->header->preds) != 2)
            fprintf (dump_file, "too many incoming edges.");
          else if (loop->num_entries != 1)
            fprintf (dump_file, "too many entries.");
        }

      return NULL;
    }

  /* We assume that the loop exit condition is at the end of the loop,
     i.e., that the loop is represented as a do-while (with a proper
     if-guard before the loop if needed), where the loop header contains
     all the executable statements, and the latch is empty.  */
  if (!empty_block_p (loop->latch))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected loop form.");
      return NULL;
    }

  /* Make sure we have a preheader basic block.  */
  if (!loop->pre_header)
    {
      rescan = true;
      loop_split_edge_with (loop_preheader_edge (loop), NULL);
    }

  /* Make sure there exists a single-predecessor exit bb.  */
  if (EDGE_COUNT (loop->exit_edges[0]->dest->preds) != 1)
    {
      rescan = true;
      loop_split_edge_with (loop->exit_edges[0], NULL);
    }

  if (rescan)
    {
      flow_loop_scan (loop, LOOP_ALL);
      /* Flow loop scan does not update the loop->single_exit field.  */
      loop->single_exit = loop->exit_edges[0];
    }

  if (empty_block_p (loop->header))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: empty loop.");
      return NULL;
    }

  loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
  if (!loop_cond)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: complicated exit condition.");
      return NULL;
    }

  if (!number_of_iterations)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file,
                 "not vectorized: number of iterations cannot be computed.");
      return NULL;
    }

  if (chrec_contains_undetermined (number_of_iterations))
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "Infinite number of iterations.");
      return NULL;
    }

  loop_vinfo = new_loop_vec_info (loop);
  LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;

  if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
    {
      if (vect_debug_details (loop))
        {
          fprintf (dump_file, "loop bound unknown.\n");
          fprintf (dump_file, "Symbolic number of iterations is ");
          print_generic_expr (dump_file, number_of_iterations, TDF_DETAILS);
        }
    }
  else if (LOOP_VINFO_INT_NITERS (loop_vinfo) == 0)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: number of iterations = 0.");
      return NULL;
    }

  LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;

  return loop_vinfo;
}


/* Function vect_analyze_loop.

   Apply a set of analyses on LOOP, and create a loop_vec_info struct
   for it.  The different analyses will record information in the
   loop_vec_info struct.  */
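
/* The analyses applied below, in the order of the calls that follow:

     1. vect_analyze_loop_form            - CFG shape, countable loop
     2. vect_analyze_data_refs            - find and analyze data-refs
     3. vect_mark_stmts_to_be_vectorized  - relevance data-flow
     4. vect_analyze_scalar_cycles        - cross-iteration scalar cycles
     5. vect_analyze_data_ref_dependences - data dependences
     6. vect_analyze_data_ref_accesses    - access patterns (consecutive)
     7. vect_analyze_data_refs_alignment  - alignment
     8. vect_analyze_operations           - each operation is vectorizable

   Any failure destroys the loop_vec_info and returns NULL.  */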

static loop_vec_info
vect_analyze_loop (struct loop *loop)
{
  bool ok;
  loop_vec_info loop_vinfo;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");

  /* Check the CFG characteristics of the loop (nesting, entry/exit, etc.).  */

  loop_vinfo = vect_analyze_loop_form (loop);
  if (!loop_vinfo)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad loop form.");
      return NULL;
    }

  /* Find all data references in the loop (which correspond to
     vdefs/vuses) and analyze their evolution in the loop.

     FORNOW: Handle only simple array references whose alignment can be
     forced, and aligned pointer-references.  */

  ok = vect_analyze_data_refs (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad data references.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Data-flow analysis to detect stmts that do not need to be
     vectorized.  */

  ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
        fprintf (dump_file, "not vectorized: unexpected pattern.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Check that all cross-iteration scalar data-flow cycles are OK.
     Cross-iteration cycles caused by virtual phis are analyzed
     separately.  */

  ok = vect_analyze_scalar_cycles (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad scalar cycle.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze data dependences between the data-refs in the loop.
     FORNOW: fail at the first data dependence that we encounter.  */

  ok = vect_analyze_data_ref_dependences (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad data dependence.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the access patterns of the data-refs in the loop
     (consecutive, complex, etc.).  FORNOW: Only handle consecutive
     access patterns.  */

  ok = vect_analyze_data_ref_accesses (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad data access.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Analyze the alignment of the data-refs in the loop.
     FORNOW: Only aligned accesses are handled.  */

  ok = vect_analyze_data_refs_alignment (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad data alignment.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  /* Scan all the operations in the loop and make sure they are
     vectorizable.  */

  ok = vect_analyze_operations (loop_vinfo);
  if (!ok)
    {
      if (vect_debug_details (loop))
        fprintf (dump_file, "bad operation or unsupported loop bound.");
      destroy_loop_vec_info (loop_vinfo);
      return NULL;
    }

  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;

  return loop_vinfo;
}


/* Function need_imm_uses_for.

   Return whether we ought to include information for VAR when
   calculating immediate uses.  For this pass we only want use
   information for non-virtual variables.  */

static bool
need_imm_uses_for (tree var)
{
  return is_gimple_reg (var);
}


/* Function vectorize_loops.

   Entry point to the loop vectorization phase.  */
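
/* A sketch of the flow below: analyze each of the original loops,
   transform those found vectorizable, then release the per-loop info
   and rewrite the function back into (loop-closed) SSA form.  */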

void
vectorize_loops (struct loops *loops)
{
  unsigned int i, loops_num;
  unsigned int num_vectorized_loops = 0;

  /* Does the target support SIMD?
     FORNOW: until more sophisticated machine modelling is in place.  */
  if (!UNITS_PER_SIMD_WORD)
    {
      if (vect_debug_details (NULL))
        fprintf (dump_file, "vectorizer: target vector size is not defined.");
      return;
    }

#ifdef ENABLE_CHECKING
  verify_loop_closed_ssa ();
#endif

  compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);

  /* ----------- Analyze loops. ----------- */

  /* If a loop was duplicated, it gets a bigger number than all
     previously defined loops.  This fact allows us to run over only
     the initial loops, skipping newly generated ones.  */
  loops_num = loops->num;
  for (i = 1; i < loops_num; i++)
    {
      loop_vec_info loop_vinfo;
      struct loop *loop = loops->parray[i];

      if (!loop)
        continue;

      loop_vinfo = vect_analyze_loop (loop);
      loop->aux = loop_vinfo;

      if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
        continue;

      vect_transform_loop (loop_vinfo, loops);
      num_vectorized_loops++;
    }

  if (vect_debug_stats (NULL) || vect_debug_details (NULL))
    fprintf (dump_file, "\nvectorized %u loops in function.\n",
             num_vectorized_loops);

  /* ----------- Finalize. ----------- */

  free_df ();
  for (i = 1; i < loops_num; i++)
    {
      struct loop *loop = loops->parray[i];
      loop_vec_info loop_vinfo;

      if (!loop)
        continue;
      loop_vinfo = loop->aux;
      destroy_loop_vec_info (loop_vinfo);
      loop->aux = NULL;
    }

  rewrite_into_ssa (false);
  rewrite_into_loop_closed_ssa (); /* FORNOW */
  bitmap_clear (vars_to_rename);
}