]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vectorizer.c
tree.def (ALIGN_INDIRECT_REF, [...]): New tree-codes.
[thirdparty/gcc.git] / gcc / tree-vectorizer.c
CommitLineData
79fe1b3b
DN
1/* Loop Vectorization
2 Copyright (C) 2003, 2004 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9Software Foundation; either version 2, or (at your option) any later
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
18along with GCC; see the file COPYING. If not, write to the Free
19Software Foundation, 59 Temple Place - Suite 330, Boston, MA
2002111-1307, USA. */
21
22/* Loop Vectorization Pass.
23
24 This pass tries to vectorize loops. This first implementation focuses on
25 simple inner-most loops, with no conditional control flow, and a set of
26 simple operations which vector form can be expressed using existing
27 tree codes (PLUS, MULT etc).
28
29 For example, the vectorizer transforms the following simple loop:
30
31 short a[N]; short b[N]; short c[N]; int i;
32
33 for (i=0; i<N; i++){
34 a[i] = b[i] + c[i];
35 }
36
37 as if it was manually vectorized by rewriting the source code into:
38
39 typedef int __attribute__((mode(V8HI))) v8hi;
40 short a[N]; short b[N]; short c[N]; int i;
41 v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
42 v8hi va, vb, vc;
43
44 for (i=0; i<N/8; i++){
45 vb = pb[i];
46 vc = pc[i];
47 va = vb + vc;
48 pa[i] = va;
49 }
50
51 The main entry to this pass is vectorize_loops(), in which
52 the vectorizer applies a set of analyses on a given set of loops,
53 followed by the actual vectorization transformation for the loops that
54 had successfully passed the analysis phase.
55
56 Throughout this pass we make a distinction between two types of
57 data: scalars (which are represented by SSA_NAMES), and memory references
58 ("data-refs"). These two types of data require different handling both
59 during analysis and transformation. The types of data-refs that the
6775f1f3
IR
60 vectorizer currently supports are ARRAY_REFS which base is an array DECL
61 (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
62 accesses are required to have a simple (consecutive) access pattern.
79fe1b3b
DN
63
64 Analysis phase:
65 ===============
66 The driver for the analysis phase is vect_analyze_loop_nest().
67 It applies a set of analyses, some of which rely on the scalar evolution
68 analyzer (scev) developed by Sebastian Pop.
69
70 During the analysis phase the vectorizer records some information
71 per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
72 loop, as well as general information about the loop as a whole, which is
73 recorded in a "loop_vec_info" struct attached to each loop.
74
75 Transformation phase:
76 =====================
77 The loop transformation phase scans all the stmts in the loop, and
78 creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
79 the loop that needs to be vectorized. It insert the vector code sequence
80 just before the scalar stmt S, and records a pointer to the vector code
81 in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
82 attached to S). This pointer will be used for the vectorization of following
83 stmts which use the def of stmt S. Stmt S is removed if it writes to memory;
84 otherwise, we rely on dead code elimination for removing it.
85
86 For example, say stmt S1 was vectorized into stmt VS1:
87
88 VS1: vb = px[i];
89 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
90 S2: a = b;
91
92 To vectorize stmt S2, the vectorizer first finds the stmt that defines
93 the operand 'b' (S1), and gets the relevant vector def 'vb' from the
94 vector stmt VS1 pointed by STMT_VINFO_VEC_STMT (stmt_info (S1)). The
95 resulting sequence would be:
96
97 VS1: vb = px[i];
98 S1: b = x[i]; STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
99 VS2: va = vb;
100 S2: a = b; STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2
101
102 Operands that are not SSA_NAMEs, are data-refs that appear in
103 load/store operations (like 'x[i]' in S1), and are handled differently.
104
105 Target modeling:
106 =================
107 Currently the only target specific information that is used is the
108 size of the vector (in bytes) - "UNITS_PER_SIMD_WORD". Targets that can
109 support different sizes of vectors, for now will need to specify one value
110 for "UNITS_PER_SIMD_WORD". More flexibility will be added in the future.
111
112 Since we only vectorize operations which vector form can be
113 expressed using existing tree codes, to verify that an operation is
114 supported, the vectorizer checks the relevant optab at the relevant
115 machine_mode (e.g, add_optab->handlers[(int) V8HImode].insn_code). If
116 the value found is CODE_FOR_nothing, then there's no target support, and
117 we can't vectorize the stmt.
118
119 For additional information on this project see:
120 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
121*/
122
123#include "config.h"
124#include "system.h"
125#include "coretypes.h"
126#include "tm.h"
127#include "errors.h"
128#include "ggc.h"
129#include "tree.h"
130#include "target.h"
131
132#include "rtl.h"
133#include "basic-block.h"
134#include "diagnostic.h"
135#include "tree-flow.h"
136#include "tree-dump.h"
137#include "timevar.h"
138#include "cfgloop.h"
139#include "cfglayout.h"
140#include "expr.h"
141#include "optabs.h"
142#include "tree-chrec.h"
143#include "tree-data-ref.h"
144#include "tree-scalar-evolution.h"
145#include "tree-vectorizer.h"
146#include "tree-pass.h"
147
148/* Main analysis functions. */
149static loop_vec_info vect_analyze_loop (struct loop *);
150static loop_vec_info vect_analyze_loop_form (struct loop *);
151static bool vect_analyze_data_refs (loop_vec_info);
152static bool vect_mark_stmts_to_be_vectorized (loop_vec_info);
153static bool vect_analyze_scalar_cycles (loop_vec_info);
154static bool vect_analyze_data_ref_accesses (loop_vec_info);
155static bool vect_analyze_data_refs_alignment (loop_vec_info);
156static void vect_compute_data_refs_alignment (loop_vec_info);
157static bool vect_analyze_operations (loop_vec_info);
158
159/* Main code transformation functions. */
160static void vect_transform_loop (loop_vec_info, struct loops *);
161static void vect_transform_loop_bound (loop_vec_info);
162static bool vect_transform_stmt (tree, block_stmt_iterator *);
163static bool vectorizable_load (tree, block_stmt_iterator *, tree *);
164static bool vectorizable_store (tree, block_stmt_iterator *, tree *);
165static bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
166static bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
167static void vect_align_data_ref (tree);
168static void vect_enhance_data_refs_alignment (loop_vec_info);
169
170/* Utility functions for the analyses. */
171static bool vect_is_simple_use (tree , struct loop *, tree *);
172static bool exist_non_indexing_operands_for_use_p (tree, tree);
173static bool vect_is_simple_iv_evolution (unsigned, tree, tree *, tree *, bool);
174static void vect_mark_relevant (varray_type, tree);
175static bool vect_stmt_relevant_p (tree, loop_vec_info);
176static tree vect_get_loop_niters (struct loop *, HOST_WIDE_INT *);
6775f1f3 177static bool vect_compute_data_ref_alignment
79fe1b3b
DN
178 (struct data_reference *, loop_vec_info);
179static bool vect_analyze_data_ref_access (struct data_reference *);
180static bool vect_get_first_index (tree, tree *);
181static bool vect_can_force_dr_alignment_p (tree, unsigned int);
7ccf35ed
DN
182static struct data_reference * vect_analyze_pointer_ref_access
183 (tree, tree, bool);
6775f1f3
IR
184static tree vect_get_base_and_bit_offset
185 (struct data_reference *, tree, tree, loop_vec_info, tree *, bool*);
186static struct data_reference * vect_analyze_pointer_ref_access
187 (tree, tree, bool);
188static tree vect_compute_array_base_alignment (tree, tree, tree *, tree *);
189static tree vect_compute_array_ref_alignment
190 (struct data_reference *, loop_vec_info, tree, tree *);
191static tree vect_get_ptr_offset (tree, tree, tree *);
192static tree vect_get_symbl_and_dr
193 (tree, tree, bool, loop_vec_info, struct data_reference **);
79fe1b3b
DN
194
195/* Utility functions for the code transformation. */
196static tree vect_create_destination_var (tree, tree);
7ccf35ed
DN
197static tree vect_create_data_ref_ptr
198 (tree, block_stmt_iterator *, tree, tree *, bool);
199static tree vect_create_index_for_vector_ref
200 (struct loop *, block_stmt_iterator *);
201static tree vect_create_addr_base_for_vector_ref (tree, tree *, tree);
79fe1b3b
DN
202static tree get_vectype_for_scalar_type (tree);
203static tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
204static tree vect_get_vec_def_for_operand (tree, tree);
205static tree vect_init_vector (tree, tree);
206static void vect_finish_stmt_generation
207 (tree stmt, tree vec_stmt, block_stmt_iterator *bsi);
208
209/* Utilities for creation and deletion of vec_info structs. */
210loop_vec_info new_loop_vec_info (struct loop *loop);
211void destroy_loop_vec_info (loop_vec_info);
212stmt_vec_info new_stmt_vec_info (tree stmt, struct loop *loop);
213
214static bool vect_debug_stats (struct loop *loop);
215static bool vect_debug_details (struct loop *loop);
216
217
218/* Function new_stmt_vec_info.
219
220 Create and initialize a new stmt_vec_info struct for STMT. */
221
222stmt_vec_info
223new_stmt_vec_info (tree stmt, struct loop *loop)
224{
225 stmt_vec_info res;
226 res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));
227
228 STMT_VINFO_TYPE (res) = undef_vec_info_type;
229 STMT_VINFO_STMT (res) = stmt;
230 STMT_VINFO_LOOP (res) = loop;
231 STMT_VINFO_RELEVANT_P (res) = 0;
232 STMT_VINFO_VECTYPE (res) = NULL;
233 STMT_VINFO_VEC_STMT (res) = NULL;
234 STMT_VINFO_DATA_REF (res) = NULL;
235 STMT_VINFO_MEMTAG (res) = NULL;
6775f1f3 236 STMT_VINFO_VECT_DR_BASE (res) = NULL;
79fe1b3b
DN
237
238 return res;
239}
240
241
242/* Function new_loop_vec_info.
243
244 Create and initialize a new loop_vec_info struct for LOOP, as well as
245 stmt_vec_info structs for all the stmts in LOOP. */
246
247loop_vec_info
248new_loop_vec_info (struct loop *loop)
249{
250 loop_vec_info res;
251 basic_block *bbs;
252 block_stmt_iterator si;
253 unsigned int i;
254
255 res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
256
257 bbs = get_loop_body (loop);
258
259 /* Create stmt_info for all stmts in the loop. */
260 for (i = 0; i < loop->num_nodes; i++)
261 {
262 basic_block bb = bbs[i];
263 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
264 {
265 tree stmt = bsi_stmt (si);
266 stmt_ann_t ann;
267
268 get_stmt_operands (stmt);
269 ann = stmt_ann (stmt);
270 set_stmt_info (ann, new_stmt_vec_info (stmt, loop));
271 }
272 }
273
274 LOOP_VINFO_LOOP (res) = loop;
275 LOOP_VINFO_BBS (res) = bbs;
276 LOOP_VINFO_EXIT_COND (res) = NULL;
277 LOOP_VINFO_NITERS (res) = -1;
278 LOOP_VINFO_VECTORIZABLE_P (res) = 0;
279 LOOP_VINFO_VECT_FACTOR (res) = 0;
280 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_WRITES (res), 20,
281 "loop_write_datarefs");
282 VARRAY_GENERIC_PTR_INIT (LOOP_VINFO_DATAREF_READS (res), 20,
283 "loop_read_datarefs");
284 return res;
285}
286
287
288/* Function destroy_loop_vec_info.
289
290 Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all the
291 stmts in the loop. */
292
293void
294destroy_loop_vec_info (loop_vec_info loop_vinfo)
295{
296 struct loop *loop;
297 basic_block *bbs;
298 int nbbs;
299 block_stmt_iterator si;
300 int j;
301
302 if (!loop_vinfo)
303 return;
304
305 loop = LOOP_VINFO_LOOP (loop_vinfo);
306
307 bbs = LOOP_VINFO_BBS (loop_vinfo);
308 nbbs = loop->num_nodes;
309
310 for (j = 0; j < nbbs; j++)
311 {
312 basic_block bb = bbs[j];
313 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
314 {
315 tree stmt = bsi_stmt (si);
316 stmt_ann_t ann = stmt_ann (stmt);
317 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
318 free (stmt_info);
319 set_stmt_info (ann, NULL);
320 }
321 }
322
323 free (LOOP_VINFO_BBS (loop_vinfo));
324 varray_clear (LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
325 varray_clear (LOOP_VINFO_DATAREF_READS (loop_vinfo));
326
327 free (loop_vinfo);
328}
329
330
331/* Function debug_loop_stats.
332
333 For vectorization statistics dumps. */
334
335static bool
336vect_debug_stats (struct loop *loop)
337{
338 basic_block bb;
339 block_stmt_iterator si;
340 tree node = NULL_TREE;
341
342 if (!dump_file || !(dump_flags & TDF_STATS))
343 return false;
344
345 if (!loop)
346 {
347 fprintf (dump_file, "\n");
348 return true;
349 }
350
351 if (!loop->header)
352 return false;
353
354 bb = loop->header;
355
356 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
357 {
358 node = bsi_stmt (si);
359 if (node && EXPR_P (node) && EXPR_LOCUS (node))
360 break;
361 }
362
363 if (node && EXPR_P (node) && EXPR_LOCUS (node)
364 && EXPR_FILENAME (node) && EXPR_LINENO (node))
365 {
366 fprintf (dump_file, "\nloop at %s:%d: ",
367 EXPR_FILENAME (node), EXPR_LINENO (node));
368 return true;
369 }
370
371 return false;
372}
373
374
375/* Function debug_loop_details.
376
377 For vectorization debug dumps. */
378
379static bool
380vect_debug_details (struct loop *loop)
381{
382 basic_block bb;
383 block_stmt_iterator si;
384 tree node = NULL_TREE;
385
386 if (!dump_file || !(dump_flags & TDF_DETAILS))
387 return false;
388
389 if (!loop)
390 {
391 fprintf (dump_file, "\n");
392 return true;
393 }
394
395 if (!loop->header)
396 return false;
397
398 bb = loop->header;
399
400 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
401 {
402 node = bsi_stmt (si);
403 if (node && EXPR_P (node) && EXPR_LOCUS (node))
404 break;
405 }
406
407 if (node && EXPR_P (node) && EXPR_LOCUS (node)
408 && EXPR_FILENAME (node) && EXPR_LINENO (node))
409 {
410 fprintf (dump_file, "\nloop at %s:%d: ",
411 EXPR_FILENAME (node), EXPR_LINENO (node));
412 return true;
413 }
414
415 return false;
416}
417
6775f1f3
IR
418
419/* Function vect_get_ptr_offset
420
421 Compute the OFFSET modulo vector-type alignment of pointer REF in bits. */
422
423static tree
424vect_get_ptr_offset (tree ref ATTRIBUTE_UNUSED,
425 tree vectype ATTRIBUTE_UNUSED,
426 tree *offset ATTRIBUTE_UNUSED)
427{
428 /* TODO: Use alignment information. */
429 return NULL_TREE;
430}
431
432
433/* Function vect_get_base_and_bit_offset
434
435 Return the BASE of the data reference EXPR.
436 If VECTYPE is given, also compute the OFFSET from BASE in bits.
437 E.g., for EXPR a.b[i] + 4B, BASE is a, and OFFSET is the overall offset in
438 bits of 'a.b[i] + 4B' from a.
439
440 Input:
441 EXPR - the memory reference that is being analyzed
442 DR - the data_reference struct of the _original_ memory reference
443 (Note: DR_REF (DR) is not necessarily EXPR)
444 VECTYPE - the type that defines the alignment (i.e, we compute
445 alignment relative to TYPE_ALIGN(VECTYPE))
79fe1b3b 446
6775f1f3
IR
447 Output:
448 BASE (returned value) - the base of the data reference EXPR.
449 E.g, if EXPR is a.b[k].c[i][j] the returned
450 base is a.
451 OFFSET - offset of EXPR from BASE in bits
452 BASE_ALIGNED_P - indicates if BASE is aligned
453
454 If something unexpected is encountered (an unsupported form of data-ref),
455 or if VECTYPE is given but OFFSET cannot be determined:
456 then NULL_TREE is returned. */
79fe1b3b
DN
457
458static tree
6775f1f3
IR
459vect_get_base_and_bit_offset (struct data_reference *dr,
460 tree expr,
461 tree vectype,
462 loop_vec_info loop_vinfo,
463 tree *offset,
464 bool *base_aligned_p)
79fe1b3b 465{
6775f1f3
IR
466 tree this_offset = size_zero_node;
467 tree base = NULL_TREE;
468 tree next_ref;
469 tree oprnd0, oprnd1;
470 struct data_reference *array_dr;
471 enum tree_code code = TREE_CODE (expr);
472
473 *base_aligned_p = false;
79fe1b3b 474
6775f1f3 475 switch (code)
79fe1b3b 476 {
6775f1f3
IR
477 /* These cases end the recursion: */
478 case VAR_DECL:
479 *offset = size_zero_node;
480 if (vectype && DECL_ALIGN (expr) >= TYPE_ALIGN (vectype))
481 *base_aligned_p = true;
482 return expr;
483
484 case SSA_NAME:
485 if (!vectype)
486 return expr;
487
488 if (TREE_CODE (TREE_TYPE (expr)) != POINTER_TYPE)
489 return NULL_TREE;
490
491 if (TYPE_ALIGN (TREE_TYPE (TREE_TYPE (expr))) < TYPE_ALIGN (vectype))
492 {
493 base = vect_get_ptr_offset (expr, vectype, offset);
494 if (base)
495 *base_aligned_p = true;
496 }
497 else
498 {
499 *base_aligned_p = true;
500 *offset = size_zero_node;
501 base = expr;
502 }
503 return base;
504
505 case INTEGER_CST:
506 *offset = int_const_binop (MULT_EXPR, expr,
507 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
508 return expr;
509
510 /* These cases continue the recursion: */
511 case COMPONENT_REF:
512 oprnd0 = TREE_OPERAND (expr, 0);
513 oprnd1 = TREE_OPERAND (expr, 1);
79fe1b3b
DN
514
515 this_offset = bit_position (oprnd1);
6775f1f3
IR
516 if (vectype && !host_integerp (this_offset, 1))
517 return NULL_TREE;
518 next_ref = oprnd0;
519 break;
520
521 case ADDR_EXPR:
522 oprnd0 = TREE_OPERAND (expr, 0);
523 next_ref = oprnd0;
524 break;
525
526 case INDIRECT_REF:
527 oprnd0 = TREE_OPERAND (expr, 0);
528 next_ref = oprnd0;
529 break;
530
531 case ARRAY_REF:
532 if (DR_REF (dr) != expr)
533 /* Build array data_reference struct if the existing DR_REF
534 doesn't match EXPR. This happens, for example, when the
535 EXPR is *T and T is initialized to &arr[indx]. The DR struct
536 contains information on the access of T, not of arr. In order
537 to continue the analysis, we create a new DR struct that
538 describes the access of arr.
539 */
540 array_dr = analyze_array (DR_STMT (dr), expr, DR_IS_READ (dr));
541 else
542 array_dr = dr;
543
544 next_ref = vect_compute_array_ref_alignment (array_dr, loop_vinfo,
545 vectype, &this_offset);
546 if (!next_ref)
79fe1b3b 547 return NULL_TREE;
79fe1b3b 548
6775f1f3
IR
549 if (vectype &&
550 TYPE_ALIGN (TREE_TYPE (TREE_TYPE (next_ref))) >= TYPE_ALIGN (vectype))
79fe1b3b 551 {
6775f1f3
IR
552 *offset = this_offset;
553 *base_aligned_p = true;
554 return next_ref;
555 }
556 break;
79fe1b3b 557
6775f1f3
IR
558 case PLUS_EXPR:
559 case MINUS_EXPR:
560 /* In case we have a PLUS_EXPR of the form
561 (oprnd0 + oprnd1), we assume that only oprnd0 determines the base.
562 This is verified in vect_get_symbl_and_dr. */
563 oprnd0 = TREE_OPERAND (expr, 0);
564 oprnd1 = TREE_OPERAND (expr, 1);
565
566 base = vect_get_base_and_bit_offset
567 (dr, oprnd1, vectype, loop_vinfo, &this_offset, base_aligned_p);
568 if (vectype && !base)
569 return NULL_TREE;
79fe1b3b 570
6775f1f3
IR
571 next_ref = oprnd0;
572 break;
79fe1b3b 573
6775f1f3
IR
574 default:
575 return NULL_TREE;
79fe1b3b
DN
576 }
577
6775f1f3
IR
578 base = vect_get_base_and_bit_offset (dr, next_ref, vectype,
579 loop_vinfo, offset, base_aligned_p);
580
581 if (vectype && base)
582 {
583 *offset = int_const_binop (PLUS_EXPR, *offset, this_offset, 1);
584 if (!host_integerp (*offset, 1) || TREE_OVERFLOW (*offset))
585 return NULL_TREE;
586
587 if (vect_debug_details (NULL))
588 {
589 print_generic_expr (dump_file, expr, TDF_SLIM);
590 fprintf (dump_file, " --> total offset for ref: ");
591 print_generic_expr (dump_file, *offset, TDF_SLIM);
592 }
593 }
594 return base;
79fe1b3b
DN
595}
596
597
6775f1f3 598
79fe1b3b
DN
599/* Function vect_force_dr_alignment_p.
600
601 Returns whether the alignment of a DECL can be forced to be aligned
602 on ALIGNMENT bit boundary. */
603
604static bool
605vect_can_force_dr_alignment_p (tree decl, unsigned int alignment)
606{
607 if (TREE_CODE (decl) != VAR_DECL)
608 return false;
609
610 if (DECL_EXTERNAL (decl))
611 return false;
612
613 if (TREE_STATIC (decl))
614 return (alignment <= MAX_OFILE_ALIGNMENT);
615 else
7a8554ce
DN
616 /* This is not 100% correct. The absolute correct stack alignment
617 is STACK_BOUNDARY. We're supposed to hope, but not assume, that
618 PREFERRED_STACK_BOUNDARY is honored by all translation units.
619 However, until someone implements forced stack alignment, SSE
620 isn't really usable without this. */
621 return (alignment <= PREFERRED_STACK_BOUNDARY);
79fe1b3b
DN
622}
623
624
625/* Function vect_get_new_vect_var.
626
627 Returns a name for a new variable. The current naming scheme appends the
628 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
629 the name of vectorizer generated variables, and appends that to NAME if
630 provided. */
631
632static tree
633vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
634{
635 const char *prefix;
636 int prefix_len;
637 tree new_vect_var;
638
639 if (var_kind == vect_simple_var)
640 prefix = "vect_";
641 else
642 prefix = "vect_p";
643
644 prefix_len = strlen (prefix);
645
646 if (name)
647 new_vect_var = create_tmp_var (type, concat (prefix, name, NULL));
648 else
649 new_vect_var = create_tmp_var (type, prefix);
650
651 return new_vect_var;
652}
653
654
6775f1f3 655/* Function vect_create_index_for_vector_ref.
79fe1b3b
DN
656
657 Create (and return) an index variable, along with it's update chain in the
658 loop. This variable will be used to access a memory location in a vector
659 operation.
660
661 Input:
6775f1f3 662 LOOP: The loop being vectorized.
79fe1b3b
DN
663 BSI: The block_stmt_iterator where STMT is. Any new stmts created by this
664 function can be added here, or in the loop pre-header.
665
6775f1f3
IR
666 Output:
667 Return an index that will be used to index a vector array. It is expected
668 that a pointer to the first vector will be used as the base address for the
669 indexed reference.
670
671 FORNOW: we are not trying to be efficient, just creating a new index each
672 time from scratch. At this time all vector references could use the same
673 index.
674
675 TODO: create only one index to be used by all vector references. Record
676 the index in the LOOP_VINFO the first time this procedure is called and
677 return it on subsequent calls. The increment of this index must be placed
678 just before the conditional expression that ends the single block loop. */
79fe1b3b
DN
679
680static tree
6775f1f3 681vect_create_index_for_vector_ref (struct loop *loop, block_stmt_iterator *bsi)
79fe1b3b 682{
79fe1b3b 683 tree init, step;
79fe1b3b 684 tree indx_before_incr, indx_after_incr;
79fe1b3b 685
6775f1f3
IR
686 /* It is assumed that the base pointer used for vectorized access contains
687 the address of the first vector. Therefore the index used for vectorized
688 access must be initialized to zero and incremented by 1. */
79fe1b3b 689
6775f1f3
IR
690 init = integer_zero_node;
691 step = integer_one_node;
692
693 /* Assuming that bsi_insert is used with BSI_NEW_STMT */
694 create_iv (init, step, NULL_TREE, loop, bsi, false,
695 &indx_before_incr, &indx_after_incr);
79fe1b3b 696
6775f1f3
IR
697 return indx_before_incr;
698}
79fe1b3b 699
79fe1b3b 700
6775f1f3 701/* Function vect_create_addr_base_for_vector_ref.
79fe1b3b 702
6775f1f3
IR
703 Create an expression that computes the address of the first memory location
704 that will be accessed for a data reference.
79fe1b3b 705
6775f1f3
IR
706 Input:
707 STMT: The statement containing the data reference.
7ccf35ed
DN
708 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
709 OFFSET: Optional. If supplied, it is be added to the initial address.
79fe1b3b 710
6775f1f3
IR
711 Output:
712 1. Return an SSA_NAME whose value is the address of the memory location of the
713 first vector of the data reference.
714 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
715 these statement(s) which define the returned SSA_NAME.
79fe1b3b 716
6775f1f3 717 FORNOW: We are only handling array accesses with step 1. */
79fe1b3b 718
6775f1f3
IR
719static tree
720vect_create_addr_base_for_vector_ref (tree stmt,
7ccf35ed
DN
721 tree *new_stmt_list,
722 tree offset)
6775f1f3
IR
723{
724 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
725 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
726 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
727 tree data_ref_base = unshare_expr (STMT_VINFO_VECT_DR_BASE (stmt_info));
728 tree base_name = unshare_expr (DR_BASE_NAME (dr));
729 tree ref = DR_REF (dr);
730 tree data_ref_base_type = TREE_TYPE (data_ref_base);
731 tree scalar_type = TREE_TYPE (ref);
732 tree scalar_ptr_type = build_pointer_type (scalar_type);
733 tree access_fn;
734 tree init_val, step, init_oval;
735 bool ok;
736 bool is_ptr_ref, is_array_ref, is_addr_expr;
737 tree array_base;
738 tree vec_stmt;
739 tree new_temp;
740 tree array_ref;
741 tree addr_base, addr_expr;
742 tree dest, new_stmt;
79fe1b3b 743
6775f1f3
IR
744 /* Only the access function of the last index is relevant (i_n in
745 a[i_1][i_2]...[i_n]), the others correspond to loop invariants. */
746 access_fn = DR_ACCESS_FN (dr, 0);
747 ok = vect_is_simple_iv_evolution (loop->num, access_fn, &init_oval, &step, true);
748 if (!ok)
749 init_oval = integer_zero_node;
750
751 is_ptr_ref = TREE_CODE (data_ref_base_type) == POINTER_TYPE
752 && TREE_CODE (data_ref_base) == SSA_NAME;
753 is_array_ref = TREE_CODE (data_ref_base_type) == ARRAY_TYPE
754 && (TREE_CODE (data_ref_base) == VAR_DECL
755 || TREE_CODE (data_ref_base) == COMPONENT_REF
756 || TREE_CODE (data_ref_base) == ARRAY_REF);
757 is_addr_expr = TREE_CODE (data_ref_base) == ADDR_EXPR
758 || TREE_CODE (data_ref_base) == PLUS_EXPR
759 || TREE_CODE (data_ref_base) == MINUS_EXPR;
760 gcc_assert (is_ptr_ref || is_array_ref || is_addr_expr);
761
762 /** Create: &(base[init_val])
763
764 if data_ref_base is an ARRAY_TYPE:
765 base = data_ref_base
766
767 if data_ref_base is the SSA_NAME of a POINTER_TYPE:
768 base = *((scalar_array *) data_ref_base)
769 **/
770
771 if (is_array_ref)
772 array_base = data_ref_base;
773 else /* is_ptr_ref or is_addr_expr */
774 {
775 /* array_ptr = (scalar_array_ptr_type *) data_ref_base; */
776 tree scalar_array_type = build_array_type (scalar_type, 0);
777 tree scalar_array_ptr_type = build_pointer_type (scalar_array_type);
778 tree array_ptr = create_tmp_var (scalar_array_ptr_type, "array_ptr");
779 add_referenced_tmp_var (array_ptr);
780
781 dest = create_tmp_var (TREE_TYPE (data_ref_base), "dataref");
782 add_referenced_tmp_var (dest);
7ccf35ed
DN
783 data_ref_base =
784 force_gimple_operand (data_ref_base, &new_stmt, false, dest);
6775f1f3
IR
785 append_to_statement_list_force (new_stmt, new_stmt_list);
786
787 vec_stmt = fold_convert (scalar_array_ptr_type, data_ref_base);
788 vec_stmt = build2 (MODIFY_EXPR, void_type_node, array_ptr, vec_stmt);
789 new_temp = make_ssa_name (array_ptr, vec_stmt);
790 TREE_OPERAND (vec_stmt, 0) = new_temp;
791 append_to_statement_list_force (vec_stmt, new_stmt_list);
792
793 /* (*array_ptr) */
794 array_base = build_fold_indirect_ref (new_temp);
795 }
796
797 dest = create_tmp_var (TREE_TYPE (init_oval), "newinit");
798 add_referenced_tmp_var (dest);
799 init_val = force_gimple_operand (init_oval, &new_stmt, false, dest);
800 append_to_statement_list_force (new_stmt, new_stmt_list);
801
7ccf35ed
DN
802 if (offset)
803 {
804 tree tmp = create_tmp_var (TREE_TYPE (init_val), "offset");
805 add_referenced_tmp_var (tmp);
806 vec_stmt = build2 (PLUS_EXPR, TREE_TYPE (init_val), init_val, offset);
807 vec_stmt = build2 (MODIFY_EXPR, TREE_TYPE (init_val), tmp, vec_stmt);
808 init_val = make_ssa_name (tmp, vec_stmt);
809 TREE_OPERAND (vec_stmt, 0) = init_val;
810 append_to_statement_list_force (vec_stmt, new_stmt_list);
811 }
812
6775f1f3
IR
813 array_ref = build4 (ARRAY_REF, scalar_type, array_base, init_val,
814 NULL_TREE, NULL_TREE);
815 addr_base = build_fold_addr_expr (array_ref);
816
817 /* addr_expr = addr_base */
818 addr_expr = vect_get_new_vect_var (scalar_ptr_type, vect_pointer_var,
819 get_name (base_name));
820 add_referenced_tmp_var (addr_expr);
821 vec_stmt = build2 (MODIFY_EXPR, void_type_node, addr_expr, addr_base);
822 new_temp = make_ssa_name (addr_expr, vec_stmt);
823 TREE_OPERAND (vec_stmt, 0) = new_temp;
824 append_to_statement_list_force (vec_stmt, new_stmt_list);
7ccf35ed 825
6775f1f3 826 return new_temp;
79fe1b3b
DN
827}
828
829
830/* Function get_vectype_for_scalar_type.
831
832 Returns the vector type corresponding to SCALAR_TYPE as supported
833 by the target. */
834
835static tree
836get_vectype_for_scalar_type (tree scalar_type)
837{
838 enum machine_mode inner_mode = TYPE_MODE (scalar_type);
839 int nbytes = GET_MODE_SIZE (inner_mode);
840 int nunits;
6775f1f3 841 tree vectype;
79fe1b3b
DN
842
843 if (nbytes == 0)
844 return NULL_TREE;
845
846 /* FORNOW: Only a single vector size per target (UNITS_PER_SIMD_WORD)
847 is expected. */
848 nunits = UNITS_PER_SIMD_WORD / nbytes;
849
6775f1f3
IR
850 vectype = build_vector_type (scalar_type, nunits);
851 if (TYPE_MODE (vectype) == BLKmode)
852 return NULL_TREE;
853 return vectype;
79fe1b3b
DN
854}
855
856
857/* Function vect_align_data_ref.
858
859 Handle mislignment of a memory accesses.
860
861 FORNOW: Can't handle misaligned accesses.
862 Make sure that the dataref is aligned. */
863
864static void
865vect_align_data_ref (tree stmt)
866{
867 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
868 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
869
870 /* FORNOW: can't handle misaligned accesses;
871 all accesses expected to be aligned. */
1e128c5f 872 gcc_assert (aligned_access_p (dr));
79fe1b3b
DN
873}
874
875
7ccf35ed 876/* Function vect_create_data_ref_ptr.
79fe1b3b
DN
877
878 Create a memory reference expression for vector access, to be used in a
7ccf35ed
DN
879 vector load/store stmt. The reference is based on a new pointer to vector
880 type (vp).
79fe1b3b
DN
881
882 Input:
7ccf35ed
DN
883 1. STMT: a stmt that references memory. Expected to be of the form
884 MODIFY_EXPR <name, data-ref> or MODIFY_EXPR <data-ref, name>.
885 2. BSI: block_stmt_iterator where new stmts can be added.
886 3. OFFSET (optional): an offset to be added to the initial address accessed
887 by the data-ref in STMT.
888 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
889 pointing to the initial address.
79fe1b3b
DN
890
891 Output:
7ccf35ed
DN
892 1. Declare a new ptr to vector_type, and have it point to the base of the
893 data reference (initial addressed accessed by the data reference).
894 For example, for vector of type V8HI, the following code is generated:
895
896 v8hi *vp;
897 vp = (v8hi *)initial_address;
898
899 if OFFSET is not supplied:
900 initial_address = &a[init];
901 if OFFSET is supplied:
902 initial_address = &a[init + OFFSET];
903
904 Return the initial_address in INITIAL_ADDRESS.
905
906 2. Create a data-reference in the loop based on the new vector pointer vp,
907 and using a new index variable 'idx' as follows:
908
909 vp' = vp + update
910
911 where if ONLY_INIT is true:
912 update = zero
913 and otherwise
914 update = idx + vector_type_size
915
916 Return the pointer vp'.
917
79fe1b3b
DN
918
919 FORNOW: handle only aligned and consecutive accesses. */
920
static tree
vect_create_data_ref_ptr (tree stmt, block_stmt_iterator *bsi, tree offset,
			  tree *initial_address, bool only_init)
{
  tree base_name;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree vect_ptr_type;
  tree vect_ptr;
  tree tag;
  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
  vuse_optype vuses = STMT_VUSE_OPS (stmt);
  int nvuses, nv_may_defs, nv_must_defs;
  int i;
  tree new_temp;
  tree vec_stmt;
  tree new_stmt_list = NULL_TREE;
  tree idx;
  edge pe = loop_preheader_edge (loop);
  basic_block new_bb;
  tree vect_ptr_init;
  tree vectype_size;
  tree ptr_update;
  tree data_ref_ptr;

  base_name = unshare_expr (DR_BASE_NAME (dr));
  if (vect_debug_details (NULL))
    {
      /* Report what kind of base object we are vectorizing, based on the
	 tree code of the (unshared) base of the data reference.  */
      tree data_ref_base = base_name;
      fprintf (dump_file, "create array_ref of type: ");
      print_generic_expr (dump_file, vectype, TDF_SLIM);
      if (TREE_CODE (data_ref_base) == VAR_DECL)
	fprintf (dump_file, "vectorizing a one dimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == ARRAY_REF)
	fprintf (dump_file, "vectorizing a multidimensional array ref: ");
      else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
	fprintf (dump_file, "vectorizing a record based array ref: ");
      else if (TREE_CODE (data_ref_base) == SSA_NAME)
	fprintf (dump_file, "vectorizing a pointer ref: ");
      print_generic_expr (dump_file, base_name, TDF_SLIM);
    }

  /** (1) Create the new vector-pointer variable: **/

  vect_ptr_type = build_pointer_type (vectype);
  vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
				    get_name (base_name));
  add_referenced_tmp_var (vect_ptr);


  /** (2) Handle aliasing information of the new vector-pointer: **/

  tag = STMT_VINFO_MEMTAG (stmt_info);
  gcc_assert (tag);
  get_var_ann (vect_ptr)->type_mem_tag = tag;

  /* Mark for renaming all aliased variables
     (i.e, the may-aliases of the type-mem-tag), so the SSA web is
     rebuilt for them after the new pointer is introduced.  */
  nvuses = NUM_VUSES (vuses);
  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);
  for (i = 0; i < nvuses; i++)
    {
      tree use = VUSE_OP (vuses, i);
      if (TREE_CODE (use) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (use))->uid);
    }
  for (i = 0; i < nv_may_defs; i++)
    {
      tree def = V_MAY_DEF_RESULT (v_may_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }
  for (i = 0; i < nv_must_defs; i++)
    {
      tree def = V_MUST_DEF_OP (v_must_defs, i);
      if (TREE_CODE (def) == SSA_NAME)
	bitmap_set_bit (vars_to_rename, var_ann (SSA_NAME_VAR (def))->uid);
    }


  /** (3) Calculate the initial address of the vector-pointer, and set
	  the vector-pointer to point to it before the loop: **/

  /* Create: (&(base[init_val+offset]) in the loop preheader.  */
  new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
						   offset);
  pe = loop_preheader_edge (loop);
  /* The insertion must not split the edge into a new block.  */
  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt_list);
  gcc_assert (!new_bb);
  *initial_address = new_temp;

  /* Create: p = (vectype *) initial_base  */
  vec_stmt = fold_convert (vect_ptr_type, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  new_bb = bsi_insert_on_edge_immediate (pe, vec_stmt);
  gcc_assert (!new_bb);
  vect_ptr_init = TREE_OPERAND (vec_stmt, 0);


  /** (4) Handle the updating of the vector-pointer inside the loop: **/

  if (only_init) /* No update in loop is required.  */
    return vect_ptr_init;

  idx = vect_create_index_for_vector_ref (loop, bsi);

  /* Create: update = idx * vectype_size  */
  ptr_update = create_tmp_var (integer_type_node, "update");
  add_referenced_tmp_var (ptr_update);
  vectype_size = build_int_cst (integer_type_node,
				GET_MODE_SIZE (TYPE_MODE (vectype)));
  vec_stmt = build2 (MULT_EXPR, integer_type_node, idx, vectype_size);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, ptr_update, vec_stmt);
  new_temp = make_ssa_name (ptr_update, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);

  /* Create: data_ref_ptr = vect_ptr_init + update  */
  vec_stmt = build2 (PLUS_EXPR, vect_ptr_type, vect_ptr_init, new_temp);
  vec_stmt = build2 (MODIFY_EXPR, void_type_node, vect_ptr, vec_stmt);
  new_temp = make_ssa_name (vect_ptr, vec_stmt);
  TREE_OPERAND (vec_stmt, 0) = new_temp;
  bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
  data_ref_ptr = TREE_OPERAND (vec_stmt, 0);

  return data_ref_ptr;
}
1054
1055
1056/* Function vect_create_destination_var.
1057
1058 Create a new temporary of type VECTYPE. */
1059
1060static tree
1061vect_create_destination_var (tree scalar_dest, tree vectype)
1062{
1063 tree vec_dest;
1064 const char *new_name;
1065
1e128c5f 1066 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
79fe1b3b
DN
1067
1068 new_name = get_name (scalar_dest);
1069 if (!new_name)
1070 new_name = "var_";
1071 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, new_name);
1072 add_referenced_tmp_var (vec_dest);
1073
1074 return vec_dest;
1075}
1076
1077
1078/* Function vect_init_vector.
1079
1080 Insert a new stmt (INIT_STMT) that initializes a new vector variable with
1081 the vector elements of VECTOR_VAR. Return the DEF of INIT_STMT. It will be
1082 used in the vectorization of STMT. */
1083
1084static tree
1085vect_init_vector (tree stmt, tree vector_var)
1086{
1087 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1088 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1089 tree new_var;
1090 tree init_stmt;
1091 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1092 tree vec_oprnd;
1093 edge pe;
1094 tree new_temp;
6775f1f3 1095 basic_block new_bb;
79fe1b3b
DN
1096
1097 new_var = vect_get_new_vect_var (vectype, vect_simple_var, "cst_");
1098 add_referenced_tmp_var (new_var);
1099
1100 init_stmt = build2 (MODIFY_EXPR, vectype, new_var, vector_var);
1101 new_temp = make_ssa_name (new_var, init_stmt);
1102 TREE_OPERAND (init_stmt, 0) = new_temp;
1103
1104 pe = loop_preheader_edge (loop);
6775f1f3
IR
1105 new_bb = bsi_insert_on_edge_immediate (pe, init_stmt);
1106 gcc_assert (!new_bb);
79fe1b3b
DN
1107
1108 if (vect_debug_details (NULL))
1109 {
1110 fprintf (dump_file, "created new init_stmt: ");
1111 print_generic_expr (dump_file, init_stmt, TDF_SLIM);
1112 }
1113
1114 vec_oprnd = TREE_OPERAND (init_stmt, 0);
1115 return vec_oprnd;
1116}
1117
1118
1119/* Function vect_get_vec_def_for_operand.
1120
1121 OP is an operand in STMT. This function returns a (vector) def that will be
1122 used in the vectorized stmt for STMT.
1123
1124 In the case that OP is an SSA_NAME which is defined in the loop, then
1125 STMT_VINFO_VEC_STMT of the defining stmt holds the relevant def.
1126
1127 In case OP is an invariant or constant, a new stmt that creates a vector def
1128 needs to be introduced. */
1129
1130static tree
1131vect_get_vec_def_for_operand (tree op, tree stmt)
1132{
1133 tree vec_oprnd;
1134 tree vec_stmt;
1135 tree def_stmt;
1136 stmt_vec_info def_stmt_info = NULL;
1137 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1138 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1139 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1140 struct loop *loop = STMT_VINFO_LOOP (stmt_vinfo);
1141 basic_block bb;
1142 tree vec_inv;
1143 tree t = NULL_TREE;
1144 tree def;
1145 int i;
1146
1147 if (vect_debug_details (NULL))
1148 {
1149 fprintf (dump_file, "vect_get_vec_def_for_operand: ");
1150 print_generic_expr (dump_file, op, TDF_SLIM);
1151 }
1152
1153 /** ===> Case 1: operand is a constant. **/
1154
1155 if (TREE_CODE (op) == INTEGER_CST || TREE_CODE (op) == REAL_CST)
1156 {
1157 /* Create 'vect_cst_ = {cst,cst,...,cst}' */
1158
1159 tree vec_cst;
1160 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1161 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
1162 int nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
1163 tree t = NULL_TREE;
1164 int i;
1165
1166 /* Build a tree with vector elements. */
1167 if (vect_debug_details (NULL))
1168 fprintf (dump_file, "Create vector_cst. nunits = %d", nunits);
1169
1170 for (i = nunits - 1; i >= 0; --i)
1171 {
1172 t = tree_cons (NULL_TREE, op, t);
1173 }
1174 vec_cst = build_vector (vectype, t);
1175 return vect_init_vector (stmt, vec_cst);
1176 }
1177
1e128c5f 1178 gcc_assert (TREE_CODE (op) == SSA_NAME);
79fe1b3b
DN
1179
1180 /** ===> Case 2: operand is an SSA_NAME - find the stmt that defines it. **/
1181
1182 def_stmt = SSA_NAME_DEF_STMT (op);
1183 def_stmt_info = vinfo_for_stmt (def_stmt);
1184
1185 if (vect_debug_details (NULL))
1186 {
1187 fprintf (dump_file, "vect_get_vec_def_for_operand: def_stmt: ");
1188 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1189 }
1190
1191
1192 /** ==> Case 2.1: operand is defined inside the loop. **/
1193
1194 if (def_stmt_info)
1195 {
1196 /* Get the def from the vectorized stmt. */
1197
1198 vec_stmt = STMT_VINFO_VEC_STMT (def_stmt_info);
1e128c5f 1199 gcc_assert (vec_stmt);
79fe1b3b
DN
1200 vec_oprnd = TREE_OPERAND (vec_stmt, 0);
1201 return vec_oprnd;
1202 }
1203
1204
1205 /** ==> Case 2.2: operand is defined by the loop-header phi-node -
1206 it is a reduction/induction. **/
1207
1208 bb = bb_for_stmt (def_stmt);
1209 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
1210 {
1211 if (vect_debug_details (NULL))
1212 fprintf (dump_file, "reduction/induction - unsupported.");
1e128c5f 1213 internal_error ("no support for reduction/induction"); /* FORNOW */
79fe1b3b
DN
1214 }
1215
1216
1217 /** ==> Case 2.3: operand is defined outside the loop -
1218 it is a loop invariant. */
1219
1220 switch (TREE_CODE (def_stmt))
1221 {
1222 case PHI_NODE:
1223 def = PHI_RESULT (def_stmt);
1224 break;
1225 case MODIFY_EXPR:
1226 def = TREE_OPERAND (def_stmt, 0);
1227 break;
1228 case NOP_EXPR:
1229 def = TREE_OPERAND (def_stmt, 0);
1e128c5f 1230 gcc_assert (IS_EMPTY_STMT (def_stmt));
79fe1b3b
DN
1231 def = op;
1232 break;
1233 default:
1234 if (vect_debug_details (NULL))
1235 {
1236 fprintf (dump_file, "unsupported defining stmt: ");
1237 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1238 }
1e128c5f 1239 internal_error ("unsupported defining stmt");
79fe1b3b
DN
1240 }
1241
1242 /* Build a tree with vector elements. Create 'vec_inv = {inv,inv,..,inv}' */
1243
1244 if (vect_debug_details (NULL))
1245 fprintf (dump_file, "Create vector_inv.");
1246
1247 for (i = nunits - 1; i >= 0; --i)
1248 {
1249 t = tree_cons (NULL_TREE, def, t);
1250 }
1251
1252 vec_inv = build_constructor (vectype, t);
1253 return vect_init_vector (stmt, vec_inv);
1254}
1255
1256
1257/* Function vect_finish_stmt_generation.
1258
1259 Insert a new stmt. */
1260
1261static void
1262vect_finish_stmt_generation (tree stmt, tree vec_stmt, block_stmt_iterator *bsi)
1263{
1264 bsi_insert_before (bsi, vec_stmt, BSI_SAME_STMT);
1265
1266 if (vect_debug_details (NULL))
1267 {
1268 fprintf (dump_file, "add new stmt: ");
1269 print_generic_expr (dump_file, vec_stmt, TDF_SLIM);
1270 }
1271
1272 /* Make sure bsi points to the stmt that is being vectorized. */
1273
7ccf35ed
DN
1274 /* Assumption: any stmts created for the vectorization of stmt S were
1275 inserted before S. BSI is expected to point to S or some new stmt before S. */
79fe1b3b
DN
1276
1277 while (stmt != bsi_stmt (*bsi) && !bsi_end_p (*bsi))
1278 bsi_next (bsi);
1e128c5f 1279 gcc_assert (stmt == bsi_stmt (*bsi));
79fe1b3b
DN
1280}
1281
1282
1283/* Function vectorizable_assignment.
1284
1285 Check if STMT performs an assignment (copy) that can be vectorized.
1286 If VEC_STMT is also passed, vectorize the STMT: create a vectorized
1287 stmt to replace it, put it in VEC_STMT, and insert it at BSI.
1288 Return FALSE if not a vectorizable STMT, TRUE otherwise. */
1289
1290static bool
1291vectorizable_assignment (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
1292{
1293 tree vec_dest;
1294 tree scalar_dest;
1295 tree op;
1296 tree vec_oprnd;
1297 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1298 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1299 struct loop *loop = STMT_VINFO_LOOP (stmt_info);
1300 tree new_temp;
1301
1302 /* Is vectorizable assignment? */
1303
1304 if (TREE_CODE (stmt) != MODIFY_EXPR)
1305 return false;
1306
1307 scalar_dest = TREE_OPERAND (stmt, 0);
1308 if (TREE_CODE (scalar_dest) != SSA_NAME)
1309 return false;
1310
1311 op = TREE_OPERAND (stmt, 1);
1312 if (!vect_is_simple_use (op, loop, NULL))
1313 {
1314 if (vect_debug_details (NULL))
1315 fprintf (dump_file, "use not simple.");
1316 return false;
1317 }
1318
1319 if (!vec_stmt) /* transformation not required. */
1320 {
1321 STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type;
1322 return true;
1323 }
1324
1325 /** Trasform. **/
1326 if (vect_debug_details (NULL))
1327 fprintf (dump_file, "transform assignment.");
1328
1329 /* Handle def. */
1330 vec_dest = vect_create_destination_var (scalar_dest, vectype);
1331
1332 /* Handle use. */
1333 op = TREE_OPERAND (stmt, 1);
1334 vec_oprnd = vect_get_vec_def_for_operand (op, stmt);
1335
1336 /* Arguments are ready. create the new vector stmt. */
1337 *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, vec_oprnd);
1338 new_temp = make_ssa_name (vec_dest, *vec_stmt);
1339 TREE_OPERAND (*vec_stmt, 0) = new_temp;
1340 vect_finish_stmt_generation (stmt, *vec_stmt, bsi);
1341
1342 return true;
1343}
1344
1345
/* Function vectorizable_operation.

   Check if STMT performs a binary or unary operation that can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_operation (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree vec_dest;
  tree scalar_dest;
  tree operation;
  tree op0, op1 = NULL;
  tree vec_oprnd0, vec_oprnd1=NULL;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  int i;
  enum tree_code code;
  enum machine_mode vec_mode;
  tree new_temp;
  int op_type;
  tree op;
  optab optab;

  /* Is STMT a vectorizable binary/unary operation?  */
  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  if (TREE_CODE (TREE_OPERAND (stmt, 0)) != SSA_NAME)
    return false;

  operation = TREE_OPERAND (stmt, 1);
  code = TREE_CODE (operation);
  optab = optab_for_tree_code (code, vectype);

  /* Support only unary or binary operations.  */
  op_type = TREE_CODE_LENGTH (code);
  if (op_type != unary_op && op_type != binary_op)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "num. args = %d (not unary/binary op).", op_type);
      return false;
    }

  /* All operands must be simple uses (constants, invariants, or defs
     the vectorizer can handle).  */
  for (i = 0; i < op_type; i++)
    {
      op = TREE_OPERAND (operation, i);
      if (!vect_is_simple_use (op, loop, NULL))
	{
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "use not simple.");
	  return false;
	}
    }

  /* Supportable by target?  */
  if (!optab)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "no optab.");
      return false;
    }
  vec_mode = TYPE_MODE (vectype);
  if (optab->handlers[(int) vec_mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "op not supported by target.");
      return false;
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = op_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform binary/unary operation.");

  /* Handle def.  */
  scalar_dest = TREE_OPERAND (stmt, 0);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);

  /* Handle uses.  */
  op0 = TREE_OPERAND (operation, 0);
  vec_oprnd0 = vect_get_vec_def_for_operand (op0, stmt);

  if (op_type == binary_op)
    {
      op1 = TREE_OPERAND (operation, 1);
      vec_oprnd1 = vect_get_vec_def_for_operand (op1, stmt);
    }

  /* Arguments are ready.  Create the new vector stmt.  */

  if (op_type == binary_op)
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build2 (code, vectype, vec_oprnd0, vec_oprnd1));
  else
    *vec_stmt = build2 (MODIFY_EXPR, vectype, vec_dest,
		build1 (code, vectype, vec_oprnd0));
  new_temp = make_ssa_name (vec_dest, *vec_stmt);
  TREE_OPERAND (*vec_stmt, 0) = new_temp;
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1457
1458
/* Function vectorizable_store.

   Check if STMT defines a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_store (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree data_ref;
  tree op;
  tree vec_oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  enum machine_mode vec_mode;
  tree dummy;

  /* Is vectorizable store?  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != ARRAY_REF
      && TREE_CODE (scalar_dest) != INDIRECT_REF)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (!vect_is_simple_use (op, loop, NULL))
    {
      if (vect_debug_details (NULL))
	fprintf (dump_file, "use not simple.");
      return false;
    }

  vec_mode = TYPE_MODE (vectype);
  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - array initialization with 0).  */
  if (mov_optab->handlers[(int)vec_mode].insn_code == CODE_FOR_nothing)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  /* FORNOW: only aligned stores are handled.  */
  if (!aligned_access_p (STMT_VINFO_DATA_REF (stmt_info)))
    return false;

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = store_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform store");

  /* Handle use - get the vectorized def from the defining stmt.  */
  vec_oprnd1 = vect_get_vec_def_for_operand (op, stmt);

  /* Handle def.  */
  /* FORNOW: make sure the data reference is aligned.  */
  vect_align_data_ref (stmt);
  data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
  data_ref = build_fold_indirect_ref (data_ref);

  /* Arguments are ready.  Create the new vector stmt.  */
  *vec_stmt = build2 (MODIFY_EXPR, vectype, data_ref, vec_oprnd1);
  vect_finish_stmt_generation (stmt, *vec_stmt, bsi);

  return true;
}
1536
1537
/* vectorizable_load.

   Check if STMT reads a non scalar data-ref (array/pointer/structure) that
   can be vectorized.
   If VEC_STMT is also passed, vectorize the STMT: create a vectorized
   stmt to replace it, put it in VEC_STMT, and insert it at BSI.
   Return FALSE if not a vectorizable STMT, TRUE otherwise.  */

static bool
vectorizable_load (tree stmt, block_stmt_iterator *bsi, tree *vec_stmt)
{
  tree scalar_dest;
  tree vec_dest = NULL;
  tree data_ref = NULL;
  tree op;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree new_temp;
  int mode;
  tree init_addr;
  tree new_stmt;
  tree dummy;
  basic_block new_bb;
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  edge pe = loop_preheader_edge (loop);
  bool software_pipeline_loads_p = false;

  /* Is vectorizable load?  */

  if (TREE_CODE (stmt) != MODIFY_EXPR)
    return false;

  scalar_dest = TREE_OPERAND (stmt, 0);
  if (TREE_CODE (scalar_dest) != SSA_NAME)
    return false;

  op = TREE_OPERAND (stmt, 1);
  if (TREE_CODE (op) != ARRAY_REF && TREE_CODE (op) != INDIRECT_REF)
    return false;

  if (!STMT_VINFO_DATA_REF (stmt_info))
    return false;

  mode = (int) TYPE_MODE (vectype);

  /* FORNOW. In some cases can vectorize even if data-type not supported
     (e.g. - data copies).  */
  if (mov_optab->handlers[mode].insn_code == CODE_FOR_nothing)
    {
      if (vect_debug_details (loop))
	fprintf (dump_file, "Aligned load, but unsupported type.");
      return false;
    }

  if (!aligned_access_p (dr))
    {
      /* Misaligned access: either software-pipeline the loads using the
	 target's realign-load support, or rely on the target tolerating
	 misaligned vector memory accesses.  */
      if (vec_realign_load_optab->handlers[mode].insn_code != CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	software_pipeline_loads_p = true;
      else if (!targetm.vectorize.misaligned_mem_ok (mode))
	{
	  /* Possibly unaligned access, and can't software pipeline the
	     loads.  */
	  if (vect_debug_details (loop))
	    fprintf (dump_file, "Arbitrary load not supported.");
	  return false;
	}
    }

  if (!vec_stmt) /* transformation not required.  */
    {
      STMT_VINFO_TYPE (stmt_info) = load_vec_info_type;
      return true;
    }

  /** Transform.  **/

  if (vect_debug_details (NULL))
    fprintf (dump_file, "transform load.");

  if (!software_pipeline_loads_p)
    {
      /* Create:
	 p = initial_addr;
	 indx = 0;
	 loop {
	   vec_dest = *(p);
	   indx = indx + 1;
	 }
      */

      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE, &dummy, false);
      if (aligned_access_p (dr))
	data_ref = build_fold_indirect_ref (data_ref);
      else
	{
	  /* Known misalignment (in bytes) is encoded, scaled to bits, as
	     the second operand of the MISALIGNED_INDIRECT_REF; -1 means
	     unknown misalignment.  */
	  int mis = DR_MISALIGNMENT (dr);
	  tree tmis = (mis == -1 ?
		       integer_zero_node :
		       build_int_cst (integer_type_node, mis));
	  tmis = int_const_binop (MULT_EXPR, tmis,
			build_int_cst (integer_type_node, BITS_PER_UNIT), 1);
	  data_ref = build2 (MISALIGNED_INDIRECT_REF, vectype, data_ref, tmis);
	}
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }
  else /* software-pipeline the loads  */
    {
      /* Create:
	 p1 = initial_addr;
	 msq_init = *(floor(p1))
	 p2 = initial_addr + VS - 1;
	 magic = have_builtin ? builtin_result : initial_address;
	 indx = 0;
	 loop {
	   p2' = p2 + indx * vectype_size
	   lsq = *(floor(p2'))
	   vec_dest = realign_load (msq, lsq, magic)
	   indx = indx + 1;
	   msq = lsq;
	 }
      */

      tree offset;
      tree magic;
      tree phi_stmt;
      tree msq_init;
      tree msq, lsq;
      tree dataref_ptr;
      tree params;

      /* <1> Create msq_init = *(floor(p1)) in the loop preheader  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      data_ref = vect_create_data_ref_ptr (stmt, bsi, NULL_TREE,
					   &init_addr, true);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, data_ref);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      msq_init = TREE_OPERAND (new_stmt, 0);


      /* <2> Create lsq = *(floor(p2')) in the loop.  The offset is
	 VS - 1 elements past the initial address.  */
      offset = build_int_cst (integer_type_node,
			      GET_MODE_NUNITS (TYPE_MODE (vectype)));
      offset = int_const_binop (MINUS_EXPR, offset, integer_one_node, 1);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      dataref_ptr = vect_create_data_ref_ptr (stmt, bsi, offset, &dummy, false);
      data_ref = build1 (ALIGN_INDIRECT_REF, vectype, dataref_ptr);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
      lsq = TREE_OPERAND (new_stmt, 0);


      /* <3> Compute the "magic" third operand of the realign load.  */
      if (targetm.vectorize.builtin_mask_for_load)
	{
	  /* Create permutation mask, if required, in loop preheader.  */
	  tree builtin_decl;
	  params = build_tree_list (NULL_TREE, init_addr);
	  vec_dest = vect_create_destination_var (scalar_dest, vectype);
	  builtin_decl = targetm.vectorize.builtin_mask_for_load ();
	  new_stmt = build_function_call_expr (builtin_decl, params);
	  new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
	  new_temp = make_ssa_name (vec_dest, new_stmt);
	  TREE_OPERAND (new_stmt, 0) = new_temp;
	  new_bb = bsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	  magic = TREE_OPERAND (new_stmt, 0);
	}
      else
	{
	  /* Use current address instead of init_addr for reduced reg
	     pressure.  */
	  magic = dataref_ptr;
	}


      /* <4> Create msq = phi <msq_init, lsq> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      msq = make_ssa_name (vec_dest, NULL_TREE);
      phi_stmt = create_phi_node (msq, loop->header); /* CHECKME */
      SSA_NAME_DEF_STMT (msq) = phi_stmt;
      add_phi_arg (&phi_stmt, msq_init, loop_preheader_edge (loop));
      add_phi_arg (&phi_stmt, lsq, loop_latch_edge (loop));


      /* <5> Create <vec_dest = realign_load (msq, lsq, magic)> in loop  */
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      new_stmt = build3 (REALIGN_LOAD_EXPR, vectype, msq, lsq, magic);
      new_stmt = build2 (MODIFY_EXPR, vectype, vec_dest, new_stmt);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      TREE_OPERAND (new_stmt, 0) = new_temp;
      vect_finish_stmt_generation (stmt, new_stmt, bsi);
    }

  *vec_stmt = new_stmt;
  return true;
}
1745
1746
1747/* Function vect_transform_stmt.
1748
1749 Create a vectorized stmt to replace STMT, and insert it at BSI. */
1750
1751static bool
1752vect_transform_stmt (tree stmt, block_stmt_iterator *bsi)
1753{
1754 bool is_store = false;
1755 tree vec_stmt = NULL_TREE;
1756 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1e128c5f 1757 bool done;
79fe1b3b
DN
1758
1759 switch (STMT_VINFO_TYPE (stmt_info))
1760 {
1761 case op_vec_info_type:
1e128c5f
GB
1762 done = vectorizable_operation (stmt, bsi, &vec_stmt);
1763 gcc_assert (done);
79fe1b3b
DN
1764 break;
1765
1766 case assignment_vec_info_type:
1e128c5f
GB
1767 done = vectorizable_assignment (stmt, bsi, &vec_stmt);
1768 gcc_assert (done);
79fe1b3b
DN
1769 break;
1770
1771 case load_vec_info_type:
1e128c5f
GB
1772 done = vectorizable_load (stmt, bsi, &vec_stmt);
1773 gcc_assert (done);
79fe1b3b
DN
1774 break;
1775
1776 case store_vec_info_type:
1e128c5f
GB
1777 done = vectorizable_store (stmt, bsi, &vec_stmt);
1778 gcc_assert (done);
79fe1b3b
DN
1779 is_store = true;
1780 break;
1781 default:
1782 if (vect_debug_details (NULL))
1783 fprintf (dump_file, "stmt not supported.");
1e128c5f 1784 gcc_unreachable ();
79fe1b3b
DN
1785 }
1786
1787 STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt;
1788
1789 return is_store;
1790}
1791
1792
/* Function vect_transform_loop_bound.

   Create a new exit condition for the loop: replace the original exit
   test with one that counts N/VF iterations of a fresh induction
   variable.  */

static void
vect_transform_loop_bound (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  edge exit_edge = loop->single_exit;
  block_stmt_iterator loop_exit_bsi = bsi_last (exit_edge->src);
  tree indx_before_incr, indx_after_incr;
  tree orig_cond_expr;
  HOST_WIDE_INT old_N = 0;
  int vf;
  tree cond_stmt;
  tree new_loop_bound;
  tree cond;
  tree lb_type;

  /* The number of iterations must be a known compile-time constant.  */
  gcc_assert (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo));
  old_N = LOOP_VINFO_NITERS (loop_vinfo);
  vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);

  /* FORNOW:
     assuming number-of-iterations divides by the vectorization factor.  */
  gcc_assert (!(old_N % vf));

  orig_cond_expr = LOOP_VINFO_EXIT_COND (loop_vinfo);
  gcc_assert (orig_cond_expr);
  /* The exit condition is expected to be the last stmt of the exit block.  */
  gcc_assert (orig_cond_expr == bsi_stmt (loop_exit_bsi));

  /* Create a new counting IV: 0, 1, 2, ...  */
  create_iv (integer_zero_node, integer_one_node, NULL_TREE, loop,
	     &loop_exit_bsi, false, &indx_before_incr, &indx_after_incr);

  /* bsi_insert is using BSI_NEW_STMT.  We need to bump it back
     to point to the exit condition.  */
  bsi_next (&loop_exit_bsi);
  gcc_assert (bsi_stmt (loop_exit_bsi) == orig_cond_expr);

  /* New loop exit test: IV compared against N/VF, using the type of
     the original bound operand.  */
  lb_type = TREE_TYPE (TREE_OPERAND (TREE_OPERAND (orig_cond_expr, 0), 1));
  new_loop_bound = build_int_cst (lb_type, old_N/vf);

  if (exit_edge->flags & EDGE_TRUE_VALUE) /* 'then' edge exits the loop.  */
    cond = build2 (GE_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);
  else /* 'then' edge loops back.  */
    cond = build2 (LT_EXPR, boolean_type_node, indx_after_incr, new_loop_bound);

  /* Keep the original COND_EXPR arms (the goto labels); only the
     condition changes.  */
  cond_stmt = build3 (COND_EXPR, TREE_TYPE (orig_cond_expr), cond,
	TREE_OPERAND (orig_cond_expr, 1), TREE_OPERAND (orig_cond_expr, 2));

  bsi_insert_before (&loop_exit_bsi, cond_stmt, BSI_SAME_STMT);

  /* Remove old loop exit test.  */
  bsi_remove (&loop_exit_bsi);

  if (vect_debug_details (NULL))
    print_generic_expr (dump_file, cond_stmt, TDF_SLIM);
}
1852
1853
/* Function vect_transform_loop.

   The analysis phase has determined that the loop is vectorizable.
   Vectorize the loop - create vectorized stmts to replace the scalar
   stmts in the loop, and update the loop exit condition.  */

static void
vect_transform_loop (loop_vec_info loop_vinfo,
		     struct loops *loops ATTRIBUTE_UNUSED)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int i;
#ifdef ENABLE_CHECKING
  int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
#endif

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vec_transform_loop>>\n");

  /* 1) Make sure the loop header has exactly two entries
     2) Make sure we have a preheader basic block.  */

  gcc_assert (loop->header->pred->pred_next);
  gcc_assert (!loop->header->pred->pred_next->pred_next);

  loop_split_edge_with (loop_preheader_edge (loop), NULL);


  /* FORNOW: the vectorizer supports only loops which body consist
     of one basic block (header + empty latch).  When the vectorizer will
     support more involved loop forms, the order by which the BBs are
     traversed need to be reconsidered.  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];

      /* No bsi_next in the loop header: stores are removed in place,
	 which already advances the iterator.  */
      for (si = bsi_start (bb); !bsi_end_p (si);)
	{
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info;
	  bool is_store;
#ifdef ENABLE_CHECKING
	  tree vectype;
#endif

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "------>vectorizing statement: ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  stmt_info = vinfo_for_stmt (stmt);
	  gcc_assert (stmt_info);
	  if (!STMT_VINFO_RELEVANT_P (stmt_info))
	    {
	      bsi_next (&si);
	      continue;
	    }
#ifdef ENABLE_CHECKING
	  /* FORNOW: Verify that all stmts operate on the same number of
	             units and no inner unrolling is necessary.  */
	  vectype = STMT_VINFO_VECTYPE (stmt_info);
	  gcc_assert (GET_MODE_NUNITS (TYPE_MODE (vectype))
		      == vectorization_factor);
#endif
	  /* -------- vectorize statement ------------ */
	  if (vect_debug_details (NULL))
	    fprintf (dump_file, "transform statement.");

	  is_store = vect_transform_stmt (stmt, &si);
	  if (is_store)
	    {
	      /* Free the attached stmt_vec_info and remove the stmt.  */
	      stmt_ann_t ann = stmt_ann (stmt);
	      free (stmt_info);
	      set_stmt_info (ann, NULL);
	      bsi_remove (&si);
	      continue;
	    }

	  bsi_next (&si);
	} /* stmts in BB */
    } /* BBs in loop */

  vect_transform_loop_bound (loop_vinfo);

  if (vect_debug_details (loop))
    fprintf (dump_file,"Success! loop vectorized.");
  if (vect_debug_stats (loop))
    fprintf (dump_file, "LOOP VECTORIZED.");
}
1948
1949
1950/* Function vect_is_simple_use.
1951
1952 Input:
1953 LOOP - the loop that is being vectorized.
1954 OPERAND - operand of a stmt in LOOP.
1955 DEF - the defining stmt in case OPERAND is an SSA_NAME.
1956
1957 Returns whether a stmt with OPERAND can be vectorized.
1958 Supportable operands are constants, loop invariants, and operands that are
6cb38cd4 1959 defined by the current iteration of the loop. Unsupportable operands are
79fe1b3b
DN
1960 those that are defined by a previous iteration of the loop (as is the case
1961 in reduction/induction computations). */
1962
1963static bool
1964vect_is_simple_use (tree operand, struct loop *loop, tree *def)
1965{
1966 tree def_stmt;
1967 basic_block bb;
1968
1969 if (def)
1970 *def = NULL_TREE;
1971
1972 if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
1973 return true;
1974
1975 if (TREE_CODE (operand) != SSA_NAME)
1976 return false;
1977
1978 def_stmt = SSA_NAME_DEF_STMT (operand);
1979 if (def_stmt == NULL_TREE )
1980 {
1981 if (vect_debug_details (NULL))
1982 fprintf (dump_file, "no def_stmt.");
1983 return false;
1984 }
1985
1986 /* empty stmt is expected only in case of a function argument.
1987 (Otherwise - we expect a phi_node or a modify_expr). */
1988 if (IS_EMPTY_STMT (def_stmt))
1989 {
1990 tree arg = TREE_OPERAND (def_stmt, 0);
1991 if (TREE_CODE (arg) == INTEGER_CST || TREE_CODE (arg) == REAL_CST)
1992 return true;
1993 if (vect_debug_details (NULL))
1994 {
1995 fprintf (dump_file, "Unexpected empty stmt: ");
1996 print_generic_expr (dump_file, def_stmt, TDF_SLIM);
1997 }
1998 return false;
1999 }
2000
2001 /* phi_node inside the loop indicates an induction/reduction pattern.
2002 This is not supported yet. */
2003 bb = bb_for_stmt (def_stmt);
2004 if (TREE_CODE (def_stmt) == PHI_NODE && flow_bb_inside_loop_p (loop, bb))
2005 {
2006 if (vect_debug_details (NULL))
2007 fprintf (dump_file, "reduction/induction - unsupported.");
2008 return false; /* FORNOW: not supported yet. */
2009 }
2010
2011 /* Expecting a modify_expr or a phi_node. */
2012 if (TREE_CODE (def_stmt) == MODIFY_EXPR
2013 || TREE_CODE (def_stmt) == PHI_NODE)
2014 {
2015 if (def)
2016 *def = def_stmt;
2017 return true;
2018 }
2019
2020 return false;
2021}
2022
2023
2024/* Function vect_analyze_operations.
2025
2026 Scan the loop stmts and make sure they are all vectorizable. */
2027
2028static bool
2029vect_analyze_operations (loop_vec_info loop_vinfo)
2030{
2031 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2032 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
2033 int nbbs = loop->num_nodes;
2034 block_stmt_iterator si;
2035 int vectorization_factor = 0;
2036 int i;
2037 bool ok;
2038 tree scalar_type;
2039
2040 if (vect_debug_details (NULL))
2041 fprintf (dump_file, "\n<<vect_analyze_operations>>\n");
2042
2043 for (i = 0; i < nbbs; i++)
2044 {
2045 basic_block bb = bbs[i];
2046
2047 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
2048 {
2049 tree stmt = bsi_stmt (si);
2050 int nunits;
2051 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2052 tree vectype;
2053
2054 if (vect_debug_details (NULL))
2055 {
2056 fprintf (dump_file, "==> examining statement: ");
2057 print_generic_expr (dump_file, stmt, TDF_SLIM);
2058 }
1e128c5f
GB
2059
2060 gcc_assert (stmt_info);
2061
79fe1b3b
DN
2062 /* skip stmts which do not need to be vectorized.
2063 this is expected to include:
2064 - the COND_EXPR which is the loop exit condition
2065 - any LABEL_EXPRs in the loop
2066 - computations that are used only for array indexing or loop
2067 control */
2068
2069 if (!STMT_VINFO_RELEVANT_P (stmt_info))
2070 {
2071 if (vect_debug_details (NULL))
2072 fprintf (dump_file, "irrelevant.");
2073 continue;
2074 }
2075
2076 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))))
2077 {
2078 if (vect_debug_stats (loop) || vect_debug_details (loop))
2079 {
2080 fprintf (dump_file, "not vectorized: vector stmt in loop:");
2081 print_generic_expr (dump_file, stmt, TDF_SLIM);
2082 }
2083 return false;
2084 }
2085
2086 if (STMT_VINFO_DATA_REF (stmt_info))
2087 scalar_type = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info)));
2088 else if (TREE_CODE (stmt) == MODIFY_EXPR)
2089 scalar_type = TREE_TYPE (TREE_OPERAND (stmt, 0));
2090 else
2091 scalar_type = TREE_TYPE (stmt);
2092
2093 if (vect_debug_details (NULL))
2094 {
2095 fprintf (dump_file, "get vectype for scalar type: ");
2096 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2097 }
2098
2099 vectype = get_vectype_for_scalar_type (scalar_type);
2100 if (!vectype)
2101 {
2102 if (vect_debug_stats (loop) || vect_debug_details (loop))
2103 {
2104 fprintf (dump_file, "not vectorized: unsupported data-type ");
2105 print_generic_expr (dump_file, scalar_type, TDF_SLIM);
2106 }
2107 return false;
2108 }
2109
2110 if (vect_debug_details (NULL))
2111 {
2112 fprintf (dump_file, "vectype: ");
2113 print_generic_expr (dump_file, vectype, TDF_SLIM);
2114 }
2115 STMT_VINFO_VECTYPE (stmt_info) = vectype;
2116
2117 ok = (vectorizable_operation (stmt, NULL, NULL)
2118 || vectorizable_assignment (stmt, NULL, NULL)
2119 || vectorizable_load (stmt, NULL, NULL)
2120 || vectorizable_store (stmt, NULL, NULL));
2121
2122 if (!ok)
2123 {
2124 if (vect_debug_stats (loop) || vect_debug_details (loop))
2125 {
2126 fprintf (dump_file, "not vectorized: stmt not supported: ");
2127 print_generic_expr (dump_file, stmt, TDF_SLIM);
2128 }
2129 return false;
2130 }
2131
2132 nunits = GET_MODE_NUNITS (TYPE_MODE (vectype));
2133 if (vect_debug_details (NULL))
2134 fprintf (dump_file, "nunits = %d", nunits);
2135
2136 if (vectorization_factor)
2137 {
2138 /* FORNOW: don't allow mixed units.
2139 This restriction will be relaxed in the future. */
2140 if (nunits != vectorization_factor)
2141 {
2142 if (vect_debug_stats (loop) || vect_debug_details (loop))
2143 fprintf (dump_file, "not vectorized: mixed data-types");
2144 return false;
2145 }
2146 }
2147 else
2148 vectorization_factor = nunits;
2149 }
2150 }
2151
2152 /* TODO: Analyze cost. Decide if worth while to vectorize. */
2153 if (!vectorization_factor)
2154 {
2155 if (vect_debug_stats (loop) || vect_debug_details (loop))
2156 fprintf (dump_file, "not vectorized: unsupported data-type");
2157 return false;
2158 }
2159 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
2160
2161 /* FORNOW: handle only cases where the loop bound divides by the
2162 vectorization factor. */
2163
2164 if (vect_debug_details (NULL))
2165 fprintf (dump_file,
2166 "vectorization_factor = %d, niters = " HOST_WIDE_INT_PRINT_DEC,
2167 vectorization_factor, LOOP_VINFO_NITERS (loop_vinfo));
2168
2169 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2170 {
2171 if (vect_debug_stats (loop) || vect_debug_details (loop))
2172 fprintf (dump_file, "not vectorized: Unknown loop bound.");
2173 return false;
2174 }
2175
2176 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2177 && LOOP_VINFO_NITERS (loop_vinfo) % vectorization_factor != 0)
2178 {
2179 if (vect_debug_stats (loop) || vect_debug_details (loop))
2180 fprintf (dump_file, "not vectorized: loop bound doesn't divided by %d.",
2181 vectorization_factor);
2182 return false;
2183 }
2184
2185 return true;
2186}
2187
2188
2189/* Function exist_non_indexing_operands_for_use_p
2190
2191 USE is one of the uses attached to STMT. Check if USE is
2192 used in STMT for anything other than indexing an array. */
2193
2194static bool
2195exist_non_indexing_operands_for_use_p (tree use, tree stmt)
2196{
2197 tree operand;
2198 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2199
2200 /* USE corresponds to some operand in STMT. If there is no data
2201 reference in STMT, then any operand that corresponds to USE
2202 is not indexing an array. */
2203 if (!STMT_VINFO_DATA_REF (stmt_info))
2204 return true;
2205
2206 /* STMT has a data_ref. FORNOW this means that its of one of
2207 the following forms:
2208 -1- ARRAY_REF = var
2209 -2- var = ARRAY_REF
2210 (This should have been verified in analyze_data_refs).
2211
2212 'var' in the second case corresponds to a def, not a use,
2213 so USE cannot correspond to any operands that are not used
2214 for array indexing.
2215
2216 Therefore, all we need to check is if STMT falls into the
2217 first case, and whether var corresponds to USE. */
2218
2219 if (TREE_CODE (TREE_OPERAND (stmt, 0)) == SSA_NAME)
2220 return false;
2221
2222 operand = TREE_OPERAND (stmt, 1);
2223
2224 if (TREE_CODE (operand) != SSA_NAME)
2225 return false;
2226
2227 if (operand == use)
2228 return true;
2229
2230 return false;
2231}
2232
2233
2234/* Function vect_is_simple_iv_evolution.
2235
2236 FORNOW: A simple evolution of an induction variables in the loop is
2237 considered a polynomial evolution with constant step. */
2238
2239static bool
2240vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree * init,
2241 tree * step, bool strict)
2242{
2243 tree init_expr;
2244 tree step_expr;
2245
2246 tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);
2247
2248 /* When there is no evolution in this loop, the evolution function
2249 is not "simple". */
2250 if (evolution_part == NULL_TREE)
2251 return false;
2252
2253 /* When the evolution is a polynomial of degree >= 2
2254 the evolution function is not "simple". */
2255 if (tree_is_chrec (evolution_part))
2256 return false;
2257
2258 step_expr = evolution_part;
2259 init_expr = initial_condition (access_fn);
2260
2261 if (vect_debug_details (NULL))
2262 {
2263 fprintf (dump_file, "step: ");
2264 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2265 fprintf (dump_file, ", init: ");
2266 print_generic_expr (dump_file, init_expr, TDF_SLIM);
2267 }
2268
2269 *init = init_expr;
2270 *step = step_expr;
2271
2272 if (TREE_CODE (step_expr) != INTEGER_CST)
2273 {
2274 if (vect_debug_details (NULL))
2275 fprintf (dump_file, "step unknown.");
2276 return false;
2277 }
2278
2279 if (strict)
2280 if (!integer_onep (step_expr))
2281 {
2282 if (vect_debug_details (NULL))
2283 print_generic_expr (dump_file, step_expr, TDF_SLIM);
2284 return false;
2285 }
2286
2287 return true;
2288}
2289
2290
2291/* Function vect_analyze_scalar_cycles.
2292
2293 Examine the cross iteration def-use cycles of scalar variables, by
2294 analyzing the loop (scalar) PHIs; verify that the cross iteration def-use
2295 cycles that they represent do not impede vectorization.
2296
2297 FORNOW: Reduction as in the following loop, is not supported yet:
2298 loop1:
2299 for (i=0; i<N; i++)
2300 sum += a[i];
2301 The cross-iteration cycle corresponding to variable 'sum' will be
2302 considered too complicated and will impede vectorization.
2303
2304 FORNOW: Induction as in the following loop, is not supported yet:
2305 loop2:
2306 for (i=0; i<N; i++)
2307 a[i] = i;
2308
2309 However, the following loop *is* vectorizable:
2310 loop3:
2311 for (i=0; i<N; i++)
2312 a[i] = b[i];
2313
2314 In both loops there exists a def-use cycle for the variable i:
2315 loop: i_2 = PHI (i_0, i_1)
2316 a[i_2] = ...;
2317 i_1 = i_2 + 1;
2318 GOTO loop;
2319
2320 The evolution of the above cycle is considered simple enough,
2321 however, we also check that the cycle does not need to be
2322 vectorized, i.e - we check that the variable that this cycle
2323 defines is only used for array indexing or in stmts that do not
2324 need to be vectorized. This is not the case in loop2, but it
2325 *is* the case in loop3. */
2326
2327static bool
2328vect_analyze_scalar_cycles (loop_vec_info loop_vinfo)
2329{
2330 tree phi;
2331 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2332 basic_block bb = loop->header;
2333 tree dummy;
2334
2335 if (vect_debug_details (NULL))
2336 fprintf (dump_file, "\n<<vect_analyze_scalar_cycles>>\n");
2337
2338 for (phi = phi_nodes (bb); phi; phi = TREE_CHAIN (phi))
2339 {
2340 tree access_fn = NULL;
2341
2342 if (vect_debug_details (NULL))
2343 {
2344 fprintf (dump_file, "Analyze phi: ");
2345 print_generic_expr (dump_file, phi, TDF_SLIM);
2346 }
2347
2348 /* Skip virtual phi's. The data dependences that are associated with
2349 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
2350
2351 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
2352 {
2353 if (vect_debug_details (NULL))
2354 fprintf (dump_file, "virtual phi. skip.");
2355 continue;
2356 }
2357
2358 /* Analyze the evolution function. */
2359
2360 /* FORNOW: The only scalar cross-iteration cycles that we allow are
2361 those of loop induction variables; This property is verified here.
2362
2363 Furthermore, if that induction variable is used in an operation
2364 that needs to be vectorized (i.e, is not solely used to index
2365 arrays and check the exit condition) - we do not support its
2366 vectorization yet. This property is verified in vect_is_simple_use,
2367 during vect_analyze_operations. */
2368
6775f1f3
IR
2369 access_fn = /* instantiate_parameters
2370 (loop,*/
2371 analyze_scalar_evolution (loop, PHI_RESULT (phi));
79fe1b3b
DN
2372
2373 if (!access_fn)
2374 {
2375 if (vect_debug_stats (loop) || vect_debug_details (loop))
2376 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2377 return false;
2378 }
2379
2380 if (vect_debug_details (NULL))
2381 {
2382 fprintf (dump_file, "Access function of PHI: ");
2383 print_generic_expr (dump_file, access_fn, TDF_SLIM);
2384 }
2385
2386 if (!vect_is_simple_iv_evolution (loop->num, access_fn, &dummy,
2387 &dummy, false))
2388 {
2389 if (vect_debug_stats (loop) || vect_debug_details (loop))
2390 fprintf (dump_file, "not vectorized: unsupported scalar cycle.");
2391 return false;
2392 }
2393 }
2394
2395 return true;
2396}
2397
2398
2399/* Function vect_analyze_data_ref_dependence.
2400
2401 Return TRUE if there (might) exist a dependence between a memory-reference
2402 DRA and a memory-reference DRB. */
2403
2404static bool
2405vect_analyze_data_ref_dependence (struct data_reference *dra,
2406 struct data_reference *drb,
2407 struct loop *loop)
2408{
6775f1f3 2409 bool differ_p;
79fe1b3b 2410 struct data_dependence_relation *ddr;
6775f1f3 2411
79fe1b3b
DN
2412 if (!array_base_name_differ_p (dra, drb, &differ_p))
2413 {
6775f1f3 2414 if (vect_debug_stats (loop) || vect_debug_details (loop))
79fe1b3b 2415 {
6775f1f3
IR
2416 fprintf (dump_file,
2417 "not vectorized: can't determine dependence between: ");
79fe1b3b
DN
2418 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2419 fprintf (dump_file, " and ");
2420 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2421 }
2422 return true;
2423 }
2424
2425 if (differ_p)
2426 return false;
2427
2428 ddr = initialize_data_dependence_relation (dra, drb);
2429 compute_affine_dependence (ddr);
2430
2431 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2432 return false;
2433
2434 if (vect_debug_stats (loop) || vect_debug_details (loop))
2435 {
2436 fprintf (dump_file,
2437 "not vectorized: possible dependence between data-refs ");
2438 print_generic_expr (dump_file, DR_REF (dra), TDF_SLIM);
2439 fprintf (dump_file, " and ");
2440 print_generic_expr (dump_file, DR_REF (drb), TDF_SLIM);
2441 }
2442
2443 return true;
2444}
2445
2446
2447/* Function vect_analyze_data_ref_dependences.
2448
2449 Examine all the data references in the loop, and make sure there do not
2450 exist any data dependences between them.
2451
2452 TODO: dependences which distance is greater than the vectorization factor
2453 can be ignored. */
2454
2455static bool
2456vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
2457{
2458 unsigned int i, j;
2459 varray_type loop_write_refs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2460 varray_type loop_read_refs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2462
2463 /* Examine store-store (output) dependences. */
2464
2465 if (vect_debug_details (NULL))
2466 fprintf (dump_file, "\n<<vect_analyze_dependences>>\n");
2467
2468 if (vect_debug_details (NULL))
2469 fprintf (dump_file, "compare all store-store pairs.");
2470
2471 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_refs); i++)
2472 {
2473 for (j = i + 1; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2474 {
2475 struct data_reference *dra =
2476 VARRAY_GENERIC_PTR (loop_write_refs, i);
2477 struct data_reference *drb =
2478 VARRAY_GENERIC_PTR (loop_write_refs, j);
2479 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2480 return false;
2481 }
2482 }
2483
2484 /* Examine load-store (true/anti) dependences. */
2485
2486 if (vect_debug_details (NULL))
2487 fprintf (dump_file, "compare all load-store pairs.");
2488
2489 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_refs); i++)
2490 {
2491 for (j = 0; j < VARRAY_ACTIVE_SIZE (loop_write_refs); j++)
2492 {
2493 struct data_reference *dra = VARRAY_GENERIC_PTR (loop_read_refs, i);
2494 struct data_reference *drb =
2495 VARRAY_GENERIC_PTR (loop_write_refs, j);
2496 if (vect_analyze_data_ref_dependence (dra, drb, loop))
2497 return false;
2498 }
2499 }
2500
2501 return true;
2502}
2503
2504
2505/* Function vect_get_first_index.
2506
2507 REF is a data reference.
2508 If it is an ARRAY_REF: if its lower bound is simple enough,
2509 put it in ARRAY_FIRST_INDEX and return TRUE; otherwise - return FALSE.
2510 If it is not an ARRAY_REF: REF has no "first index";
2511 ARRAY_FIRST_INDEX in zero, and the function returns TRUE. */
2512
2513static bool
2514vect_get_first_index (tree ref, tree *array_first_index)
2515{
2516 tree array_start;
2517
2518 if (TREE_CODE (ref) != ARRAY_REF)
2519 *array_first_index = size_zero_node;
2520 else
2521 {
2522 array_start = array_ref_low_bound (ref);
2523 if (!host_integerp (array_start,0))
2524 {
2525 if (vect_debug_details (NULL))
2526 {
2527 fprintf (dump_file, "array min val not simple integer cst.");
2528 print_generic_expr (dump_file, array_start, TDF_DETAILS);
2529 }
2530 return false;
2531 }
2532 *array_first_index = array_start;
2533 }
2534
2535 return true;
2536}
2537
2538
6775f1f3
IR
2539/* Function vect_compute_array_base_alignment.
2540 A utility function of vect_compute_array_ref_alignment.
2541
2542 Compute the misalignment of ARRAY in bits.
2543
2544 Input:
2545 ARRAY - an array_ref (possibly multidimensional) of type ARRAY_TYPE.
d4a9b3a3 2546 VECTYPE - we are interested in the misalignment modulo the size of vectype.
6775f1f3
IR
2547 if NULL: don't compute misalignment, just return the base of ARRAY.
2548 PREV_DIMENSIONS - initialized to one.
2549 MISALIGNMENT - the computed misalignment in bits.
2550
2551 Output:
2552 If VECTYPE is not NULL:
2553 Return NULL_TREE if the misalignment cannot be computed. Otherwise, return
2554 the base of the array, and put the computed misalignment in MISALIGNMENT.
2555 If VECTYPE is NULL:
2556 Return the base of the array.
2557
2558 For a[idx_N]...[idx_2][idx_1][idx_0], the address of
2559 a[idx_N]...[idx_2][idx_1] is
2560 {&a + idx_1 * dim_0 + idx_2 * dim_0 * dim_1 + ...
2561 ... + idx_N * dim_0 * ... * dim_N-1}.
2562 (The misalignment of &a is not checked here).
2563 Note, that every term contains dim_0, therefore, if dim_0 is a
2564 multiple of NUNITS, the whole sum is a multiple of NUNITS.
2565 Otherwise, if idx_1 is constant, and dim_1 is a multiple of
2566 NUINTS, we can say that the misalignment of the sum is equal to
2567 the misalignment of {idx_1 * dim_0}. If idx_1 is not constant,
2568 we can't determine this array misalignment, and we return
2569 false.
2570 We proceed recursively in this manner, accumulating total misalignment
2571 and the multiplication of previous dimensions for correct misalignment
2572 calculation. */
2573
2574static tree
2575vect_compute_array_base_alignment (tree array,
2576 tree vectype,
2577 tree *prev_dimensions,
2578 tree *misalignment)
2579{
2580 tree index;
2581 tree domain;
2582 tree dimension_size;
2583 tree mis;
2584 tree bits_per_vectype;
2585 tree bits_per_vectype_unit;
2586
2587 /* The 'stop condition' of the recursion. */
2588 if (TREE_CODE (array) != ARRAY_REF)
2589 return array;
2590
2591 if (!vectype)
2592 /* Just get the base decl. */
2593 return vect_compute_array_base_alignment
2594 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
2595
2596 if (!host_integerp (*misalignment, 1) || TREE_OVERFLOW (*misalignment) ||
2597 !host_integerp (*prev_dimensions, 1) || TREE_OVERFLOW (*prev_dimensions))
2598 return NULL_TREE;
2599
2600 domain = TYPE_DOMAIN (TREE_TYPE (array));
2601 dimension_size =
2602 int_const_binop (PLUS_EXPR,
2603 int_const_binop (MINUS_EXPR, TYPE_MAX_VALUE (domain),
2604 TYPE_MIN_VALUE (domain), 1),
2605 size_one_node, 1);
2606
2607 /* Check if the dimension size is a multiple of NUNITS, the remaining sum
2608 is a multiple of NUNITS:
2609
2610 dimension_size % GET_MODE_NUNITS (TYPE_MODE (vectype)) == 0 ?
2611 */
2612 mis = int_const_binop (TRUNC_MOD_EXPR, dimension_size,
2613 build_int_cst (NULL_TREE, GET_MODE_NUNITS (TYPE_MODE (vectype))), 1);
2614 if (integer_zerop (mis))
2615 /* This array is aligned. Continue just in order to get the base decl. */
2616 return vect_compute_array_base_alignment
2617 (TREE_OPERAND (array, 0), NULL, NULL, NULL);
2618
2619 index = TREE_OPERAND (array, 1);
2620 if (!host_integerp (index, 1))
2621 /* The current index is not constant. */
2622 return NULL_TREE;
2623
2624 index = int_const_binop (MINUS_EXPR, index, TYPE_MIN_VALUE (domain), 0);
2625
2626 bits_per_vectype = fold_convert (unsigned_type_node,
2627 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2628 GET_MODE_SIZE (TYPE_MODE (vectype))));
2629 bits_per_vectype_unit = fold_convert (unsigned_type_node,
2630 build_int_cst (NULL_TREE, BITS_PER_UNIT *
2631 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vectype)))));
2632
2633 /* Add {idx_i * dim_i-1 * ... * dim_0 } to the misalignment computed
2634 earlier:
2635
2636 *misalignment =
2637 (*misalignment + index_val * dimension_size * *prev_dimensions)
2638 % vectype_nunits;
2639 */
2640
2641 mis = int_const_binop (MULT_EXPR, index, dimension_size, 1);
2642 mis = int_const_binop (MULT_EXPR, mis, *prev_dimensions, 1);
2643 mis = int_const_binop (MULT_EXPR, mis, bits_per_vectype_unit, 1);
2644 mis = int_const_binop (PLUS_EXPR, *misalignment, mis, 1);
2645 *misalignment = int_const_binop (TRUNC_MOD_EXPR, mis, bits_per_vectype, 1);
2646
2647
2648 *prev_dimensions = int_const_binop (MULT_EXPR,
2649 *prev_dimensions, dimension_size, 1);
2650
2651 return vect_compute_array_base_alignment (TREE_OPERAND (array, 0), vectype,
2652 prev_dimensions,
2653 misalignment);
2654}
2655
2656
79fe1b3b
DN
2657/* Function vect_compute_data_ref_alignment
2658
2659 Compute the misalignment of the data reference DR.
2660
6775f1f3
IR
2661 Output:
2662 1. If during the misalignment computation it is found that the data reference
2663 cannot be vectorized then false is returned.
2664 2. DR_MISALIGNMENT (DR) is defined.
2665
79fe1b3b
DN
2666 FOR NOW: No analysis is actually performed. Misalignment is calculated
2667 only for trivial cases. TODO. */
2668
6775f1f3 2669static bool
79fe1b3b 2670vect_compute_data_ref_alignment (struct data_reference *dr,
6775f1f3 2671 loop_vec_info loop_vinfo)
79fe1b3b
DN
2672{
2673 tree stmt = DR_STMT (dr);
6775f1f3 2674 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
79fe1b3b
DN
2675 tree ref = DR_REF (dr);
2676 tree vectype;
79fe1b3b 2677 tree scalar_type;
79fe1b3b 2678 tree offset = size_zero_node;
6775f1f3
IR
2679 tree base, bit_offset, alignment;
2680 tree unit_bits = fold_convert (unsigned_type_node,
2681 build_int_cst (NULL_TREE, BITS_PER_UNIT));
2682 tree dr_base;
2683 bool base_aligned_p;
2684
79fe1b3b
DN
2685 if (vect_debug_details (NULL))
2686 fprintf (dump_file, "vect_compute_data_ref_alignment:");
2687
2688 /* Initialize misalignment to unknown. */
2689 DR_MISALIGNMENT (dr) = -1;
2690
2691 scalar_type = TREE_TYPE (ref);
2692 vectype = get_vectype_for_scalar_type (scalar_type);
2693 if (!vectype)
2694 {
2695 if (vect_debug_details (NULL))
2696 {
2697 fprintf (dump_file, "no vectype for stmt: ");
2698 print_generic_expr (dump_file, stmt, TDF_SLIM);
6775f1f3 2699 fprintf (dump_file, " scalar_type: ");
79fe1b3b
DN
2700 print_generic_expr (dump_file, scalar_type, TDF_DETAILS);
2701 }
6775f1f3
IR
2702 /* It is not possible to vectorize this data reference. */
2703 return false;
79fe1b3b 2704 }
6775f1f3
IR
2705 gcc_assert (TREE_CODE (ref) == ARRAY_REF || TREE_CODE (ref) == INDIRECT_REF);
2706
2707 if (TREE_CODE (ref) == ARRAY_REF)
2708 dr_base = ref;
2709 else
2710 dr_base = STMT_VINFO_VECT_DR_BASE (stmt_info);
79fe1b3b 2711
6775f1f3
IR
2712 base = vect_get_base_and_bit_offset (dr, dr_base, vectype,
2713 loop_vinfo, &bit_offset, &base_aligned_p);
2714 if (!base)
79fe1b3b 2715 {
6775f1f3 2716 if (vect_debug_details (NULL))
79fe1b3b 2717 {
6775f1f3
IR
2718 fprintf (dump_file, "Unknown alignment for access: ");
2719 print_generic_expr (dump_file,
2720 STMT_VINFO_VECT_DR_BASE (stmt_info), TDF_SLIM);
79fe1b3b 2721 }
6775f1f3
IR
2722 return true;
2723 }
79fe1b3b 2724
6775f1f3
IR
2725 if (!base_aligned_p)
2726 {
2727 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype)))
79fe1b3b
DN
2728 {
2729 if (vect_debug_details (NULL))
6775f1f3
IR
2730 {
2731 fprintf (dump_file, "can't force alignment of ref: ");
2732 print_generic_expr (dump_file, ref, TDF_SLIM);
2733 }
2734 return true;
79fe1b3b 2735 }
6775f1f3
IR
2736
2737 /* Force the alignment of the decl.
2738 NOTE: This is the only change to the code we make during
2739 the analysis phase, before deciding to vectorize the loop. */
2740 if (vect_debug_details (NULL))
2741 fprintf (dump_file, "force alignment");
2742 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
2743 DECL_USER_ALIGN (base) = TYPE_ALIGN (vectype);
2744 }
79fe1b3b 2745
6775f1f3
IR
2746 /* At this point we assume that the base is aligned, and the offset from it
2747 (including index, if relevant) has been computed and is in BIT_OFFSET. */
2748 gcc_assert (base_aligned_p
2749 || (TREE_CODE (base) == VAR_DECL
2750 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
2751
2752 /* Convert into bytes. */
2753 offset = int_const_binop (TRUNC_DIV_EXPR, bit_offset, unit_bits, 1);
2754 /* Check that there is no remainder in bits. */
2755 bit_offset = int_const_binop (TRUNC_MOD_EXPR, bit_offset, unit_bits, 1);
2756 if (!integer_zerop (bit_offset))
2757 {
2758 if (vect_debug_details (NULL))
79fe1b3b 2759 {
6775f1f3
IR
2760 fprintf (dump_file, "bit offset alignment: ");
2761 print_generic_expr (dump_file, bit_offset, TDF_SLIM);
79fe1b3b 2762 }
6775f1f3
IR
2763 return false;
2764 }
2765
2766 /* Alignment required, in bytes: */
2767 alignment = fold_convert (unsigned_type_node,
2768 build_int_cst (NULL_TREE, TYPE_ALIGN (vectype)/BITS_PER_UNIT));
79fe1b3b 2769
6775f1f3
IR
2770 /* Modulo alignment. */
2771 offset = int_const_binop (TRUNC_MOD_EXPR, offset, alignment, 0);
2772 if (!host_integerp (offset, 1) || TREE_OVERFLOW (offset))
2773 {
2774 if (vect_debug_details (NULL))
2775 fprintf (dump_file, "unexpected misalign value");
2776 return false;
79fe1b3b
DN
2777 }
2778
6775f1f3 2779 DR_MISALIGNMENT (dr) = tree_low_cst (offset, 1);
79fe1b3b 2780
6775f1f3
IR
2781 if (vect_debug_details (NULL))
2782 fprintf (dump_file, "misalign = %d", DR_MISALIGNMENT (dr));
2783
2784 return true;
2785}
2786
2787
2788/* Function vect_compute_array_ref_alignment
2789
2790 Compute the alignment of an array-ref.
2791 The alignment we compute here is relative to
2792 TYPE_ALIGN(VECTYPE) boundary.
2793
2794 Output:
2795 OFFSET - the alignment in bits
2796 Return value - the base of the array-ref. E.g,
2797 if the array-ref is a.b[k].c[i][j] the returned
2798 base is a.b[k].c
2799*/
2800
2801static tree
2802vect_compute_array_ref_alignment (struct data_reference *dr,
2803 loop_vec_info loop_vinfo,
2804 tree vectype,
2805 tree *offset)
2806{
2807 tree array_first_index = size_zero_node;
2808 tree init;
2809 tree ref = DR_REF (dr);
2810 tree scalar_type = TREE_TYPE (ref);
2811 tree oprnd0 = TREE_OPERAND (ref, 0);
2812 tree dims = size_one_node;
2813 tree misalign = size_zero_node;
2814 tree next_ref, this_offset = size_zero_node;
2815 tree nunits;
2816 tree nbits;
2817
2818 if (TREE_CODE (TREE_TYPE (ref)) == ARRAY_TYPE)
2819 /* The reference is an array without its last index. */
2820 next_ref = vect_compute_array_base_alignment (ref, vectype, &dims, &misalign);
2821 else
2822 next_ref =
2823 vect_compute_array_base_alignment (oprnd0, vectype, &dims, &misalign);
2824 if (!vectype)
2825 /* Alignment is not requested. Just return the base. */
2826 return next_ref;
2827
2828 /* Compute alignment. */
2829 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign) || !next_ref)
2830 return NULL_TREE;
2831 this_offset = misalign;
2832
2833 /* Check the first index accessed. */
79fe1b3b
DN
2834 if (!vect_get_first_index (ref, &array_first_index))
2835 {
2836 if (vect_debug_details (NULL))
2837 fprintf (dump_file, "no first_index for array.");
6775f1f3 2838 return NULL_TREE;
79fe1b3b 2839 }
79fe1b3b 2840
6775f1f3
IR
2841 /* Check the index of the array_ref. */
2842 init = initial_condition_in_loop_num (DR_ACCESS_FN (dr, 0),
2843 LOOP_VINFO_LOOP (loop_vinfo)->num);
79fe1b3b 2844
6775f1f3
IR
2845 /* FORNOW: In order to simplify the handling of alignment, we make sure
2846 that the first location at which the array is accessed ('init') is on an
79fe1b3b 2847 'NUNITS' boundary, since we are assuming here that 'array base' is aligned.
6775f1f3
IR
2848 This is too conservative, since we require that
2849 both {'array_base' is a multiple of NUNITS} && {'init' is a multiple of
79fe1b3b
DN
2850 NUNITS}, instead of just {('array_base' + 'init') is a multiple of NUNITS}.
2851 This should be relaxed in the future. */
2852
6775f1f3 2853 if (!init || !host_integerp (init, 0))
79fe1b3b
DN
2854 {
2855 if (vect_debug_details (NULL))
6775f1f3
IR
2856 fprintf (dump_file, "non constant init. ");
2857 return NULL_TREE;
79fe1b3b
DN
2858 }
2859
79fe1b3b 2860 /* bytes per scalar element: */
6775f1f3
IR
2861 nunits = fold_convert (unsigned_type_node,
2862 build_int_cst (NULL_TREE, GET_MODE_SIZE (TYPE_MODE (scalar_type))));
2863 nbits = int_const_binop (MULT_EXPR, nunits,
2864 build_int_cst (NULL_TREE, BITS_PER_UNIT), 1);
79fe1b3b 2865
6775f1f3 2866 /* misalign = offset + (init-array_first_index)*nunits*bits_in_byte */
79fe1b3b 2867 misalign = int_const_binop (MINUS_EXPR, init, array_first_index, 0);
6775f1f3
IR
2868 misalign = int_const_binop (MULT_EXPR, misalign, nbits, 0);
2869 misalign = int_const_binop (PLUS_EXPR, misalign, this_offset, 0);
79fe1b3b 2870
6775f1f3
IR
2871 /* TODO: allow negative misalign values. */
2872 if (!host_integerp (misalign, 1) || TREE_OVERFLOW (misalign))
79fe1b3b
DN
2873 {
2874 if (vect_debug_details (NULL))
6775f1f3
IR
2875 fprintf (dump_file, "unexpected misalign value");
2876 return NULL_TREE;
79fe1b3b 2877 }
6775f1f3
IR
2878 *offset = misalign;
2879 return next_ref;
79fe1b3b
DN
2880}
2881
2882
2883/* Function vect_compute_data_refs_alignment
2884
2885 Compute the misalignment of data references in the loop.
2886 This pass may take place at function granularity instead of at loop
2887 granularity.
2888
2889 FOR NOW: No analysis is actually performed. Misalignment is calculated
2890 only for trivial cases. TODO. */
2891
2892static void
2893vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
2894{
2895 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
2896 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
2897 unsigned int i;
6775f1f3 2898
79fe1b3b
DN
2899 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
2900 {
2901 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
2902 vect_compute_data_ref_alignment (dr, loop_vinfo);
2903 }
2904
2905 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
2906 {
2907 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
2908 vect_compute_data_ref_alignment (dr, loop_vinfo);
2909 }
2910}
2911
2912
2913/* Function vect_enhance_data_refs_alignment
2914
2915 This pass will use loop versioning and loop peeling in order to enhance
2916 the alignment of data references in the loop.
2917
2918 FOR NOW: we assume that whatever versioning/peeling takes place, only the
2919 original loop is to be vectorized; Any other loops that are created by
2920 the transformations performed in this pass - are not supposed to be
2921 vectorized. This restriction will be relaxed.
2922
2923 FOR NOW: No transformation is actually performed. TODO. */
2924
static void
vect_enhance_data_refs_alignment (loop_vec_info loop_info ATTRIBUTE_UNUSED)
{
  /* FORNOW: this is a placeholder - no transformation is performed yet,
     hence LOOP_INFO is unused (see the function header comment above).

     This pass will require a cost model to guide it whether to apply peeling
     or versioning or a combination of the two. For example, the scheme that
     Intel uses when given a loop with several memory accesses, is as follows:
     choose one memory access ('p') which alignment you want to force by doing
     peeling. Then, either (1) generate a loop in which 'p' is aligned and all
     other accesses are not necessarily aligned, or (2) use loop versioning to
     generate one loop in which all accesses are aligned, and another loop in
     which only 'p' is necessarily aligned.

     ("Automatic Intra-Register Vectorization for the Intel Architecture",
     Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
     Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

     Devising a cost model is the most critical aspect of this work. It will
     guide us on which access to peel for, whether to use loop versioning, how
     many versions to create, etc. The cost model will probably consist of
     generic considerations as well as target specific considerations (on
     powerpc for example, misaligned stores are more painful than misaligned
     loads).

     Here is the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = unknown
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- After vect_compute_data_refs_alignment:
	for (i=0; i<N; i++){
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unknown
	}

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
	for (i=0; i<N; i++){	# loop 1A
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i=0; i<N; i++){	# loop 1B
	  x = q[i];			# DR_MISALIGNMENT(q) = 3
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     for (i = 3; i < N; i++){	# loop 2A
	x = q[i];			# DR_MISALIGNMENT(q) = 0
	p[i] = y;			# DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){	# (scalar loop, not to be vectorized).
	x = q[i];
	p[i] = y;
     }
     if (p is aligned) {
	for (i = 3; i<N; i++){	# loop 3A
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = 0
	}
     }
     else {
	for (i = 3; i<N; i++){	# loop 3B
	  x = q[i];			# DR_MISALIGNMENT(q) = 0
	  p[i] = y;			# DR_MISALIGNMENT(p) = unaligned
	}
     }

     These loops are later passed to loop_transform to be vectorized. The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).
  */
}
3011
3012
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   FOR NOW: Until support for misaligned accesses is in place, only if all
   accesses are aligned can the loop be vectorized. This restriction will be
   relaxed. */
3019
static bool
vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
{
  varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
  /* The read datarefs are no longer checked below (misaligned loads are
     supported) - kept for when a cost model re-enables the check.  */
  /*varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);*/

  unsigned int i;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs_alignment>>\n");


  /* This pass may take place at function granularity instead of at loop
     granularity. */

  vect_compute_data_refs_alignment (loop_vinfo);


  /* This pass will use loop versioning and loop peeling in order to enhance
     the alignment of data references in the loop.
     FOR NOW: we assume that whatever versioning/peeling took place, the
     original loop is to be vectorized. Any other loops that were created by
     the transformations performed in this pass - are not supposed to be
     vectorized. This restriction will be relaxed. */

  vect_enhance_data_refs_alignment (loop_vinfo);


  /* Finally, check that loop can be vectorized.
     FOR NOW: Until support for misaligned accesses is in place, only if all
     accesses are aligned can the loop be vectorized. This restriction will be
     relaxed. */

  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
      if (!aligned_access_p (dr))
	{
	  if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
	      || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
	    fprintf (dump_file, "not vectorized: unaligned store.");
	  return false;
	}
    }

  /* The vectorizer now supports misaligned loads, so we don't fail anymore
     in the presence of a misaligned read dataref.  For some targets however
     it may be preferable not to vectorize in such a case as misaligned
     accesses are very costly.  This should be considered in the future. */
/*
  for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
    {
      struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
      if (!aligned_access_p (dr))
	{
	  if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
	      || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
	    fprintf (dump_file, "not vectorized: unaligned load.");
	  return false;
	}
    }
*/

  return true;
}
3085
3086
/* Function vect_analyze_data_ref_access.

   Analyze the access pattern of the data-reference DR. For now, a data access
   has to be consecutive and aligned to be considered vectorizable. */
3091
static bool
vect_analyze_data_ref_access (struct data_reference *dr)
{
  varray_type access_fns = DR_ACCESS_FNS (dr);
  tree access_fn;
  tree init, step;
  unsigned int dimensions, i;

  /* Check that in case of multidimensional array ref A[i1][i2]..[iN],
     i1, i2, ..., iN-1 are loop invariant (to make sure that the memory
     access is contiguous).  */
  dimensions = VARRAY_ACTIVE_SIZE (access_fns);

  /* Access function 0 is the innermost (last) dimension; all outer
     dimensions must have a zero evolution in the containing loop.  */
  for (i = 1; i < dimensions; i++) /* Not including the last dimension. */
    {
      access_fn = DR_ACCESS_FN (dr, i);

      if (evolution_part_in_loop_num (access_fn,
				      loop_containing_stmt (DR_STMT (dr))->num))
	{
	  /* Evolution part is not NULL in this loop (it is neither constant
	     nor invariant), so the access is not contiguous.  */
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file,
		       "not vectorized: complicated multidimensional array access.");
	      print_generic_expr (dump_file, access_fn, TDF_SLIM);
	    }
	  return false;
	}
    }

  /* The innermost access function must be either loop-invariant or a
     simple induction (checked by vect_is_simple_iv_evolution).  */
  access_fn = DR_ACCESS_FN (dr, 0); /* The last dimension access function. */
  if (!evolution_function_is_constant_p (access_fn)
      && !vect_is_simple_iv_evolution (loop_containing_stmt (DR_STMT (dr))->num,
				       access_fn, &init, &step, true))
    {
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "not vectorized: too complicated access function.");
	  print_generic_expr (dump_file, access_fn, TDF_SLIM);
	}
      return false;
    }

  return true;
}
3139
3140
3141/* Function vect_analyze_data_ref_accesses.
3142
3143 Analyze the access pattern of all the data references in the loop.
3144
3145 FORNOW: the only access pattern that is considered vectorizable is a
3146 simple step 1 (consecutive) access.
3147
6775f1f3 3148 FORNOW: handle only arrays and pointer accesses. */
79fe1b3b
DN
3149
3150static bool
3151vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
3152{
3153 unsigned int i;
3154 varray_type loop_write_datarefs = LOOP_VINFO_DATAREF_WRITES (loop_vinfo);
3155 varray_type loop_read_datarefs = LOOP_VINFO_DATAREF_READS (loop_vinfo);
3156
3157 if (vect_debug_details (NULL))
3158 fprintf (dump_file, "\n<<vect_analyze_data_ref_accesses>>\n");
3159
3160 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_write_datarefs); i++)
3161 {
3162 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_write_datarefs, i);
3163 bool ok = vect_analyze_data_ref_access (dr);
3164 if (!ok)
3165 {
3166 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3167 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3168 fprintf (dump_file, "not vectorized: complicated access pattern.");
3169 return false;
3170 }
3171 }
3172
3173 for (i = 0; i < VARRAY_ACTIVE_SIZE (loop_read_datarefs); i++)
3174 {
3175 struct data_reference *dr = VARRAY_GENERIC_PTR (loop_read_datarefs, i);
3176 bool ok = vect_analyze_data_ref_access (dr);
3177 if (!ok)
3178 {
3179 if (vect_debug_stats (LOOP_VINFO_LOOP (loop_vinfo))
3180 || vect_debug_details (LOOP_VINFO_LOOP (loop_vinfo)))
3181 fprintf (dump_file, "not vectorized: complicated access pattern.");
3182 return false;
3183 }
3184 }
3185
3186 return true;
3187}
3188
3189
3190/* Function vect_analyze_pointer_ref_access.
3191
3192 Input:
3193 STMT - a stmt that contains a data-ref
3194 MEMREF - a data-ref in STMT, which is an INDIRECT_REF.
3195
3196 If the data-ref access is vectorizable, return a data_reference structure
3197 that represents it (DR). Otherwise - return NULL. */
3198
static struct data_reference *
vect_analyze_pointer_ref_access (tree memref, tree stmt, bool is_read)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  struct loop *loop = STMT_VINFO_LOOP (stmt_info);
  /* Evolution of the pointer itself (operand 0 of the INDIRECT_REF).  */
  tree access_fn = analyze_scalar_evolution (loop, TREE_OPERAND (memref, 0));
  tree init, step;
  int step_val;
  tree reftype, innertype;
  enum machine_mode innermode;
  tree indx_access_fn;
  int loopnum = loop->num;
  struct data_reference *dr;

  if (!access_fn)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "not vectorized: complicated pointer access.");
      return NULL;
    }

  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "Access function of ptr: ");
      print_generic_expr (dump_file, access_fn, TDF_SLIM);
    }

  /* The pointer must evolve as a simple induction: {init, +, step}.  */
  if (!vect_is_simple_iv_evolution (loopnum, access_fn, &init, &step, false))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "not vectorized: pointer access is not simple.");
      return NULL;
    }

  STRIP_NOPS (init);

  /* FORNOW: the step must be a compile-time constant.  */
  if (!host_integerp (step,0))
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file,
		 "not vectorized: non constant step for pointer access.");
      return NULL;
    }

  step_val = TREE_INT_CST_LOW (step);

  /* Both the pointer operand and the initial condition must have
     pointer type for the element-size check below to make sense.  */
  reftype = TREE_TYPE (TREE_OPERAND (memref, 0));
  if (TREE_CODE (reftype) != POINTER_TYPE)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  reftype = TREE_TYPE (init);
  if (TREE_CODE (reftype) != POINTER_TYPE)
    {
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "not vectorized: unexpected pointer access form.");
      return NULL;
    }

  /* The step must equal the size of the pointed-to type, i.e. the
     pointer advances by exactly one element per iteration.  */
  innertype = TREE_TYPE (reftype);
  innermode = TYPE_MODE (innertype);
  if (GET_MODE_SIZE (innermode) != step_val)
    {
      /* FORNOW: support only consecutive access */
      if (vect_debug_stats (loop) || vect_debug_details (loop))
	fprintf (dump_file, "not vectorized: non consecutive access.");
      return NULL;
    }

  /* Model the access as if indexed by {0, +, 1} in this loop.  */
  indx_access_fn =
    build_polynomial_chrec (loopnum, integer_zero_node, integer_one_node);
  if (vect_debug_details (NULL))
    {
      fprintf (dump_file, "Access function of ptr indx: ");
      print_generic_expr (dump_file, indx_access_fn, TDF_SLIM);
    }
  dr = init_data_ref (stmt, memref, init, indx_access_fn, is_read);
  return dr;
}
3281
3282
6775f1f3
IR
3283/* Function vect_get_symbl_and_dr.
3284
3285 The function returns SYMBL - the relevant variable for
3286 memory tag (for aliasing purposes).
3287 Also data reference structure DR is created.
3288
3289 Input:
3290 MEMREF - data reference in STMT
3291 IS_READ - TRUE if STMT reads from MEMREF, FALSE if writes to MEMREF
3292
3293 Output:
3294 DR - data_reference struct for MEMREF
3295 return value - the relevant variable for memory tag (for aliasing purposes).
3296
3297*/
3298
static tree
vect_get_symbl_and_dr (tree memref, tree stmt, bool is_read,
		       loop_vec_info loop_vinfo, struct data_reference **dr)
{
  tree symbl, oprnd0, oprnd1;
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree offset;
  tree array_base, base;
  struct data_reference *new_dr;
  bool base_aligned_p;

  *dr = NULL;
  switch (TREE_CODE (memref))
    {
    case INDIRECT_REF:
      /* Pointer access: analyze the pointer's evolution and build a DR.  */
      new_dr = vect_analyze_pointer_ref_access (memref, stmt, is_read);
      if (! new_dr)
	return NULL_TREE;
      *dr = new_dr;
      symbl = DR_BASE_NAME (new_dr);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = symbl;

      switch (TREE_CODE (symbl))
	{
	case PLUS_EXPR:
	case MINUS_EXPR:
	  oprnd0 = TREE_OPERAND (symbl, 0);
	  oprnd1 = TREE_OPERAND (symbl, 1);

	  STRIP_NOPS(oprnd1);
	  /* Only {address_base + offset} expressions are supported,
	     where address_base can be POINTER_TYPE or ARRAY_TYPE and
	     offset can be anything but POINTER_TYPE or ARRAY_TYPE.
	     TODO: swap operands if {offset + address_base}. */
	  if ((TREE_CODE (TREE_TYPE (oprnd1)) == POINTER_TYPE
	       && TREE_CODE (oprnd1) != INTEGER_CST)
	      || TREE_CODE (TREE_TYPE (oprnd1)) == ARRAY_TYPE)
	    return NULL_TREE;

	  if (TREE_CODE (TREE_TYPE (oprnd0)) == POINTER_TYPE)
	    symbl = oprnd0;
	  else
	    /* Recurse to find the underlying symbol of the base part.  */
	    symbl = vect_get_symbl_and_dr (oprnd0, stmt, is_read,
					   loop_vinfo, &new_dr);
	  /* Fall through.  */

	case SSA_NAME:
	case ADDR_EXPR:
	  /* symbl remains unchanged. */
	  break;

	default:
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "unhandled data ref: ");
	      print_generic_expr (dump_file, memref, TDF_SLIM);
	      fprintf (dump_file, " (symbl ");
	      print_generic_expr (dump_file, symbl, TDF_SLIM);
	      fprintf (dump_file, ") in stmt  ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
      break;

    case ARRAY_REF:
      offset = size_zero_node;

      /* Store the array base in the stmt info.
	 For one dimensional array ref a[i], the base is a,
	 for multidimensional a[i1][i2]..[iN], the base is
	 a[i1][i2]..[iN-1]. */
      array_base = TREE_OPERAND (memref, 0);
      STMT_VINFO_VECT_DR_BASE (stmt_info) = array_base;

      new_dr = analyze_array (stmt, memref, is_read);
      *dr = new_dr;

      /* Find the relevant symbol for aliasing purposes. */
      base = DR_BASE_NAME (new_dr);
      switch (TREE_CODE (base))
	{
	case VAR_DECL:
	  symbl = base;
	  break;

	case INDIRECT_REF:
	  /* The array is accessed through a pointer; the pointer itself
	     is the relevant symbol.  */
	  symbl = TREE_OPERAND (base, 0);
	  break;

	case COMPONENT_REF:
	  /* Could have recorded more accurate information -
	     i.e, the actual FIELD_DECL that is being referenced -
	     but later passes expect VAR_DECL as the nmt. */
	  symbl = vect_get_base_and_bit_offset (new_dr, base, NULL_TREE,
					loop_vinfo, &offset, &base_aligned_p);
	  if (symbl)
	    break;
	  /* fall through */
	default:
	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "unhandled struct/class field access ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }
	  return NULL_TREE;
	}
      break;

    default:
      /* FORNOW: only INDIRECT_REF and ARRAY_REF are handled.  */
      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "unhandled data ref: ");
	  print_generic_expr (dump_file, memref, TDF_SLIM);
	  fprintf (dump_file, " in stmt ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	}
      return NULL_TREE;
    }
  return symbl;
}
3419
3420
79fe1b3b
DN
3421/* Function vect_analyze_data_refs.
3422
3423 Find all the data references in the loop.
3424
6775f1f3 3425 FORNOW: Handle aligned INDIRECT_REFs and ARRAY_REFs
79fe1b3b
DN
3426 which base is really an array (not a pointer) and which alignment
3427 can be forced. This restriction will be relaxed. */
3428
static bool
vect_analyze_data_refs (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  int j;
  struct data_reference *dr;
  tree tag;
  tree address_base;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_analyze_data_refs>>\n");

  /* Scan every stmt in every basic block of the loop.  */
  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  bool is_read = false;
	  tree stmt = bsi_stmt (si);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  v_may_def_optype v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
	  v_must_def_optype v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
	  vuse_optype vuses = STMT_VUSE_OPS (stmt);
	  varray_type *datarefs = NULL;
	  int nvuses, nv_may_defs, nv_must_defs;
	  tree memref = NULL;
	  tree symbl;

	  /* Assumption: there exists a data-ref in stmt, if and only if
	     it has vuses/vdefs. */

	  if (!vuses && !v_may_defs && !v_must_defs)
	    continue;

	  nvuses = NUM_VUSES (vuses);
	  nv_may_defs = NUM_V_MAY_DEFS (v_may_defs);
	  nv_must_defs = NUM_V_MUST_DEFS (v_must_defs);

	  /* A stmt that both reads and writes memory is not handled.  */
	  if (nvuses && (nv_may_defs || nv_must_defs))
	    {
	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "unexpected vdefs and vuses in stmt: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  if (TREE_CODE (stmt) != MODIFY_EXPR)
	    {
	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "unexpected vops in stmt: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  /* vuses => the memref is on the RHS (a load); vdefs => it is
	     on the LHS (a store).  */
	  if (vuses)
	    {
	      memref = TREE_OPERAND (stmt, 1);
	      datarefs = &(LOOP_VINFO_DATAREF_READS (loop_vinfo));
	      is_read = true;
	    }
	  else /* vdefs */
	    {
	      memref = TREE_OPERAND (stmt, 0);
	      datarefs = &(LOOP_VINFO_DATAREF_WRITES (loop_vinfo));
	      is_read = false;
	    }

	  /* Analyze MEMREF. If it is of a supported form, build data_reference
	     struct for it (DR) and find the relevant symbol for aliasing
	     purposes. */
	  symbl = vect_get_symbl_and_dr (memref, stmt, is_read, loop_vinfo, &dr);
	  if (!symbl)
	    {
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
		{
		  fprintf (dump_file, "not vectorized: unhandled data ref: ");
		  print_generic_expr (dump_file, stmt, TDF_SLIM);
		}
	      return false;
	    }

	  /* Find and record the memtag assigned to this data-ref. */
	  switch (TREE_CODE (symbl))
	    {
	    case VAR_DECL:
	      STMT_VINFO_MEMTAG (stmt_info) = symbl;
	      break;

	    case SSA_NAME:
	      /* Look up the type memory tag of the SSA variable, falling
		 back to the tag of the pointer operand of the memref.  */
	      symbl = SSA_NAME_VAR (symbl);
	      tag = get_var_ann (symbl)->type_mem_tag;
	      if (!tag)
		{
		  tree ptr = TREE_OPERAND (memref, 0);
		  if (TREE_CODE (ptr) == SSA_NAME)
		    tag = get_var_ann (SSA_NAME_VAR (ptr))->type_mem_tag;
		}
	      if (!tag)
		{
		  if (vect_debug_stats (loop) || vect_debug_details (loop))
		    fprintf (dump_file, "not vectorized: no memtag for ref.");
		  return false;
		}
	      STMT_VINFO_MEMTAG (stmt_info) = tag;
	      break;

	    case ADDR_EXPR:
	      address_base = TREE_OPERAND (symbl, 0);

	      switch (TREE_CODE (address_base))
		{
		case ARRAY_REF:
		  /* Re-analyze the underlying array ref; its base is the
		     memtag.  */
		  dr = analyze_array (stmt, TREE_OPERAND (symbl, 0), DR_IS_READ(dr));
		  STMT_VINFO_MEMTAG (stmt_info) = DR_BASE_NAME (dr);
		  break;

		case VAR_DECL:
		  STMT_VINFO_MEMTAG (stmt_info) = address_base;
		  break;

		default:
		  if (vect_debug_stats (loop) || vect_debug_details (loop))
		    {
		      fprintf (dump_file, "not vectorized: unhandled address expression: ");
		      print_generic_expr (dump_file, stmt, TDF_SLIM);
		    }
		  return false;
		}
	      break;

	    default:
	      if (vect_debug_stats (loop) || vect_debug_details (loop))
		{
		  fprintf (dump_file, "not vectorized: unsupported data-ref: ");
		  print_generic_expr (dump_file, memref, TDF_SLIM);
		}
	      return false;
	    }

	  /* Record the DR both in the per-loop list and in the stmt info.  */
	  VARRAY_PUSH_GENERIC_PTR (*datarefs, dr);
	  STMT_VINFO_DATA_REF (stmt_info) = dr;
	}
    }

  return true;
}
3582
3583
8c27b7d4 3584/* Utility functions used by vect_mark_stmts_to_be_vectorized. */
79fe1b3b
DN
3585
3586/* Function vect_mark_relevant.
3587
3588 Mark STMT as "relevant for vectorization" and add it to WORKLIST. */
3589
3590static void
3591vect_mark_relevant (varray_type worklist, tree stmt)
3592{
3593 stmt_vec_info stmt_info;
3594
3595 if (vect_debug_details (NULL))
3596 fprintf (dump_file, "mark relevant.");
3597
3598 if (TREE_CODE (stmt) == PHI_NODE)
3599 {
3600 VARRAY_PUSH_TREE (worklist, stmt);
3601 return;
3602 }
3603
3604 stmt_info = vinfo_for_stmt (stmt);
3605
3606 if (!stmt_info)
3607 {
3608 if (vect_debug_details (NULL))
3609 {
3610 fprintf (dump_file, "mark relevant: no stmt info!!.");
3611 print_generic_expr (dump_file, stmt, TDF_SLIM);
3612 }
3613 return;
3614 }
3615
3616 if (STMT_VINFO_RELEVANT_P (stmt_info))
3617 {
3618 if (vect_debug_details (NULL))
3619 fprintf (dump_file, "already marked relevant.");
3620 return;
3621 }
3622
3623 STMT_VINFO_RELEVANT_P (stmt_info) = 1;
3624 VARRAY_PUSH_TREE (worklist, stmt);
3625}
3626
3627
3628/* Function vect_stmt_relevant_p.
3629
3630 Return true if STMT in loop that is represented by LOOP_VINFO is
3631 "relevant for vectorization".
3632
3633 A stmt is considered "relevant for vectorization" if:
3634 - it has uses outside the loop.
3635 - it has vdefs (it alters memory).
3636 - control stmts in the loop (except for the exit condition).
3637
3638 CHECKME: what other side effects would the vectorizer allow? */
3639
3640static bool
3641vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo)
3642{
3643 v_may_def_optype v_may_defs;
3644 v_must_def_optype v_must_defs;
3645 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3646 int i;
3647 dataflow_t df;
3648 int num_uses;
3649
3650 /* cond stmt other than loop exit cond. */
3651 if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
3652 return true;
3653
3654 /* changing memory. */
3655 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
3656 v_must_defs = STMT_V_MUST_DEF_OPS (stmt);
3657 if (v_may_defs || v_must_defs)
3658 {
3659 if (vect_debug_details (NULL))
3660 fprintf (dump_file, "vec_stmt_relevant_p: stmt has vdefs.");
3661 return true;
3662 }
3663
3664 /* uses outside the loop. */
3665 df = get_immediate_uses (stmt);
3666 num_uses = num_immediate_uses (df);
3667 for (i = 0; i < num_uses; i++)
3668 {
3669 tree use = immediate_use (df, i);
3670 basic_block bb = bb_for_stmt (use);
3671 if (!flow_bb_inside_loop_p (loop, bb))
3672 {
3673 if (vect_debug_details (NULL))
3674 fprintf (dump_file, "vec_stmt_relevant_p: used out of loop.");
3675 return true;
3676 }
3677 }
3678
3679 return false;
3680}
3681
3682
3683/* Function vect_mark_stmts_to_be_vectorized.
3684
3685 Not all stmts in the loop need to be vectorized. For example:
3686
3687 for i...
3688 for j...
3689 1. T0 = i + j
3690 2. T1 = a[T0]
3691
3692 3. j = j + 1
3693
3694 Stmt 1 and 3 do not need to be vectorized, because loop control and
3695 addressing of vectorized data-refs are handled differently.
3696
3697 This pass detects such stmts. */
3698
static bool
vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
{
  varray_type worklist;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  block_stmt_iterator si;
  tree stmt;
  stmt_ann_t ann;
  unsigned int i;
  int j;
  use_optype use_ops;
  stmt_vec_info stmt_info;

  if (vect_debug_details (NULL))
    fprintf (dump_file, "\n<<vect_mark_stmts_to_be_vectorized>>\n");

  VARRAY_TREE_INIT (worklist, 64, "work list");

  /* 1. Init worklist: seed it with the stmts that are relevant by
     themselves (see vect_stmt_relevant_p).  */

  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
	{
	  stmt = bsi_stmt (si);

	  if (vect_debug_details (NULL))
	    {
	      fprintf (dump_file, "init: stmt relevant? ");
	      print_generic_expr (dump_file, stmt, TDF_SLIM);
	    }

	  stmt_info = vinfo_for_stmt (stmt);
	  STMT_VINFO_RELEVANT_P (stmt_info) = 0;

	  if (vect_stmt_relevant_p (stmt, loop_vinfo))
	    vect_mark_relevant (worklist, stmt);
	}
    }


  /* 2. Process_worklist: propagate relevance backwards along use-def
     chains, until a fixed point is reached.  */

  while (VARRAY_ACTIVE_SIZE (worklist) > 0)
    {
      stmt = VARRAY_TOP_TREE (worklist);
      VARRAY_POP (worklist);

      if (vect_debug_details (NULL))
	{
	  fprintf (dump_file, "worklist: examine stmt: ");
	  print_generic_expr (dump_file, stmt, TDF_SLIM);
	}

      /* Examine the USES in this statement. Mark all the statements which
	 feed this statement's uses as "relevant", unless the USE is used as
	 an array index. */

      if (TREE_CODE (stmt) == PHI_NODE)
	{
	  /* follow the def-use chain inside the loop. */
	  for (j = 0; j < PHI_NUM_ARGS (stmt); j++)
	    {
	      tree arg = PHI_ARG_DEF (stmt, j);
	      tree def_stmt = NULL_TREE;
	      basic_block bb;
	      if (!vect_is_simple_use (arg, loop, &def_stmt))
		{
		  if (vect_debug_details (NULL))
		    fprintf (dump_file, "worklist: unsupported use.");
		  varray_clear (worklist);
		  return false;
		}
	      if (!def_stmt)
		continue;

	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "worklist: def_stmt: ");
		  print_generic_expr (dump_file, def_stmt, TDF_SLIM);
		}

	      /* Defs from outside the loop do not need to be vectorized.  */
	      bb = bb_for_stmt (def_stmt);
	      if (flow_bb_inside_loop_p (loop, bb))
		vect_mark_relevant (worklist, def_stmt);
	    }
	}

      ann = stmt_ann (stmt);
      use_ops = USE_OPS (ann);

      for (i = 0; i < NUM_USES (use_ops); i++)
	{
	  tree use = USE_OP (use_ops, i);

	  /* We are only interested in uses that need to be vectorized. Uses
	     that are used for address computation are not considered relevant.
	   */
	  if (exist_non_indexing_operands_for_use_p (use, stmt))
	    {
	      tree def_stmt = NULL_TREE;
	      basic_block bb;
	      if (!vect_is_simple_use (use, loop, &def_stmt))
		{
		  if (vect_debug_details (NULL))
		    fprintf (dump_file, "worklist: unsupported use.");
		  varray_clear (worklist);
		  return false;
		}

	      if (!def_stmt)
		continue;

	      if (vect_debug_details (NULL))
		{
		  fprintf (dump_file, "worklist: examine use %d: ", i);
		  print_generic_expr (dump_file, use, TDF_SLIM);
		}

	      /* Defs from outside the loop do not need to be vectorized.  */
	      bb = bb_for_stmt (def_stmt);
	      if (flow_bb_inside_loop_p (loop, bb))
		vect_mark_relevant (worklist, def_stmt);
	    }
	}
    }				/* while worklist */

  varray_clear (worklist);
  return true;
}
3831
3832
3833/* Function vect_get_loop_niters.
3834
3835 Determine how many iterations the loop is executed. */
3836
3837static tree
3838vect_get_loop_niters (struct loop *loop, HOST_WIDE_INT *number_of_iterations)
3839{
3840 tree niters;
3841
3842 if (vect_debug_details (NULL))
3843 fprintf (dump_file, "\n<<get_loop_niters>>\n");
3844
3845 niters = number_of_iterations_in_loop (loop);
3846
3847 if (niters != NULL_TREE
3848 && niters != chrec_dont_know
3849 && host_integerp (niters,0))
3850 {
3851 *number_of_iterations = TREE_INT_CST_LOW (niters);
3852
3853 if (vect_debug_details (NULL))
3854 fprintf (dump_file, "==> get_loop_niters:" HOST_WIDE_INT_PRINT_DEC,
3855 *number_of_iterations);
3856 }
3857
3858 return get_loop_exit_condition (loop);
3859}
3860
3861
3862/* Function vect_analyze_loop_form.
3863
3864 Verify the following restrictions (some may be relaxed in the future):
3865 - it's an inner-most loop
3866 - number of BBs = 2 (which are the loop header and the latch)
3867 - the loop has a pre-header
3868 - the loop has a single entry and exit
3869 - the loop exit condition is simple enough, and the number of iterations
3870 can be analyzed (a countable loop). */
3871
3872static loop_vec_info
3873vect_analyze_loop_form (struct loop *loop)
3874{
3875 loop_vec_info loop_vinfo;
3876 tree loop_cond;
3877 HOST_WIDE_INT number_of_iterations = -1;
3878
3879 if (vect_debug_details (loop))
3880 fprintf (dump_file, "\n<<vect_analyze_loop_form>>\n");
3881
82b85a85
ZD
3882 if (loop->inner
3883 || !loop->single_exit
3884 || loop->num_nodes != 2)
79fe1b3b
DN
3885 {
3886 if (vect_debug_stats (loop) || vect_debug_details (loop))
3887 {
3888 fprintf (dump_file, "not vectorized: bad loop form. ");
82b85a85 3889 if (loop->inner)
79fe1b3b 3890 fprintf (dump_file, "nested loop.");
82b85a85
ZD
3891 else if (!loop->single_exit)
3892 fprintf (dump_file, "multiple exits.");
3893 else if (loop->num_nodes != 2)
79fe1b3b 3894 fprintf (dump_file, "too many BBs in loop.");
79fe1b3b
DN
3895 }
3896
3897 return NULL;
3898 }
3899
3900 /* We assume that the loop exit condition is at the end of the loop. i.e,
3901 that the loop is represented as a do-while (with a proper if-guard
3902 before the loop if needed), where the loop header contains all the
3903 executable statements, and the latch is empty. */
3904 if (!empty_block_p (loop->latch))
3905 {
3906 if (vect_debug_stats (loop) || vect_debug_details (loop))
3907 fprintf (dump_file, "not vectorized: unexpectd loop form.");
3908 return NULL;
3909 }
3910
3911 if (empty_block_p (loop->header))
3912 {
3913 if (vect_debug_stats (loop) || vect_debug_details (loop))
3914 fprintf (dump_file, "not vectorized: empty loop.");
3915 return NULL;
3916 }
3917
3918 loop_cond = vect_get_loop_niters (loop, &number_of_iterations);
3919 if (!loop_cond)
3920 {
3921 if (vect_debug_stats (loop) || vect_debug_details (loop))
3922 fprintf (dump_file, "not vectorized: complicated exit condition.");
3923 return NULL;
3924 }
3925
3926 if (number_of_iterations < 0)
3927 {
3928 if (vect_debug_stats (loop) || vect_debug_details (loop))
3929 fprintf (dump_file, "not vectorized: unknown loop bound.");
3930 return NULL;
3931 }
3932
3933 if (number_of_iterations == 0) /* CHECKME: can this happen? */
3934 {
3935 if (vect_debug_stats (loop) || vect_debug_details (loop))
3936 fprintf (dump_file, "not vectorized: number of iterations = 0.");
3937 return NULL;
3938 }
3939
3940 loop_vinfo = new_loop_vec_info (loop);
3941 LOOP_VINFO_EXIT_COND (loop_vinfo) = loop_cond;
3942 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
3943
3944 return loop_vinfo;
3945}
3946
3947
3948/* Function vect_analyze_loop.
3949
3950 Apply a set of analyses on LOOP, and create a loop_vec_info struct
3951 for it. The different analyses will record information in the
3952 loop_vec_info struct. */
3953
3954static loop_vec_info
3955vect_analyze_loop (struct loop *loop)
3956{
3957 bool ok;
3958 loop_vec_info loop_vinfo;
3959
3960 if (vect_debug_details (NULL))
3961 fprintf (dump_file, "\n<<<<<<< analyze_loop_nest >>>>>>>\n");
3962
3963 /* Check the CFG characteristics of the loop (nesting, entry/exit, etc. */
3964
3965 loop_vinfo = vect_analyze_loop_form (loop);
3966 if (!loop_vinfo)
3967 {
3968 if (vect_debug_details (loop))
3969 fprintf (dump_file, "bad loop form.");
3970 return NULL;
3971 }
3972
3973 /* Find all data references in the loop (which correspond to vdefs/vuses)
3974 and analyze their evolution in the loop.
3975
6775f1f3 3976 FORNOW: Handle only simple, array references, which
79fe1b3b
DN
3977 alignment can be forced, and aligned pointer-references. */
3978
3979 ok = vect_analyze_data_refs (loop_vinfo);
3980 if (!ok)
3981 {
3982 if (vect_debug_details (loop))
3983 fprintf (dump_file, "bad data references.");
3984 destroy_loop_vec_info (loop_vinfo);
3985 return NULL;
3986 }
3987
79fe1b3b
DN
3988 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
3989
3990 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
3991 if (!ok)
3992 {
3993 if (vect_debug_details (loop))
3994 fprintf (dump_file, "unexpected pattern.");
3995 if (vect_debug_details (loop))
3996 fprintf (dump_file, "not vectorized: unexpected pattern.");
3997 destroy_loop_vec_info (loop_vinfo);
3998 return NULL;
3999 }
4000
79fe1b3b
DN
4001 /* Check that all cross-iteration scalar data-flow cycles are OK.
4002 Cross-iteration cycles caused by virtual phis are analyzed separately. */
4003
4004 ok = vect_analyze_scalar_cycles (loop_vinfo);
4005 if (!ok)
4006 {
4007 if (vect_debug_details (loop))
4008 fprintf (dump_file, "bad scalar cycle.");
4009 destroy_loop_vec_info (loop_vinfo);
4010 return NULL;
4011 }
4012
79fe1b3b
DN
4013 /* Analyze data dependences between the data-refs in the loop.
4014 FORNOW: fail at the first data dependence that we encounter. */
4015
4016 ok = vect_analyze_data_ref_dependences (loop_vinfo);
4017 if (!ok)
4018 {
4019 if (vect_debug_details (loop))
4020 fprintf (dump_file, "bad data dependence.");
4021 destroy_loop_vec_info (loop_vinfo);
4022 return NULL;
4023 }
4024
79fe1b3b
DN
4025 /* Analyze the access patterns of the data-refs in the loop (consecutive,
4026 complex, etc.). FORNOW: Only handle consecutive access pattern. */
4027
4028 ok = vect_analyze_data_ref_accesses (loop_vinfo);
4029 if (!ok)
4030 {
4031 if (vect_debug_details (loop))
4032 fprintf (dump_file, "bad data access.");
4033 destroy_loop_vec_info (loop_vinfo);
4034 return NULL;
4035 }
4036
79fe1b3b
DN
4037 /* Analyze the alignment of the data-refs in the loop.
4038 FORNOW: Only aligned accesses are handled. */
4039
4040 ok = vect_analyze_data_refs_alignment (loop_vinfo);
4041 if (!ok)
4042 {
4043 if (vect_debug_details (loop))
4044 fprintf (dump_file, "bad data alignment.");
4045 destroy_loop_vec_info (loop_vinfo);
4046 return NULL;
4047 }
4048
79fe1b3b
DN
4049 /* Scan all the operations in the loop and make sure they are
4050 vectorizable. */
4051
4052 ok = vect_analyze_operations (loop_vinfo);
4053 if (!ok)
4054 {
4055 if (vect_debug_details (loop))
4056 fprintf (dump_file, "bad operation or unsupported loop bound.");
4057 destroy_loop_vec_info (loop_vinfo);
4058 return NULL;
4059 }
4060
4061 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
4062
4063 return loop_vinfo;
4064}
4065
4066
4067/* Function need_imm_uses_for.
4068
4069 Return whether we ought to include information for 'var'
4070 when calculating immediate uses. For this pass we only want use
4071 information for non-virtual variables. */
4072
4073static bool
4074need_imm_uses_for (tree var)
4075{
4076 return is_gimple_reg (var);
4077}
4078
4079
4080/* Function vectorize_loops.
4081
4082 Entry Point to loop vectorization phase. */
4083
4084void
4085vectorize_loops (struct loops *loops)
4086{
4087 unsigned int i, loops_num;
4088 unsigned int num_vectorized_loops = 0;
4089
4090 /* Does the target support SIMD? */
4091 /* FORNOW: until more sophisticated machine modelling is in place. */
4092 if (!UNITS_PER_SIMD_WORD)
4093 {
4094 if (vect_debug_details (NULL))
4095 fprintf (dump_file, "vectorizer: target vector size is not defined.");
4096 return;
4097 }
4098
4099 compute_immediate_uses (TDFA_USE_OPS, need_imm_uses_for);
4100
4101 /* ----------- Analyze loops. ----------- */
4102
4103 /* If some loop was duplicated, it gets bigger number
4104 than all previously defined loops. This fact allows us to run
4105 only over initial loops skipping newly generated ones. */
4106 loops_num = loops->num;
4107 for (i = 1; i < loops_num; i++)
4108 {
4109 loop_vec_info loop_vinfo;
4110 struct loop *loop = loops->parray[i];
4111
4112 if (!loop)
4113 continue;
4114
79fe1b3b
DN
4115 loop_vinfo = vect_analyze_loop (loop);
4116 loop->aux = loop_vinfo;
4117
4118 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
4119 continue;
4120
4121 vect_transform_loop (loop_vinfo, loops);
4122 num_vectorized_loops++;
4123 }
4124
4125 if (vect_debug_stats (NULL) || vect_debug_details (NULL))
4126 fprintf (dump_file, "\nvectorized %u loops in function.\n",
4127 num_vectorized_loops);
4128
4129 /* ----------- Finalize. ----------- */
4130
4131 free_df ();
4132 for (i = 1; i < loops_num; i++)
4133 {
4134 struct loop *loop = loops->parray[i];
6775f1f3
IR
4135 loop_vec_info loop_vinfo;
4136
79fe1b3b 4137 if (!loop)
6775f1f3
IR
4138 continue;
4139 loop_vinfo = loop->aux;
79fe1b3b
DN
4140 destroy_loop_vec_info (loop_vinfo);
4141 loop->aux = NULL;
4142 }
4143
79fe1b3b
DN
4144 rewrite_into_ssa (false);
4145 if (bitmap_first_set_bit (vars_to_rename) >= 0)
4146 {
4147 /* The rewrite of ssa names may cause violation of loop closed ssa
4148 form invariants. TODO -- avoid these rewrites completely.
4149 Information in virtual phi nodes is sufficient for it. */
4150 rewrite_into_loop_closed_ssa ();
4151 }
4152 bitmap_clear (vars_to_rename);
4153}