fb85abff 1/* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "cfgloop.h"
35#include "expr.h"
36#include "optabs.h"
37#include "tree-chrec.h"
38#include "tree-scalar-evolution.h"
39#include "tree-vectorizer.h"
40#include "toplev.h"
41
42
43/* Return the smallest scalar type used in STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
 46 result-type is different from the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation - we'd miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
56 check the rhs.
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
59
60tree
61vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
63{
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
66
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
68
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
73 {
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
75
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
79 }
80
81 *lhs_size_unit = lhs;
82 *rhs_size_unit = rhs;
83 return scalar_type;
84}
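
/* For example (illustrative only, assuming 2-byte shorts and 4-byte ints):
   for a promotion statement such as

     int_x = (int) short_y;

   the result type is int (4 bytes) but the rhs type is short (2 bytes),
   so this function returns the short type with *LHS_SIZE_UNIT == 4 and
   *RHS_SIZE_UNIT == 2, and the vectorization factor is then derived from
   the 2-byte element rather than the 4-byte one.  */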
85
86
87/* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
89
90int
91vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92{
93 gimple next_stmt = first_stmt;
94 int result = 0;
95
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
98
99 while (next_stmt && next_stmt != stmt)
100 {
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103 }
104
105 if (next_stmt)
106 return result;
107 else
108 return -1;
109}
110
111
112/* Function vect_insert_into_interleaving_chain.
113
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
115
116static void
117vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
119{
120 gimple prev, next;
121 tree next_init;
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127 while (next)
128 {
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131 {
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
136 }
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139 }
140
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144}
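
/* Illustrative example (4-byte elements assumed): if DRB's chain already
   contains the accesses a[4*i] (init 0) and a[4*i+3] (init 12), and DRA
   accesses a[4*i+2] (init 8), the walk above stops at the first member
   whose init (12) is greater than DRA's init (8), and DRA is linked in
   between the two existing members, keeping the chain sorted by init:
   0 -> 8 -> 12.  */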
145
146
147/* Function vect_update_interleaving_chain.
148
 149 For two data-refs DRA and DRB that are a part of a chain of interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
151
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
169
170static void
171vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
173{
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
180
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183 {
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
188 }
189
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192 {
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
197 }
198
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201 {
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
206
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208 {
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
214
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
218 {
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221 }
222 }
223 else
224 {
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
228 }
229 return;
230 }
231
232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241 {
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
 244 start from the beginning). */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
248 }
249 else
250 {
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
 253 start from the beginning). */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
257 }
258
259 while (node)
260 {
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263 while (next)
264 {
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
267 {
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
273 }
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276 }
277 if (!next)
278 {
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
283 }
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
286 }
287}
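
/* Illustrative example for case 4 above (4-byte elements assumed): if
   DRA's chain contains the inits {4, 12} and DRB's chain contains the
   inits {0, 8}, the chain whose first init is smaller (DRB's) is kept,
   and the members of DRA's chain are merged into it in init order,
   giving a single chain 0 -> 4 -> 8 -> 12 whose FIRST_DR is the stmt
   with init 0.  */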
288
289
290/* Function vect_equal_offsets.
291
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
293
294static bool
295vect_equal_offsets (tree offset1, tree offset2)
296{
297 bool res0, res1;
298
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
301
302 if (offset1 == offset2)
303 return true;
304
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || !BINARY_CLASS_P (offset1)
307 || !BINARY_CLASS_P (offset2))
308 return false;
309
310 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
311 TREE_OPERAND (offset2, 0));
312 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
313 TREE_OPERAND (offset2, 1));
314
315 return (res0 && res1);
316}
317
318
319/* Function vect_check_interleaving.
320
321 Check if DRA and DRB are a part of interleaving. In case they are, insert
322 DRA and DRB in an interleaving chain. */
323
f083cd24 324static bool
fb85abff 325vect_check_interleaving (struct data_reference *dra,
326 struct data_reference *drb)
327{
328 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
329
 330 /* Check that the data-refs have the same first location (except init) and
 331 that they are both either stores or loads (not one load and one store). */
332 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
333 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
334 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
335 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
336 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
337 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
338 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
339 || DR_IS_READ (dra) != DR_IS_READ (drb))
f083cd24 340 return false;
fb85abff 341
342 /* Check:
343 1. data-refs are of the same type
344 2. their steps are equal
f083cd24 345 3. the step (if greater than zero) is greater than the difference between
346 data-refs' inits. */
fb85abff 347 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
348 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
349
350 if (type_size_a != type_size_b
351 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
352 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
353 TREE_TYPE (DR_REF (drb))))
f083cd24 354 return false;
fb85abff 355
356 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
357 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
358 step = TREE_INT_CST_LOW (DR_STEP (dra));
359
360 if (init_a > init_b)
361 {
362 /* If init_a == init_b + the size of the type * k, we have an interleaving,
363 and DRB is accessed before DRA. */
364 diff_mod_size = (init_a - init_b) % type_size_a;
365
37545e54 366 if (step && (init_a - init_b) > step)
f083cd24 367 return false;
fb85abff 368
369 if (diff_mod_size == 0)
370 {
371 vect_update_interleaving_chain (drb, dra);
372 if (vect_print_dump_info (REPORT_DR_DETAILS))
373 {
374 fprintf (vect_dump, "Detected interleaving ");
375 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
376 fprintf (vect_dump, " and ");
377 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
378 }
f083cd24 379 return true;
fb85abff 380 }
381 }
382 else
383 {
384 /* If init_b == init_a + the size of the type * k, we have an
385 interleaving, and DRA is accessed before DRB. */
386 diff_mod_size = (init_b - init_a) % type_size_a;
387
37545e54 388 if (step && (init_b - init_a) > step)
f083cd24 389 return false;
fb85abff 390
391 if (diff_mod_size == 0)
392 {
393 vect_update_interleaving_chain (dra, drb);
394 if (vect_print_dump_info (REPORT_DR_DETAILS))
395 {
396 fprintf (vect_dump, "Detected interleaving ");
397 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
398 fprintf (vect_dump, " and ");
399 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
400 }
f083cd24 401 return true;
fb85abff 402 }
403 }
f083cd24 404
405 return false;
fb85abff 406}
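
/* Illustrative example (4-byte elements assumed): in a loop such as

     for (i = 0; i < N; i++)
       {
         a[2*i] = x;      # DR_INIT == 0, DR_STEP == 8
         a[2*i+1] = y;    # DR_INIT == 4, DR_STEP == 8
       }

   both stores have the same base, offset, step and type size, and
   init_b - init_a == 4 is a multiple of the type size and does not
   exceed the step, so the two data-refs are recorded as members of one
   interleaving chain (DRA accessed before DRB).  */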
407
408/* Check if the data references pointed to by DR_I and DR_J are the same or
 409 belong to the same interleaving group. Return FALSE if the drs are
 410 different, otherwise return TRUE. */
411
412static bool
413vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
414{
415 gimple stmt_i = DR_STMT (dr_i);
416 gimple stmt_j = DR_STMT (dr_j);
417
418 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
419 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
420 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
421 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
422 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
423 return true;
424 else
425 return false;
426}
427
428/* If address ranges represented by DDR_I and DDR_J are equal,
429 return TRUE, otherwise return FALSE. */
430
431static bool
432vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
433{
434 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
435 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
436 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
437 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
438 return true;
439 else
440 return false;
441}
442
443/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
444 tested at run-time. Return TRUE if DDR was successfully inserted.
445 Return false if versioning is not supported. */
446
447static bool
448vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
449{
450 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
451
452 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
453 return false;
454
455 if (vect_print_dump_info (REPORT_DR_DETAILS))
456 {
457 fprintf (vect_dump, "mark for run-time aliasing test between ");
458 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
459 fprintf (vect_dump, " and ");
460 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
461 }
462
463 if (optimize_loop_nest_for_size_p (loop))
464 {
465 if (vect_print_dump_info (REPORT_DR_DETAILS))
466 fprintf (vect_dump, "versioning not supported when optimizing for size.");
467 return false;
468 }
469
470 /* FORNOW: We don't support versioning with outer-loop vectorization. */
471 if (loop->inner)
472 {
473 if (vect_print_dump_info (REPORT_DR_DETAILS))
474 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
475 return false;
476 }
477
478 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
479 return true;
480}
481
37545e54 482
fb85abff 483/* Function vect_analyze_data_ref_dependence.
484
 485 Return TRUE if there (might) exist a dependence between a memory-reference
 486 DRA and a memory-reference DRB. If the dependence can instead be checked
 487 at run-time (versioning for alias), return FALSE. */
488
489static bool
490vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
491 loop_vec_info loop_vinfo)
492{
493 unsigned int i;
37545e54 494 struct loop *loop = NULL;
495 int vectorization_factor = 0;
fb85abff 496 struct data_reference *dra = DDR_A (ddr);
497 struct data_reference *drb = DDR_B (ddr);
498 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
499 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
500 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
501 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
502 lambda_vector dist_v;
503 unsigned int loop_depth;
504
505 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
506 {
507 /* Independent data accesses. */
508 vect_check_interleaving (dra, drb);
509 return false;
510 }
511
37545e54 512 if (loop_vinfo)
513 {
514 loop = LOOP_VINFO_LOOP (loop_vinfo);
515 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
516 }
517
518 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
fb85abff 519 return false;
520
521 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
522 {
37545e54 523 if (loop_vinfo)
524 {
525 if (vect_print_dump_info (REPORT_DR_DETAILS))
526 {
527 fprintf (vect_dump, "versioning for alias required: "
528 "can't determine dependence between ");
529 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
530 fprintf (vect_dump, " and ");
531 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
532 }
533
534 /* Add to list of ddrs that need to be tested at run-time. */
535 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
536 }
537
 538 /* When vectorizing a basic block, an unknown dependence can still mean
539 strided access. */
540 if (vect_check_interleaving (dra, drb))
541 return false;
542
fb85abff 543 if (vect_print_dump_info (REPORT_DR_DETAILS))
544 {
37545e54 545 fprintf (vect_dump, "can't determine dependence between ");
fb85abff 546 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
547 fprintf (vect_dump, " and ");
548 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
549 }
37545e54 550
551 return true;
fb85abff 552 }
553
37545e54 554 /* Versioning for alias is not yet supported for basic block SLP, and
 555 dependence distance is not applicable; hence, in case of a known data
556 dependence, basic block vectorization is impossible for now. */
557 if (!loop_vinfo)
558 {
559 if (dra != drb && vect_check_interleaving (dra, drb))
560 return false;
561
562 if (vect_print_dump_info (REPORT_DR_DETAILS))
563 {
564 fprintf (vect_dump, "determined dependence between ");
565 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
566 fprintf (vect_dump, " and ");
567 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
568 }
569
570 return true;
571 }
572
573 /* Loop-based vectorization and known data dependence. */
fb85abff 574 if (DDR_NUM_DIST_VECTS (ddr) == 0)
575 {
576 if (vect_print_dump_info (REPORT_DR_DETAILS))
577 {
578 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
579 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
580 fprintf (vect_dump, " and ");
581 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
582 }
583 /* Add to list of ddrs that need to be tested at run-time. */
584 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
585 }
586
587 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
588 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
589 {
590 int dist = dist_v[loop_depth];
591
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
593 fprintf (vect_dump, "dependence distance = %d.", dist);
594
595 /* Same loop iteration. */
596 if (dist % vectorization_factor == 0 && dra_size == drb_size)
597 {
598 /* Two references with distance zero have the same alignment. */
599 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
600 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
601 if (vect_print_dump_info (REPORT_ALIGNMENT))
602 fprintf (vect_dump, "accesses have the same alignment.");
603 if (vect_print_dump_info (REPORT_DR_DETAILS))
604 {
605 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
606 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
607 fprintf (vect_dump, " and ");
608 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
609 }
610
 611 /* For interleaving, mark that there is a read-write dependency if
 612 necessary. We have already checked that one of the data-refs is a store. */
613 if (DR_IS_READ (dra))
614 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
615 else
616 {
617 if (DR_IS_READ (drb))
618 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
619 }
620
621 continue;
622 }
623
624 if (abs (dist) >= vectorization_factor
625 || (dist > 0 && DDR_REVERSED_P (ddr)))
626 {
627 /* Dependence distance does not create dependence, as far as
628 vectorization is concerned, in this case. If DDR_REVERSED_P the
629 order of the data-refs in DDR was reversed (to make distance
630 vector positive), and the actual distance is negative. */
631 if (vect_print_dump_info (REPORT_DR_DETAILS))
632 fprintf (vect_dump, "dependence distance >= VF or negative.");
633 continue;
634 }
635
f083cd24 636 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 637 {
37545e54 638 fprintf (vect_dump, "not vectorized, possible dependence "
639 "between data-refs ");
fb85abff 640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
643 }
644
645 return true;
646 }
647
648 return false;
649}
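
/* Illustrative example (vectorization factor 4 assumed): a loop such as

     for (i = 0; i < N; i++)
       a[i+3] = a[i] + x;

   has dependence distance 3, which is neither a multiple of the
   vectorization factor nor >= it, so a possible dependence is reported
   and the loop is not vectorized.  With distance 4 (a[i+4] = a[i] + x)
   the distance is a multiple of the vectorization factor, the two
   references are additionally recorded as having the same alignment,
   and the dependence does not prevent vectorization.  */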
650
651/* Function vect_analyze_data_ref_dependences.
652
653 Examine all the data references in the loop, and make sure there do not
654 exist any data dependences between them. */
655
656bool
37545e54 657vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
658 bb_vec_info bb_vinfo)
fb85abff 659{
660 unsigned int i;
37545e54 661 VEC (ddr_p, heap) *ddrs = NULL;
fb85abff 662 struct data_dependence_relation *ddr;
663
664 if (vect_print_dump_info (REPORT_DETAILS))
665 fprintf (vect_dump, "=== vect_analyze_dependences ===");
666
37545e54 667 if (loop_vinfo)
668 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
669 else
670 ddrs = BB_VINFO_DDRS (bb_vinfo);
671
fb85abff 672 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
673 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
674 return false;
675
676 return true;
677}
678
679
680/* Function vect_compute_data_ref_alignment
681
682 Compute the misalignment of the data reference DR.
683
684 Output:
685 1. If during the misalignment computation it is found that the data reference
686 cannot be vectorized then false is returned.
687 2. DR_MISALIGNMENT (DR) is defined.
688
689 FOR NOW: No analysis is actually performed. Misalignment is calculated
690 only for trivial cases. TODO. */
691
692static bool
693vect_compute_data_ref_alignment (struct data_reference *dr)
694{
695 gimple stmt = DR_STMT (dr);
696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 698 struct loop *loop = NULL;
fb85abff 699 tree ref = DR_REF (dr);
700 tree vectype;
701 tree base, base_addr;
702 bool base_aligned;
703 tree misalign;
704 tree aligned_to, alignment;
705
706 if (vect_print_dump_info (REPORT_DETAILS))
707 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
708
37545e54 709 if (loop_vinfo)
710 loop = LOOP_VINFO_LOOP (loop_vinfo);
711
fb85abff 712 /* Initialize misalignment to unknown. */
713 SET_DR_MISALIGNMENT (dr, -1);
714
715 misalign = DR_INIT (dr);
716 aligned_to = DR_ALIGNED_TO (dr);
717 base_addr = DR_BASE_ADDRESS (dr);
718 vectype = STMT_VINFO_VECTYPE (stmt_info);
719
720 /* In case the dataref is in an inner-loop of the loop that is being
721 vectorized (LOOP), we use the base and misalignment information
722 relative to the outer-loop (LOOP). This is ok only if the misalignment
723 stays the same throughout the execution of the inner-loop, which is why
 724 we have to check that the stride of the dataref in the inner-loop is a
 725 multiple of the vector size. */
37545e54 726 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 727 {
728 tree step = DR_STEP (dr);
729 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
730
731 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
732 {
733 if (vect_print_dump_info (REPORT_ALIGNMENT))
734 fprintf (vect_dump, "inner step divides the vector-size.");
735 misalign = STMT_VINFO_DR_INIT (stmt_info);
736 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
737 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
738 }
739 else
740 {
741 if (vect_print_dump_info (REPORT_ALIGNMENT))
742 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
743 misalign = NULL_TREE;
744 }
745 }
746
747 base = build_fold_indirect_ref (base_addr);
748 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
749
750 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
751 || !misalign)
752 {
753 if (vect_print_dump_info (REPORT_ALIGNMENT))
754 {
755 fprintf (vect_dump, "Unknown alignment for access: ");
756 print_generic_expr (vect_dump, base, TDF_SLIM);
757 }
758 return true;
759 }
760
761 if ((DECL_P (base)
762 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
763 alignment) >= 0)
764 || (TREE_CODE (base_addr) == SSA_NAME
765 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
766 TREE_TYPE (base_addr)))),
767 alignment) >= 0))
768 base_aligned = true;
769 else
770 base_aligned = false;
771
772 if (!base_aligned)
773 {
774 /* Do not change the alignment of global variables if
775 flag_section_anchors is enabled. */
776 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
777 || (TREE_STATIC (base) && flag_section_anchors))
778 {
779 if (vect_print_dump_info (REPORT_DETAILS))
780 {
781 fprintf (vect_dump, "can't force alignment of ref: ");
782 print_generic_expr (vect_dump, ref, TDF_SLIM);
783 }
784 return true;
785 }
786
787 /* Force the alignment of the decl.
788 NOTE: This is the only change to the code we make during
789 the analysis phase, before deciding to vectorize the loop. */
790 if (vect_print_dump_info (REPORT_DETAILS))
791 fprintf (vect_dump, "force alignment");
792 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
793 DECL_USER_ALIGN (base) = 1;
794 }
795
796 /* At this point we assume that the base is aligned. */
797 gcc_assert (base_aligned
798 || (TREE_CODE (base) == VAR_DECL
799 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
800
801 /* Modulo alignment. */
802 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
803
804 if (!host_integerp (misalign, 1))
805 {
806 /* Negative or overflowed misalignment value. */
807 if (vect_print_dump_info (REPORT_DETAILS))
808 fprintf (vect_dump, "unexpected misalign value");
809 return false;
810 }
811
812 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
813
814 if (vect_print_dump_info (REPORT_DETAILS))
815 {
816 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
817 print_generic_expr (vect_dump, ref, TDF_SLIM);
818 }
819
820 return true;
821}
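
/* Illustrative example: assume a 16-byte V4SI vectype and a local array
   'int b[256]' whose natural alignment is smaller than 16 bytes.  Since
   the decl's alignment can be forced, DECL_ALIGN of 'b' is raised to the
   vector alignment during this analysis.  For an access 'b[i+1]' with
   DR_INIT == 4 the misalignment is 4 % 16 == 4 bytes, and that value is
   stored in DR_MISALIGNMENT.  */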
822
823
824/* Function vect_compute_data_refs_alignment
825
826 Compute the misalignment of data references in the loop.
827 Return FALSE if a data reference is found that cannot be vectorized. */
828
829static bool
37545e54 830vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
831 bb_vec_info bb_vinfo)
fb85abff 832{
37545e54 833 VEC (data_reference_p, heap) *datarefs;
fb85abff 834 struct data_reference *dr;
835 unsigned int i;
836
37545e54 837 if (loop_vinfo)
838 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
839 else
840 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
841
fb85abff 842 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
843 if (!vect_compute_data_ref_alignment (dr))
844 return false;
845
846 return true;
847}
848
849
850/* Function vect_update_misalignment_for_peel
851
852 DR - the data reference whose misalignment is to be adjusted.
853 DR_PEEL - the data reference whose misalignment is being made
854 zero in the vector loop by the peel.
855 NPEEL - the number of iterations in the peel loop if the misalignment
856 of DR_PEEL is known at compile time. */
857
858static void
859vect_update_misalignment_for_peel (struct data_reference *dr,
860 struct data_reference *dr_peel, int npeel)
861{
862 unsigned int i;
863 VEC(dr_p,heap) *same_align_drs;
864 struct data_reference *current_dr;
865 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
866 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
867 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
868 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
869
870 /* For interleaved data accesses the step in the loop must be multiplied by
871 the size of the interleaving group. */
872 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
873 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
874 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
875 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
876
877 /* It can be assumed that the data refs with the same alignment as dr_peel
878 are aligned in the vector loop. */
879 same_align_drs
880 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
881 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
882 {
883 if (current_dr != dr)
884 continue;
885 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
886 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
887 SET_DR_MISALIGNMENT (dr, 0);
888 return;
889 }
890
891 if (known_alignment_for_access_p (dr)
892 && known_alignment_for_access_p (dr_peel))
893 {
894 int misal = DR_MISALIGNMENT (dr);
895 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
896 misal += npeel * dr_size;
897 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
898 SET_DR_MISALIGNMENT (dr, misal);
899 return;
900 }
901
902 if (vect_print_dump_info (REPORT_DETAILS))
903 fprintf (vect_dump, "Setting misalignment to -1.");
904 SET_DR_MISALIGNMENT (dr, -1);
905}
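
/* Illustrative example (4-byte elements, 16-byte vectors assumed): if
   DR_PEEL has known misalignment 8, peeling NPEEL == 2 iterations makes
   it aligned.  A second data-ref DR with known misalignment 12 is then
   updated to (12 + 2 * 4) % 16 == 4, whereas a data-ref whose
   misalignment is unknown simply remains unknown (-1).  */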
906
907
908/* Function vect_verify_datarefs_alignment
909
910 Return TRUE if all data references in the loop can be
911 handled with respect to alignment. */
912
37545e54 913bool
914vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 915{
37545e54 916 VEC (data_reference_p, heap) *datarefs;
fb85abff 917 struct data_reference *dr;
918 enum dr_alignment_support supportable_dr_alignment;
919 unsigned int i;
920
37545e54 921 if (loop_vinfo)
922 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
923 else
924 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
925
fb85abff 926 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
927 {
928 gimple stmt = DR_STMT (dr);
929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
930
931 /* For interleaving, only the alignment of the first access matters. */
932 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
933 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
934 continue;
935
936 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
937 if (!supportable_dr_alignment)
938 {
f083cd24 939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 940 {
941 if (DR_IS_READ (dr))
942 fprintf (vect_dump,
943 "not vectorized: unsupported unaligned load.");
944 else
945 fprintf (vect_dump,
946 "not vectorized: unsupported unaligned store.");
947 }
948 return false;
949 }
950 if (supportable_dr_alignment != dr_aligned
951 && vect_print_dump_info (REPORT_ALIGNMENT))
952 fprintf (vect_dump, "Vectorizing an unaligned access.");
953 }
954 return true;
955}
956
957
958/* Function vector_alignment_reachable_p
959
960 Return true if vector alignment for DR is reachable by peeling
961 a few loop iterations. Return false otherwise. */
962
963static bool
964vector_alignment_reachable_p (struct data_reference *dr)
965{
966 gimple stmt = DR_STMT (dr);
967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
969
970 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
971 {
 972 /* For interleaved accesses we peel only if the number of iterations in
 973 the prolog loop ({VF - misalignment}) is a multiple of the
 974 number of interleaved accesses. */
975 int elem_size, mis_in_elements;
976 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
977
978 /* FORNOW: handle only known alignment. */
979 if (!known_alignment_for_access_p (dr))
980 return false;
981
982 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
983 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
984
985 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
986 return false;
987 }
988
 989 /* If the misalignment is known at compile time then allow peeling
990 only if natural alignment is reachable through peeling. */
991 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
992 {
993 HOST_WIDE_INT elmsize =
994 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
995 if (vect_print_dump_info (REPORT_DETAILS))
996 {
997 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
998 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
999 }
1000 if (DR_MISALIGNMENT (dr) % elmsize)
1001 {
1002 if (vect_print_dump_info (REPORT_DETAILS))
1003 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1004 return false;
1005 }
1006 }
1007
1008 if (!known_alignment_for_access_p (dr))
1009 {
1010 tree type = (TREE_TYPE (DR_REF (dr)));
1011 tree ba = DR_BASE_OBJECT (dr);
1012 bool is_packed = false;
1013
1014 if (ba)
1015 is_packed = contains_packed_reference (ba);
1016
1017 if (vect_print_dump_info (REPORT_DETAILS))
1018 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1019 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1020 return true;
1021 else
1022 return false;
1023 }
1024
1025 return true;
1026}
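
/* Illustrative example: for a 16-byte V4SI access with known
   misalignment of 8 bytes, the element size is 4 and 8 % 4 == 0, so
   alignment is reachable by peeling two scalar iterations.  A
   misalignment of 6 bytes, in contrast, is not a multiple of the
   element size, so no number of whole-element peels can reach vector
   alignment and the function returns false.  */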
1027
1028/* Function vect_enhance_data_refs_alignment
1029
1030 This pass will use loop versioning and loop peeling in order to enhance
1031 the alignment of data references in the loop.
1032
1033 FOR NOW: we assume that whatever versioning/peeling takes place, only the
 1034 original loop is to be vectorized; any other loops that are created by
 1035 the transformations performed in this pass are not supposed to be
1036 vectorized. This restriction will be relaxed.
1037
1038 This pass will require a cost model to guide it whether to apply peeling
1039 or versioning or a combination of the two. For example, the scheme that
 1040 Intel uses when given a loop with several memory accesses is as follows:
 1041 choose one memory access ('p') whose alignment you want to force by doing
1042 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1043 other accesses are not necessarily aligned, or (2) use loop versioning to
1044 generate one loop in which all accesses are aligned, and another loop in
1045 which only 'p' is necessarily aligned.
1046
1047 ("Automatic Intra-Register Vectorization for the Intel Architecture",
 1048 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1049 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1050
1051 Devising a cost model is the most critical aspect of this work. It will
1052 guide us on which access to peel for, whether to use loop versioning, how
1053 many versions to create, etc. The cost model will probably consist of
1054 generic considerations as well as target specific considerations (on
1055 powerpc for example, misaligned stores are more painful than misaligned
1056 loads).
1057
1058 Here are the general steps involved in alignment enhancements:
1059
1060 -- original loop, before alignment analysis:
1061 for (i=0; i<N; i++){
1062 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1063 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1064 }
1065
1066 -- After vect_compute_data_refs_alignment:
1067 for (i=0; i<N; i++){
1068 x = q[i]; # DR_MISALIGNMENT(q) = 3
1069 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1070 }
1071
1072 -- Possibility 1: we do loop versioning:
1073 if (p is aligned) {
1074 for (i=0; i<N; i++){ # loop 1A
1075 x = q[i]; # DR_MISALIGNMENT(q) = 3
1076 p[i] = y; # DR_MISALIGNMENT(p) = 0
1077 }
1078 }
1079 else {
1080 for (i=0; i<N; i++){ # loop 1B
1081 x = q[i]; # DR_MISALIGNMENT(q) = 3
1082 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1083 }
1084 }
1085
1086 -- Possibility 2: we do loop peeling:
1087 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1088 x = q[i];
1089 p[i] = y;
1090 }
1091 for (i = 3; i < N; i++){ # loop 2A
1092 x = q[i]; # DR_MISALIGNMENT(q) = 0
1093 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1094 }
1095
1096 -- Possibility 3: combination of loop peeling and versioning:
1097 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1098 x = q[i];
1099 p[i] = y;
1100 }
1101 if (p is aligned) {
1102 for (i = 3; i<N; i++){ # loop 3A
1103 x = q[i]; # DR_MISALIGNMENT(q) = 0
1104 p[i] = y; # DR_MISALIGNMENT(p) = 0
1105 }
1106 }
1107 else {
1108 for (i = 3; i<N; i++){ # loop 3B
1109 x = q[i]; # DR_MISALIGNMENT(q) = 0
1110 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1111 }
1112 }
1113
1114 These loops are later passed to loop_transform to be vectorized. The
1115 vectorizer will use the alignment information to guide the transformation
1116 (whether to generate regular loads/stores, or with special handling for
1117 misalignment). */
1118
1119bool
1120vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1121{
1122 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1124 enum dr_alignment_support supportable_dr_alignment;
1125 struct data_reference *dr0 = NULL;
1126 struct data_reference *dr;
1127 unsigned int i;
1128 bool do_peeling = false;
1129 bool do_versioning = false;
1130 bool stat;
1131 gimple stmt;
1132 stmt_vec_info stmt_info;
1133 int vect_versioning_for_alias_required;
1134
1135 if (vect_print_dump_info (REPORT_DETAILS))
1136 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1137
1138 /* While cost model enhancements are expected in the future, the high level
1139 view of the code at this time is as follows:
1140
454f25be 1141 A) If there is an unsupported misaligned access then see if peeling
1142 to align this access can make all data references satisfy
1143 vect_supportable_dr_alignment. If so, update data structures
1144 as needed and return true.
fb85abff 1145
1146 B) If peeling wasn't possible and there is a data reference with an
1147 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1148 then see if loop versioning checks can be used to make all data
1149 references satisfy vect_supportable_dr_alignment. If so, update
1150 data structures as needed and return true.
1151
1152 C) If neither peeling nor versioning were successful then return false if
1153 any data reference does not satisfy vect_supportable_dr_alignment.
1154
1155 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1156
1157 Note, Possibility 3 above (which is peeling and versioning together) is not
1158 being done at this time. */
1159
1160 /* (1) Peeling to force alignment. */
1161
1162 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1163 Considerations:
1164 + How many accesses will become aligned due to the peeling
1165 - How many accesses will become unaligned due to the peeling,
1166 and the cost of misaligned accesses.
1167 - The cost of peeling (the extra runtime checks, the increase
1168 in code size).
1169
1170 The scheme we use FORNOW: peel to force the alignment of the first
454f25be 1171 unsupported misaligned access in the loop.
fb85abff 1172
1173 TODO: Use a cost model. */
1174
1175 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1176 {
1177 stmt = DR_STMT (dr);
1178 stmt_info = vinfo_for_stmt (stmt);
454f25be 1179 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
fb85abff 1180
1181 /* For interleaving, only the alignment of the first access
1182 matters. */
1183 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1184 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1185 continue;
1186
454f25be 1187 if (!supportable_dr_alignment)
fb85abff 1188 {
1189 do_peeling = vector_alignment_reachable_p (dr);
1190 if (do_peeling)
1191 dr0 = dr;
1192 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1193 fprintf (vect_dump, "vector alignment may not be reachable");
1194 break;
1195 }
1196 }
1197
10095225 1198 vect_versioning_for_alias_required
1199 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
fb85abff 1200
1201 /* Temporarily, if versioning for alias is required, we disable peeling
1202 until we support peeling and versioning. Often peeling for alignment
1203 will require peeling for loop-bound, which in turn requires that we
1204 know how to adjust the loop ivs after the loop. */
1205 if (vect_versioning_for_alias_required
10095225 1206 || !vect_can_advance_ivs_p (loop_vinfo)
fb85abff 1207 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1208 do_peeling = false;
1209
1210 if (do_peeling)
1211 {
1212 int mis;
1213 int npeel = 0;
1214 gimple stmt = DR_STMT (dr0);
1215 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1216 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1217 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1218
1219 if (known_alignment_for_access_p (dr0))
1220 {
1221 /* Since it's known at compile time, compute the number of iterations
1222 in the peeled loop (the peeling factor) for use in updating
1223 DR_MISALIGNMENT values. The peeling factor is the vectorization
1224 factor minus the misalignment as an element count. */
1225 mis = DR_MISALIGNMENT (dr0);
1226 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1227 npeel = nelements - mis;
1228
1229 /* For interleaved data access every iteration accesses all the
1230 members of the group, therefore we divide the number of iterations
1231 by the group size. */
1232 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1233 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1234 npeel /= DR_GROUP_SIZE (stmt_info);
1235
1236 if (vect_print_dump_info (REPORT_DETAILS))
1237 fprintf (vect_dump, "Try peeling by %d", npeel);
1238 }
1239
1240 /* Ensure that all data refs can be vectorized after the peel. */
1241 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1242 {
1243 int save_misalignment;
1244
1245 if (dr == dr0)
1246 continue;
1247
1248 stmt = DR_STMT (dr);
1249 stmt_info = vinfo_for_stmt (stmt);
1250 /* For interleaving, only the alignment of the first access
1251 matters. */
1252 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1253 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1254 continue;
1255
1256 save_misalignment = DR_MISALIGNMENT (dr);
1257 vect_update_misalignment_for_peel (dr, dr0, npeel);
1258 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1259 SET_DR_MISALIGNMENT (dr, save_misalignment);
1260
1261 if (!supportable_dr_alignment)
1262 {
1263 do_peeling = false;
1264 break;
1265 }
1266 }
1267
1268 if (do_peeling)
1269 {
1270 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1271 If the misalignment of DR_i is identical to that of dr0 then set
1272 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1273 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1274 by the peeling factor times the element size of DR_i (MOD the
1275 vectorization factor times the size). Otherwise, the
1276 misalignment of DR_i must be set to unknown. */
1277 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1278 if (dr != dr0)
1279 vect_update_misalignment_for_peel (dr, dr0, npeel);
1280
1281 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1282 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1283 SET_DR_MISALIGNMENT (dr0, 0);
1284 if (vect_print_dump_info (REPORT_ALIGNMENT))
1285 fprintf (vect_dump, "Alignment of access forced using peeling.");
1286
1287 if (vect_print_dump_info (REPORT_DETAILS))
1288 fprintf (vect_dump, "Peeling for alignment will be applied.");
1289
37545e54 1290 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1291 gcc_assert (stat);
1292 return stat;
1293 }
1294 }
1295
1296
1297 /* (2) Versioning to force alignment. */
1298
1299 /* Try versioning if:
1300 1) flag_tree_vect_loop_version is TRUE
1301 2) optimize loop for speed
1302 3) there is at least one unsupported misaligned data ref with an unknown
1303 misalignment, and
1304 4) all misaligned data refs with a known misalignment are supported, and
1305 5) the number of runtime alignment checks is within reason. */
1306
1307 do_versioning =
1308 flag_tree_vect_loop_version
1309 && optimize_loop_nest_for_speed_p (loop)
1310 && (!loop->inner); /* FORNOW */
1311
1312 if (do_versioning)
1313 {
1314 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1315 {
1316 stmt = DR_STMT (dr);
1317 stmt_info = vinfo_for_stmt (stmt);
1318
1319 /* For interleaving, only the alignment of the first access
1320 matters. */
1321 if (aligned_access_p (dr)
1322 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1323 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1324 continue;
1325
1326 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1327
1328 if (!supportable_dr_alignment)
1329 {
1330 gimple stmt;
1331 int mask;
1332 tree vectype;
1333
1334 if (known_alignment_for_access_p (dr)
1335 || VEC_length (gimple,
1336 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1337 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1338 {
1339 do_versioning = false;
1340 break;
1341 }
1342
1343 stmt = DR_STMT (dr);
1344 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1345 gcc_assert (vectype);
1346
1347 /* The rightmost bits of an aligned address must be zeros.
1348 Construct the mask needed for this test. For example,
1349 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1350 mask must be 15 = 0xf. */
1351 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1352
1353 /* FORNOW: use the same mask to test all potentially unaligned
1354 references in the loop. The vectorizer currently supports
1355 a single vector size, see the reference to
1356 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1357 vectorization factor is computed. */
1358 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1359 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1360 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1361 VEC_safe_push (gimple, heap,
1362 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1363 DR_STMT (dr));
1364 }
1365 }
1366
1367 /* Versioning requires at least one misaligned data reference. */
10095225 1368 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
fb85abff 1369 do_versioning = false;
1370 else if (!do_versioning)
1371 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1372 }
1373
1374 if (do_versioning)
1375 {
1376 VEC(gimple,heap) *may_misalign_stmts
1377 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1378 gimple stmt;
1379
1380 /* It can now be assumed that the data references in the statements
1381 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1382 of the loop being vectorized. */
1383 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1384 {
1385 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1386 dr = STMT_VINFO_DATA_REF (stmt_info);
1387 SET_DR_MISALIGNMENT (dr, 0);
1388 if (vect_print_dump_info (REPORT_ALIGNMENT))
1389 fprintf (vect_dump, "Alignment of access forced using versioning.");
1390 }
1391
1392 if (vect_print_dump_info (REPORT_DETAILS))
1393 fprintf (vect_dump, "Versioning for alignment will be applied.");
1394
1395 /* Peeling and versioning can't be done together at this time. */
1396 gcc_assert (! (do_peeling && do_versioning));
1397
37545e54 1398 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1399 gcc_assert (stat);
1400 return stat;
1401 }
1402
1403 /* This point is reached if neither peeling nor versioning is being done. */
1404 gcc_assert (! (do_peeling || do_versioning));
1405
37545e54 1406 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1407 return stat;
1408}
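
/* Illustrative sketch of the run-time test generated when versioning
   for alignment: with 16-byte vectors LOOP_VINFO_PTR_MASK is 0xf, and
   the guard is conceptually

     if (((unsigned long) p & 0xf) == 0
         && ((unsigned long) q & 0xf) == 0)
       ... vectorized loop, p and q treated as aligned ...
     else
       ... unvectorized fallback loop ...

   where p and q stand for the addresses of the data-refs recorded in
   LOOP_VINFO_MAY_MISALIGN_STMTS.  */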
1409
1410
1411/* Function vect_analyze_data_refs_alignment
1412
1413 Analyze the alignment of the data-references in the loop.
1414 Return FALSE if a data reference is found that cannot be vectorized. */
1415
1416bool
37545e54 1417vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1418 bb_vec_info bb_vinfo)
fb85abff 1419{
1420 if (vect_print_dump_info (REPORT_DETAILS))
1421 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1422
37545e54 1423 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
fb85abff 1424 {
f083cd24 1425 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1426 fprintf (vect_dump,
1427 "not vectorized: can't calculate alignment for data ref.");
1428 return false;
1429 }
1430
1431 return true;
1432}
1433
1434
1435/* Analyze groups of strided accesses: check that DR belongs to a group of
1436 strided accesses of legal size, step, etc. Detect gaps, single element
1437 interleaving, and other special cases. Set strided access info.
1438 Collect groups of strided stores for further use in SLP analysis. */
1439
1440static bool
1441vect_analyze_group_access (struct data_reference *dr)
1442{
1443 tree step = DR_STEP (dr);
1444 tree scalar_type = TREE_TYPE (DR_REF (dr));
1445 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1446 gimple stmt = DR_STMT (dr);
1447 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1448 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 1449 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
fb85abff 1450 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1451 HOST_WIDE_INT stride;
1452 bool slp_impossible = false;
1453
1454 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1455 interleaving group (including gaps). */
1456 stride = dr_step / type_size;
1457
 1458 /* A non-consecutive access is possible only if it is a part of interleaving. */
1459 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1460 {
 1461 /* Check if this DR is a part of interleaving, and is a single
1462 element of the group that is accessed in the loop. */
1463
1464 /* Gaps are supported only for loads. STEP must be a multiple of the type
1465 size. The size of the group must be a power of 2. */
1466 if (DR_IS_READ (dr)
1467 && (dr_step % type_size) == 0
1468 && stride > 0
1469 && exact_log2 (stride) != -1)
1470 {
1471 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1472 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1473 if (vect_print_dump_info (REPORT_DR_DETAILS))
1474 {
37545e54 1475 fprintf (vect_dump, "Detected single element interleaving ");
fb85abff 1476 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1477 fprintf (vect_dump, " step ");
1478 print_generic_expr (vect_dump, step, TDF_SLIM);
1479 }
1480 return true;
1481 }
1482 if (vect_print_dump_info (REPORT_DETAILS))
1483 fprintf (vect_dump, "not consecutive access");
1484 return false;
1485 }
1486
1487 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1488 {
1489 /* First stmt in the interleaving chain. Check the chain. */
1490 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1491 struct data_reference *data_ref = dr;
1a0e7d51 1492 unsigned int count = 1;
fb85abff 1493 tree next_step;
1494 tree prev_init = DR_INIT (data_ref);
1495 gimple prev = stmt;
1a0e7d51 1496 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
fb85abff 1497
1498 while (next)
1499 {
 1500 /* Skip same data-refs. In case two or more stmts share a data-ref
1501 (supported only for loads), we vectorize only the first stmt, and
1502 the rest get their vectorized loads from the first one. */
1503 if (!tree_int_cst_compare (DR_INIT (data_ref),
1504 DR_INIT (STMT_VINFO_DATA_REF (
1505 vinfo_for_stmt (next)))))
1506 {
1507 if (!DR_IS_READ (data_ref))
1508 {
1509 if (vect_print_dump_info (REPORT_DETAILS))
1510 fprintf (vect_dump, "Two store stmts share the same dr.");
1511 return false;
1512 }
1513
 1514 /* Check that there are no load-store dependencies for these loads,
 1515 to prevent a load-store-load sequence to the same location. */
1516 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1517 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1518 {
1519 if (vect_print_dump_info (REPORT_DETAILS))
1520 fprintf (vect_dump,
1521 "READ_WRITE dependence in interleaving.");
1522 return false;
1523 }
1524
 1525 /* For a load, use the same data-ref load as PREV. */
1526 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1527
1528 prev = next;
1529 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1530 continue;
1531 }
1532 prev = next;
1533
1534 /* Check that all the accesses have the same STEP. */
1535 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1536 if (tree_int_cst_compare (step, next_step))
1537 {
1538 if (vect_print_dump_info (REPORT_DETAILS))
1539 fprintf (vect_dump, "not consecutive access in interleaving");
1540 return false;
1541 }
1542
1543 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1544 /* Check that the distance between two accesses is equal to the type
1545 size. Otherwise, we have gaps. */
1546 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1547 - TREE_INT_CST_LOW (prev_init)) / type_size;
1548 if (diff != 1)
1549 {
1550 /* FORNOW: SLP of accesses with gaps is not supported. */
1551 slp_impossible = true;
1552 if (!DR_IS_READ (data_ref))
1553 {
1554 if (vect_print_dump_info (REPORT_DETAILS))
1555 fprintf (vect_dump, "interleaved store with gaps");
1556 return false;
1557 }
b11576bf 1558
1559 gaps += diff - 1;
fb85abff 1560 }
1561
1562 /* Store the gap from the previous member of the group. If there is no
1563 gap in the access, DR_GROUP_GAP is always 1. */
1564 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1565
1566 prev_init = DR_INIT (data_ref);
1567 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1568 /* Count the number of data-refs in the chain. */
1569 count++;
1570 }
1571
 1572 /* COUNT is the number of accesses found; we multiply it by the size of
1573 the type to get COUNT_IN_BYTES. */
1574 count_in_bytes = type_size * count;
1575
37545e54 1576 /* Check that the size of the interleaving (including gaps) is not
1577 greater than STEP. */
b11576bf 1578 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
fb85abff 1579 {
1580 if (vect_print_dump_info (REPORT_DETAILS))
1581 {
1582 fprintf (vect_dump, "interleaving size is greater than step for ");
1583 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1584 }
1585 return false;
1586 }
1587
1588 /* Check that the size of the interleaving is equal to STEP for stores,
1589 i.e., that there are no gaps. */
37545e54 1590 if (dr_step && dr_step != count_in_bytes)
fb85abff 1591 {
1592 if (DR_IS_READ (dr))
1593 {
1594 slp_impossible = true;
 1595 /* There is a gap after the last load in the group. This gap is the
1596 difference between the stride and the number of elements. When
1597 there is no gap, this difference should be 0. */
1598 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1599 }
1600 else
1601 {
1602 if (vect_print_dump_info (REPORT_DETAILS))
1603 fprintf (vect_dump, "interleaved store with gaps");
1604 return false;
1605 }
1606 }
1607
1608 /* Check that STEP is a multiple of type size. */
37545e54 1609 if (dr_step && (dr_step % type_size) != 0)
fb85abff 1610 {
1611 if (vect_print_dump_info (REPORT_DETAILS))
1612 {
1613 fprintf (vect_dump, "step is not a multiple of type size: step ");
1614 print_generic_expr (vect_dump, step, TDF_SLIM);
1615 fprintf (vect_dump, " size ");
1616 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1617 TDF_SLIM);
1618 }
1619 return false;
1620 }
1621
1622 /* FORNOW: we handle only interleaving that is a power of 2.
 1623 We don't fail here if it may still be possible to vectorize the
1624 group using SLP. If not, the size of the group will be checked in
1625 vect_analyze_operations, and the vectorization will fail. */
1626 if (exact_log2 (stride) == -1)
1627 {
1628 if (vect_print_dump_info (REPORT_DETAILS))
1629 fprintf (vect_dump, "interleaving is not a power of 2");
1630
1631 if (slp_impossible)
1632 return false;
1633 }
37545e54 1634
1635 if (stride == 0)
1636 stride = count;
1637
fb85abff 1638 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1639 if (vect_print_dump_info (REPORT_DETAILS))
1640 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1641
1642 /* SLP: create an SLP data structure for every interleaving group of
1643 stores for further analysis in vect_analyze_slp. */
1644 if (!DR_IS_READ (dr) && !slp_impossible)
37545e54 1645 {
1646 if (loop_vinfo)
1647 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1648 stmt);
1649 if (bb_vinfo)
1650 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1651 stmt);
1652 }
fb85abff 1653 }
1654
1655 return true;
1656}
1657
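/* For illustration only (this example is not taken from the code
   above): in a loop such as

     for (i = 0; i < n; i++)
       sum += a[4*i] + a[4*i+1] + a[4*i+2];

   the three loads form a single interleaving group.  DR_STEP of each
   load is 4 * sizeof (*a), so STRIDE is 4, COUNT is 3 and
   COUNT_IN_BYTES is 3 * sizeof (*a).  Because STEP != COUNT_IN_BYTES
   there is a gap of one element after the last load; the code above
   records it by setting DR_GROUP_GAP of the first statement to
   STRIDE - COUNT == 1, sets SLP_IMPOSSIBLE (SLP of groups with gaps
   is not supported), sets DR_GROUP_SIZE to 4, and accepts the group
   because 4 is a power of 2.  */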
1658
1659/* Analyze the access pattern of the data-reference DR.
1660 In case of non-consecutive accesses call vect_analyze_group_access() to
1661 analyze groups of strided accesses. */
1662
1663static bool
1664vect_analyze_data_ref_access (struct data_reference *dr)
1665{
1666 tree step = DR_STEP (dr);
1667 tree scalar_type = TREE_TYPE (DR_REF (dr));
1668 gimple stmt = DR_STMT (dr);
1669 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1670 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 1671 struct loop *loop = NULL;
fb85abff 1672 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1673
37545e54 1674 if (loop_vinfo)
1675 loop = LOOP_VINFO_LOOP (loop_vinfo);
1676
1677 if (loop_vinfo && !step)
fb85abff 1678 {
1679 if (vect_print_dump_info (REPORT_DETAILS))
37545e54 1680 fprintf (vect_dump, "bad data-ref access in loop");
fb85abff 1681 return false;
1682 }
1683
37545e54 1684 /* Don't allow invariant accesses in loops. */
1685 if (loop_vinfo && dr_step == 0)
fb85abff 1686 return false;
1687
37545e54 1688 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 1689 {
1690 /* Interleaved accesses are not yet supported within outer-loop
1691 vectorization for references in the inner-loop. */
1692 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1693
1694 /* For the rest of the analysis we use the outer-loop step. */
1695 step = STMT_VINFO_DR_STEP (stmt_info);
1696 dr_step = TREE_INT_CST_LOW (step);
1697
1698 if (dr_step == 0)
1699 {
1700 if (vect_print_dump_info (REPORT_ALIGNMENT))
1701 fprintf (vect_dump, "zero step in outer loop.");
1702 if (DR_IS_READ (dr))
1703 return true;
1704 else
1705 return false;
1706 }
1707 }
1708
1709 /* Consecutive? */
1710 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1711 {
1712 /* Mark that it is not interleaving. */
1713 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1714 return true;
1715 }
1716
37545e54 1717 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 1718 {
1719 if (vect_print_dump_info (REPORT_ALIGNMENT))
1720 fprintf (vect_dump, "strided access in outer loop.");
1721 return false;
1722 }
1723
1724 /* Not consecutive access - check if it's a part of interleaving group. */
1725 return vect_analyze_group_access (dr);
1726}
1727
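/* For illustration only: for a data-ref such as a[i] inside the loop,
   DR_STEP equals TYPE_SIZE_UNIT of the element type, so the access is
   classified as consecutive by the function above and no interleaving
   information is recorded.  For a data-ref such as a[2*i], DR_STEP is
   twice the element size, and the access is handed to
   vect_analyze_group_access to check whether it is part of an
   interleaving group.  */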
1728
1729/* Function vect_analyze_data_ref_accesses.
1730
1731 Analyze the access pattern of all the data references in the loop.
1732
1733 FORNOW: the only access pattern that is considered vectorizable is a
1734 simple step 1 (consecutive) access.
1735
1736 FORNOW: handle only arrays and pointer accesses. */
1737
1738bool
37545e54 1739vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 1740{
1741 unsigned int i;
37545e54 1742 VEC (data_reference_p, heap) *datarefs;
fb85abff 1743 struct data_reference *dr;
1744
1745 if (vect_print_dump_info (REPORT_DETAILS))
1746 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1747
37545e54 1748 if (loop_vinfo)
1749 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1750 else
1751 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1752
fb85abff 1753 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1754 if (!vect_analyze_data_ref_access (dr))
1755 {
f083cd24 1756 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1757 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1758 return false;
1759 }
1760
1761 return true;
1762}
1763
1764/* Function vect_prune_runtime_alias_test_list.
1765
1766 Prune a list of ddrs to be tested at run-time by versioning for alias.
1767 Return FALSE if the resulting list of ddrs is longer than allowed by
1768 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1769
1770bool
1771vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1772{
1773 VEC (ddr_p, heap) * ddrs =
1774 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1775 unsigned i, j;
1776
1777 if (vect_print_dump_info (REPORT_DETAILS))
1778 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1779
1780 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1781 {
1782 bool found;
1783 ddr_p ddr_i;
1784
1785 ddr_i = VEC_index (ddr_p, ddrs, i);
1786 found = false;
1787
1788 for (j = 0; j < i; j++)
1789 {
1790 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1791
1792 if (vect_vfa_range_equal (ddr_i, ddr_j))
1793 {
1794 if (vect_print_dump_info (REPORT_DR_DETAILS))
1795 {
1796 fprintf (vect_dump, "found equal ranges ");
1797 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1798 fprintf (vect_dump, ", ");
1799 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1800 fprintf (vect_dump, " and ");
1801 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1802 fprintf (vect_dump, ", ");
1803 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1804 }
1805 found = true;
1806 break;
1807 }
1808 }
1809
1810 if (found)
1811 {
1812 VEC_ordered_remove (ddr_p, ddrs, i);
1813 continue;
1814 }
1815 i++;
1816 }
1817
1818 if (VEC_length (ddr_p, ddrs) >
1819 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1820 {
1821 if (vect_print_dump_info (REPORT_DR_DETAILS))
1822 {
1823 fprintf (vect_dump,
1824 "disable versioning for alias - max number of generated "
1825 "checks exceeded.");
1826 }
1827
1828 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1829
1830 return false;
1831 }
1832
1833 return true;
1834}
1835
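/* For illustration only: if the same pair of references gives rise to
   more than one may-alias ddr - e.g. a loop that stores to b[i] and
   loads a[i] in two different statements, with a and b incoming
   pointers - the run-time checks generated for them would test
   exactly the same address ranges.  The function above detects this
   with vect_vfa_range_equal and keeps only one ddr per range.  If the
   pruned list is still longer than
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS entries, versioning for
   alias is given up for this loop.  */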
1836
1837/* Function vect_analyze_data_refs.
1838
37545e54 1839 Find all the data references in the loop or basic block.
fb85abff 1840
1841 The general structure of the analysis of data refs in the vectorizer is as
1842 follows:
37545e54 1843 1- vect_analyze_data_refs(loop/bb): call
1844 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1845 in the loop/bb and their dependences.
fb85abff 1846 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1847 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1848 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1849
1850*/
1851
1852bool
37545e54 1853vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 1854{
37545e54 1855 struct loop *loop = NULL;
1856 basic_block bb = NULL;
fb85abff 1857 unsigned int i;
1858 VEC (data_reference_p, heap) *datarefs;
1859 struct data_reference *dr;
1860 tree scalar_type;
1861
1862 if (vect_print_dump_info (REPORT_DETAILS))
1863 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
37545e54 1864
1865 if (loop_vinfo)
1866 {
1867 loop = LOOP_VINFO_LOOP (loop_vinfo);
1868 compute_data_dependences_for_loop (loop, true,
1869 &LOOP_VINFO_DATAREFS (loop_vinfo),
1870 &LOOP_VINFO_DDRS (loop_vinfo));
1871 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1872 }
1873 else
1874 {
1875 bb = BB_VINFO_BB (bb_vinfo);
1876 compute_data_dependences_for_bb (bb, true,
1877 &BB_VINFO_DATAREFS (bb_vinfo),
1878 &BB_VINFO_DDRS (bb_vinfo));
1879 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1880 }
fb85abff 1881
1882 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1883 from stmt_vec_info struct to DR and vectype. */
fb85abff 1884
1885 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1886 {
1887 gimple stmt;
1888 stmt_vec_info stmt_info;
1889 basic_block bb;
1890 tree base, offset, init;
1891
1892 if (!dr || !DR_REF (dr))
1893 {
f083cd24 1894 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1895 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1896 return false;
1897 }
1898
1899 stmt = DR_STMT (dr);
1900 stmt_info = vinfo_for_stmt (stmt);
1901
1902 /* Check that analysis of the data-ref succeeded. */
1903 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1904 || !DR_STEP (dr))
1905 {
f083cd24 1906 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1907 {
1908 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1909 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1910 }
1911 return false;
1912 }
1913
1914 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1915 {
f083cd24 1916 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1917 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1918 "constant");
1919 return false;
1920 }
1921
fb85abff 1922 base = unshare_expr (DR_BASE_ADDRESS (dr));
1923 offset = unshare_expr (DR_OFFSET (dr));
1924 init = unshare_expr (DR_INIT (dr));
1925
1926 /* Update DR field in stmt_vec_info struct. */
1927 bb = gimple_bb (stmt);
1928
1929 /* If the dataref is in an inner-loop of the loop that is considered
1930 for vectorization, we also want to analyze the access relative to
1931 the outer-loop (DR contains information only relative to the
1932 inner-most enclosing loop). We do that by building a reference to the
1933 first location accessed by the inner-loop, and analyze it relative to
1934 the outer-loop. */
37545e54 1935 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 1936 {
1937 tree outer_step, outer_base, outer_init;
1938 HOST_WIDE_INT pbitsize, pbitpos;
1939 tree poffset;
1940 enum machine_mode pmode;
1941 int punsignedp, pvolatilep;
1942 affine_iv base_iv, offset_iv;
1943 tree dinit;
1944
1945 /* Build a reference to the first location accessed by the
1946 inner-loop: *(BASE+INIT). (The first location is actually
1947 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1948 tree inner_base = build_fold_indirect_ref
1949 (fold_build2 (POINTER_PLUS_EXPR,
1950 TREE_TYPE (base), base,
1951 fold_convert (sizetype, init)));
1952
1953 if (vect_print_dump_info (REPORT_DETAILS))
1954 {
1955 fprintf (vect_dump, "analyze in outer-loop: ");
1956 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1957 }
1958
1959 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1960 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1961 gcc_assert (outer_base != NULL_TREE);
1962
1963 if (pbitpos % BITS_PER_UNIT != 0)
1964 {
1965 if (vect_print_dump_info (REPORT_DETAILS))
1966 fprintf (vect_dump, "failed: bit offset alignment.\n");
1967 return false;
1968 }
1969
1970 outer_base = build_fold_addr_expr (outer_base);
1971 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1972 &base_iv, false))
1973 {
1974 if (vect_print_dump_info (REPORT_DETAILS))
1975 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1976 return false;
1977 }
1978
1979 if (offset)
1980 {
1981 if (poffset)
1982 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
1983 poffset);
1984 else
1985 poffset = offset;
1986 }
1987
1988 if (!poffset)
1989 {
1990 offset_iv.base = ssize_int (0);
1991 offset_iv.step = ssize_int (0);
1992 }
1993 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
1994 &offset_iv, false))
1995 {
1996 if (vect_print_dump_info (REPORT_DETAILS))
1997 fprintf (vect_dump, "evolution of offset is not affine.\n");
1998 return false;
1999 }
2000
2001 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2002 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2003 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2004 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2005 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2006
2007 outer_step = size_binop (PLUS_EXPR,
2008 fold_convert (ssizetype, base_iv.step),
2009 fold_convert (ssizetype, offset_iv.step));
2010
2011 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2012 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2013 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2014 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2015 STMT_VINFO_DR_OFFSET (stmt_info) =
2016 fold_convert (ssizetype, offset_iv.base);
2017 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2018 size_int (highest_pow2_factor (offset_iv.base));
2019
2020 if (vect_print_dump_info (REPORT_DETAILS))
2021 {
2022 fprintf (vect_dump, "\touter base_address: ");
2023 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2024 fprintf (vect_dump, "\n\touter offset from base address: ");
2025 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2026 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2027 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2028 fprintf (vect_dump, "\n\touter step: ");
2029 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2030 fprintf (vect_dump, "\n\touter aligned to: ");
2031 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2032 }
2033 }
2034
2035 if (STMT_VINFO_DATA_REF (stmt_info))
2036 {
f083cd24 2037 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 2038 {
2039 fprintf (vect_dump,
2040 "not vectorized: more than one data ref in stmt: ");
2041 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2042 }
2043 return false;
2044 }
f083cd24 2045
fb85abff 2046 STMT_VINFO_DATA_REF (stmt_info) = dr;
2047
2048 /* Set vectype for STMT. */
2049 scalar_type = TREE_TYPE (DR_REF (dr));
2050 STMT_VINFO_VECTYPE (stmt_info) =
2051 get_vectype_for_scalar_type (scalar_type);
2052 if (!STMT_VINFO_VECTYPE (stmt_info))
2053 {
f083cd24 2054 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 2055 {
2056 fprintf (vect_dump,
2057 "not vectorized: no vectype for stmt: ");
2058 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2059 fprintf (vect_dump, " scalar_type: ");
2060 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2061 }
2062 return false;
2063 }
2064 }
2065
2066 return true;
2067}
2068
2069
2070/* Function vect_get_new_vect_var.
2071
2072 Returns a name for a new variable. The current naming scheme prepends the
2073 prefix "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND)
2074 to NAME if provided, and uses the prefix alone for the name of the
2075 vectorizer-generated variable otherwise. */
2076
2077tree
2078vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2079{
2080 const char *prefix;
2081 tree new_vect_var;
2082
2083 switch (var_kind)
2084 {
2085 case vect_simple_var:
2086 prefix = "vect_";
2087 break;
2088 case vect_scalar_var:
2089 prefix = "stmp_";
2090 break;
2091 case vect_pointer_var:
2092 prefix = "vect_p";
2093 break;
2094 default:
2095 gcc_unreachable ();
2096 }
2097
2098 if (name)
2099 {
2100 char* tmp = concat (prefix, name, NULL);
2101 new_vect_var = create_tmp_var (type, tmp);
2102 free (tmp);
2103 }
2104 else
2105 new_vect_var = create_tmp_var (type, prefix);
2106
2107 /* Mark vector typed variable as a gimple register variable. */
2108 if (TREE_CODE (type) == VECTOR_TYPE)
2109 DECL_GIMPLE_REG_P (new_vect_var) = true;
2110
2111 return new_vect_var;
2112}
2113
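/* For illustration only: for a data-ref based on an array "a", the
   calls to this function produce variables whose names are formed by
   prepending the prefix to NAME, e.g.

     vect_get_new_vect_var (vectype, vect_simple_var, "a")      -> vect_a
     vect_get_new_vect_var (ptr_type, vect_pointer_var, "a")    -> vect_pa
     vect_get_new_vect_var (scalar_type, vect_scalar_var, "a")  -> stmp_a

   When NAME is NULL, the prefix alone is used.  */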
2114
2115/* Function vect_create_addr_base_for_vector_ref.
2116
2117 Create an expression that computes the address of the first memory location
2118 that will be accessed for a data reference.
2119
2120 Input:
2121 STMT: The statement containing the data reference.
2122 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2123 OFFSET: Optional. If supplied, it is added to the initial address.
2124 LOOP: Specify the loop-nest relative to which the address should be computed.
2125 For example, when the dataref is in an inner-loop nested in an
2126 outer-loop that is now being vectorized, LOOP can be either the
2127 outer-loop, or the inner-loop. The first memory location accessed
2128 by the following dataref ('in' points to short):
2129
2130 for (i=0; i<N; i++)
2131 for (j=0; j<M; j++)
2132 s += in[i+j]
2133
2134 is as follows:
2135 if LOOP=i_loop: &in (relative to i_loop)
2136 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2137
2138 Output:
2139 1. Return an SSA_NAME whose value is the address of the memory location of
2140 the first vector of the data reference.
2141 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2142 these statement(s) which define the returned SSA_NAME.
2143
2144 FORNOW: We are only handling array accesses with step 1. */
2145
2146tree
2147vect_create_addr_base_for_vector_ref (gimple stmt,
2148 gimple_seq *new_stmt_list,
2149 tree offset,
2150 struct loop *loop)
2151{
2152 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2153 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
fb85abff 2154 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2155 tree base_name;
2156 tree data_ref_base_var;
2157 tree vec_stmt;
2158 tree addr_base, addr_expr;
2159 tree dest;
2160 gimple_seq seq = NULL;
2161 tree base_offset = unshare_expr (DR_OFFSET (dr));
2162 tree init = unshare_expr (DR_INIT (dr));
f083cd24 2163 tree vect_ptr_type;
fb85abff 2164 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
37545e54 2165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
fb85abff 2166
37545e54 2167 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
fb85abff 2168 {
37545e54 2169 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 2170
37545e54 2171 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
fb85abff 2172
2173 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2174 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2175 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2176 }
2177
37545e54 2178 if (loop_vinfo)
2179 base_name = build_fold_indirect_ref (data_ref_base);
2180 else
2181 {
2182 base_offset = ssize_int (0);
2183 init = ssize_int (0);
2184 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2185 }
2186
fb85abff 2187 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2188 add_referenced_var (data_ref_base_var);
2189 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2190 data_ref_base_var);
2191 gimple_seq_add_seq (new_stmt_list, seq);
2192
2193 /* Create base_offset */
2194 base_offset = size_binop (PLUS_EXPR,
2195 fold_convert (sizetype, base_offset),
2196 fold_convert (sizetype, init));
2197 dest = create_tmp_var (sizetype, "base_off");
2198 add_referenced_var (dest);
2199 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2200 gimple_seq_add_seq (new_stmt_list, seq);
2201
2202 if (offset)
2203 {
2204 tree tmp = create_tmp_var (sizetype, "offset");
2205
2206 add_referenced_var (tmp);
2207 offset = fold_build2 (MULT_EXPR, sizetype,
2208 fold_convert (sizetype, offset), step);
2209 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2210 base_offset, offset);
2211 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2212 gimple_seq_add_seq (new_stmt_list, seq);
2213 }
2214
2215 /* base + base_offset */
37545e54 2216 if (loop_vinfo)
2217 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2218 data_ref_base, base_offset);
2219 else
2220 {
2221 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2222 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2223 else
2224 addr_base = build1 (ADDR_EXPR,
2225 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2226 unshare_expr (DR_REF (dr)));
2227 }
2228
fb85abff 2229 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2230
f083cd24 2231 vec_stmt = fold_convert (vect_ptr_type, addr_base);
fb85abff 2232 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2233 get_name (base_name));
2234 add_referenced_var (addr_expr);
f083cd24 2235 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
fb85abff 2236 gimple_seq_add_seq (new_stmt_list, seq);
2237
2238 if (vect_print_dump_info (REPORT_DETAILS))
2239 {
2240 fprintf (vect_dump, "created ");
2241 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2242 }
f083cd24 2243
fb85abff 2244 return vec_stmt;
2245}
2246
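/* For illustration only (hypothetical generated code, assuming the
   loop-vectorization case): for the dataref a[i] with
   DR_BASE_ADDRESS == &a, DR_OFFSET == 0, DR_INIT == 0 and no extra
   OFFSET, the function above adds to NEW_STMT_LIST statements roughly
   equivalent to

     batmp = &a;
     base_off = 0;
     vect_pa = (vector_type *) (batmp + base_off);

   and returns vect_pa.  The caller is responsible for inserting these
   statements, typically on the loop preheader edge.  */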
2247
2248/* Function vect_create_data_ref_ptr.
2249
2250 Create a new pointer to vector type (vp), that points to the first location
2251 accessed in the loop by STMT, along with the def-use update chain to
2252 appropriately advance the pointer through the loop iterations. Also set
2253 aliasing information for the pointer. This vector pointer is used by the
2254 callers to this function to create a memory reference expression for vector
2255 load/store access.
2256
2257 Input:
2258 1. STMT: a stmt that references memory. Expected to be of the form
2259 GIMPLE_ASSIGN <name, data-ref> or
2260 GIMPLE_ASSIGN <data-ref, name>.
2261 2. AT_LOOP: the loop where the vector memref is to be created.
2262 3. OFFSET (optional): an offset to be added to the initial address accessed
2263 by the data-ref in STMT.
2264 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2265 pointing to the initial address.
2266 5. TYPE: if not NULL indicates the required type of the data-ref.
2267
2268 Output:
2269 1. Declare a new ptr to vector_type, and have it point to the base of the
2270 data reference (the initial address accessed by the data reference).
2271 For example, for vector of type V8HI, the following code is generated:
2272
2273 v8hi *vp;
2274 vp = (v8hi *)initial_address;
2275
2276 if OFFSET is not supplied:
2277 initial_address = &a[init];
2278 if OFFSET is supplied:
2279 initial_address = &a[init + OFFSET];
2280
2281 Return the initial_address in INITIAL_ADDRESS.
2282
2283 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2284 update the pointer in each iteration of the loop.
2285
2286 Return the increment stmt that updates the pointer in PTR_INCR.
2287
2288 3. Set INV_P to true if the access pattern of the data reference in the
2289 vectorized loop is invariant. Set it to false otherwise.
2290
2291 4. Return the pointer. */
2292
2293tree
2294vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2295 tree offset, tree *initial_address, gimple *ptr_incr,
dd277d48 2296 bool only_init, bool *inv_p)
fb85abff 2297{
2298 tree base_name;
2299 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2300 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 2301 struct loop *loop = NULL;
2302 bool nested_in_vect_loop = false;
2303 struct loop *containing_loop = NULL;
fb85abff 2304 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2305 tree vect_ptr_type;
2306 tree vect_ptr;
fb85abff 2307 tree new_temp;
2308 gimple vec_stmt;
2309 gimple_seq new_stmt_list = NULL;
37545e54 2310 edge pe = NULL;
fb85abff 2311 basic_block new_bb;
2312 tree vect_ptr_init;
2313 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2314 tree vptr;
2315 gimple_stmt_iterator incr_gsi;
2316 bool insert_after;
2317 tree indx_before_incr, indx_after_incr;
2318 gimple incr;
2319 tree step;
37545e54 2320 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2321 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2322
2323 if (loop_vinfo)
2324 {
2325 loop = LOOP_VINFO_LOOP (loop_vinfo);
2326 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2327 containing_loop = (gimple_bb (stmt))->loop_father;
2328 pe = loop_preheader_edge (loop);
2329 }
2330 else
2331 {
2332 gcc_assert (bb_vinfo);
2333 only_init = true;
2334 *ptr_incr = NULL;
2335 }
2336
fb85abff 2337 /* Check the step (evolution) of the load in LOOP, and record
2338 whether it's invariant. */
2339 if (nested_in_vect_loop)
2340 step = STMT_VINFO_DR_STEP (stmt_info);
2341 else
2342 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2343
2344 if (tree_int_cst_compare (step, size_zero_node) == 0)
2345 *inv_p = true;
2346 else
2347 *inv_p = false;
2348
2349 /* Create an expression for the first address accessed by this load
2350 in LOOP. */
2351 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2352
2353 if (vect_print_dump_info (REPORT_DETAILS))
2354 {
2355 tree data_ref_base = base_name;
2356 fprintf (vect_dump, "create vector-pointer variable to type: ");
2357 print_generic_expr (vect_dump, vectype, TDF_SLIM);
10095225 2358 if (TREE_CODE (data_ref_base) == VAR_DECL
2359 || TREE_CODE (data_ref_base) == ARRAY_REF)
2360 fprintf (vect_dump, " vectorizing an array ref: ");
fb85abff 2361 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2362 fprintf (vect_dump, " vectorizing a record based array ref: ");
2363 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2364 fprintf (vect_dump, " vectorizing a pointer ref: ");
2365 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2366 }
2367
2368 /** (1) Create the new vector-pointer variable: **/
dd277d48 2369 vect_ptr_type = build_pointer_type (vectype);
fb85abff 2370 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2371 get_name (base_name));
a34701c9 2372
2373 /* Vector types inherit the alias set of their component type by default so
2374 we need to use a ref-all pointer if the data reference does not conflict
2375 with the created vector data reference because it is not addressable. */
2376 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2377 get_alias_set (DR_REF (dr))))
2378 {
98155838 2379 vect_ptr_type
2380 = build_pointer_type_for_mode (vectype,
2381 TYPE_MODE (vect_ptr_type), true);
a34701c9 2382 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2383 get_name (base_name));
2384 }
2385
2386 /* Likewise for any of the data references in the stmt group. */
2387 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
fb85abff 2388 {
dd277d48 2389 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2390 do
2391 {
2392 tree lhs = gimple_assign_lhs (orig_stmt);
2393 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2394 get_alias_set (lhs)))
2395 {
a34701c9 2396 vect_ptr_type
98155838 2397 = build_pointer_type_for_mode (vectype,
2398 TYPE_MODE (vect_ptr_type), true);
a34701c9 2399 vect_ptr
2400 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2401 get_name (base_name));
dd277d48 2402 break;
2403 }
2404
2405 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2406 }
2407 while (orig_stmt);
fb85abff 2408 }
2409
2410 add_referenced_var (vect_ptr);
2411
fb85abff 2412 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2413 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2414 def-use update cycles for the pointer: One relative to the outer-loop
2415 (LOOP), which is what steps (3) and (4) below do. The other is relative
2416 to the inner-loop (which is the inner-most loop containing the dataref),
2417 and this is done by step (5) below.
2418
2419 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2420 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2421 redundant. Steps (3),(4) create the following:
2422
2423 vp0 = &base_addr;
2424 LOOP: vp1 = phi(vp0,vp2)
2425 ...
2426 ...
2427 vp2 = vp1 + step
2428 goto LOOP
2429
2430 If there is an inner-loop nested in loop, then step (5) will also be
2431 applied, and an additional update in the inner-loop will be created:
2432
2433 vp0 = &base_addr;
2434 LOOP: vp1 = phi(vp0,vp2)
2435 ...
2436 inner: vp3 = phi(vp1,vp4)
2437 vp4 = vp3 + inner_step
2438 if () goto inner
2439 ...
2440 vp2 = vp1 + step
2441 if () goto LOOP */
2442
2443 /** (3) Calculate the initial address of the vector-pointer, and set
2444 the vector-pointer to point to it before the loop: **/
2445
2446 /* Create: &(base[init_val+offset]) in the loop preheader. */
2447
2448 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2449 offset, loop);
fb85abff 2450 if (new_stmt_list)
2451 {
37545e54 2452 if (pe)
2453 {
2454 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2455 gcc_assert (!new_bb);
2456 }
2457 else
2458 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
fb85abff 2459 }
2460
2461 *initial_address = new_temp;
2462
2463 /* Create: p = (vectype *) initial_base */
2464 vec_stmt = gimple_build_assign (vect_ptr,
2465 fold_convert (vect_ptr_type, new_temp));
2466 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2467 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
37545e54 2468 if (pe)
2469 {
2470 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2471 gcc_assert (!new_bb);
2472 }
2473 else
2474 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
fb85abff 2475
2476 /** (4) Handle the updating of the vector-pointer inside the loop.
2477 This is needed when ONLY_INIT is false, and also when AT_LOOP
2478 is the inner-loop nested in LOOP (during outer-loop vectorization).
2479 **/
2480
37545e54 2481 /* No update in loop is required. */
2482 if (only_init && (!loop_vinfo || at_loop == loop))
fb85abff 2483 {
2484 /* Copy the points-to information if it exists. */
2485 if (DR_PTR_INFO (dr))
2486 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2487 vptr = vect_ptr_init;
2488 }
2489 else
2490 {
2491 /* The step of the vector pointer is the Vector Size. */
2492 tree step = TYPE_SIZE_UNIT (vectype);
2493 /* One exception to the above is when the scalar step of the load in
2494 LOOP is zero. In this case the step here is also zero. */
2495 if (*inv_p)
2496 step = size_zero_node;
2497
2498 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2499
2500 create_iv (vect_ptr_init,
2501 fold_convert (vect_ptr_type, step),
2502 vect_ptr, loop, &incr_gsi, insert_after,
2503 &indx_before_incr, &indx_after_incr);
2504 incr = gsi_stmt (incr_gsi);
37545e54 2505 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
fb85abff 2506
2507 /* Copy the points-to information if it exists. */
2508 if (DR_PTR_INFO (dr))
2509 {
2510 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2511 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2512 }
fb85abff 2513 if (ptr_incr)
2514 *ptr_incr = incr;
2515
2516 vptr = indx_before_incr;
2517 }
2518
2519 if (!nested_in_vect_loop || only_init)
2520 return vptr;
2521
2522
2523 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2524 nested in LOOP, if exists: **/
2525
2526 gcc_assert (nested_in_vect_loop);
2527 if (!only_init)
2528 {
2529 standard_iv_increment_position (containing_loop, &incr_gsi,
2530 &insert_after);
2531 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2532 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2533 &indx_after_incr);
2534 incr = gsi_stmt (incr_gsi);
37545e54 2535 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
fb85abff 2536
2537 /* Copy the points-to information if it exists. */
2538 if (DR_PTR_INFO (dr))
2539 {
2540 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2541 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2542 }
fb85abff 2543 if (ptr_incr)
2544 *ptr_incr = incr;
2545
2546 return indx_before_incr;
2547 }
2548 else
2549 gcc_unreachable ();
2550}
2551
2552
2553/* Function bump_vector_ptr
2554
2555 Increment a pointer (to a vector type) by vector-size. If requested,
2556 i.e. if PTR-INCR is given, then also connect the new increment stmt
2557 to the existing def-use update-chain of the pointer, by modifying
2558 the PTR_INCR as illustrated below:
2559
2560 The pointer def-use update-chain before this function:
2561 DATAREF_PTR = phi (p_0, p_2)
2562 ....
2563 PTR_INCR: p_2 = DATAREF_PTR + step
2564
2565 The pointer def-use update-chain after this function:
2566 DATAREF_PTR = phi (p_0, p_2)
2567 ....
2568 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2569 ....
2570 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2571
2572 Input:
2573 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2574 in the loop.
2575 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2576 the loop. The increment amount across iterations is expected
2577 to be vector_size.
2578 BSI - location where the new update stmt is to be placed.
2579 STMT - the original scalar memory-access stmt that is being vectorized.
2580 BUMP - optional. The offset by which to bump the pointer. If not given,
2581 the offset is assumed to be vector_size.
2582
2583 Output: Return NEW_DATAREF_PTR as illustrated above.
2584
2585*/
2586
2587tree
2588bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2589 gimple stmt, tree bump)
2590{
2591 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2592 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2593 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2594 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2595 tree update = TYPE_SIZE_UNIT (vectype);
2596 gimple incr_stmt;
2597 ssa_op_iter iter;
2598 use_operand_p use_p;
2599 tree new_dataref_ptr;
2600
2601 if (bump)
2602 update = bump;
2603
2604 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2605 dataref_ptr, update);
2606 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2607 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2608 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2609
2610 /* Copy the points-to information if it exists. */
2611 if (DR_PTR_INFO (dr))
2612 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
fb85abff 2613
2614 if (!ptr_incr)
2615 return new_dataref_ptr;
2616
2617 /* Update the vector-pointer's cross-iteration increment. */
2618 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2619 {
2620 tree use = USE_FROM_PTR (use_p);
2621
2622 if (use == dataref_ptr)
2623 SET_USE (use_p, new_dataref_ptr);
2624 else
2625 gcc_assert (tree_int_cst_compare (use, update) == 0);
2626 }
2627
2628 return new_dataref_ptr;
2629}
2630
2631
2632/* Function vect_create_destination_var.
2633
2634 Create a new temporary of type VECTYPE. */
2635
2636tree
2637vect_create_destination_var (tree scalar_dest, tree vectype)
2638{
2639 tree vec_dest;
2640 const char *new_name;
2641 tree type;
2642 enum vect_var_kind kind;
2643
2644 kind = vectype ? vect_simple_var : vect_scalar_var;
2645 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2646
2647 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2648
2649 new_name = get_name (scalar_dest);
2650 if (!new_name)
2651 new_name = "var_";
2652 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2653 add_referenced_var (vec_dest);
2654
2655 return vec_dest;
2656}
2657
2658/* Function vect_strided_store_supported.
2659
2660 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2661 and FALSE otherwise. */
2662
2663bool
2664vect_strided_store_supported (tree vectype)
2665{
2666 optab interleave_high_optab, interleave_low_optab;
2667 int mode;
2668
2669 mode = (int) TYPE_MODE (vectype);
2670
2671 /* Check that the operation is supported. */
2672 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2673 vectype, optab_default);
2674 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2675 vectype, optab_default);
2676 if (!interleave_high_optab || !interleave_low_optab)
2677 {
2678 if (vect_print_dump_info (REPORT_DETAILS))
2679 fprintf (vect_dump, "no optab for interleave.");
2680 return false;
2681 }
2682
2683 if (optab_handler (interleave_high_optab, mode)->insn_code
2684 == CODE_FOR_nothing
2685 || optab_handler (interleave_low_optab, mode)->insn_code
2686 == CODE_FOR_nothing)
2687 {
2688 if (vect_print_dump_info (REPORT_DETAILS))
2689 fprintf (vect_dump, "interleave op not supported by target.");
2690 return false;
2691 }
2692
2693 return true;
2694}
2695
2696
2697/* Function vect_permute_store_chain.
2698
2699 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2700 a power of 2, generate interleave_high/low stmts to reorder the data
2701 correctly for the stores. Return the final references for stores in
2702 RESULT_CHAIN.
2703
2704 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2705 The input is 4 vectors each containing 8 elements. We assign a number to each
2706 element; the input sequence is:
2707
2708 1st vec: 0 1 2 3 4 5 6 7
2709 2nd vec: 8 9 10 11 12 13 14 15
2710 3rd vec: 16 17 18 19 20 21 22 23
2711 4th vec: 24 25 26 27 28 29 30 31
2712
2713 The output sequence should be:
2714
2715 1st vec: 0 8 16 24 1 9 17 25
2716 2nd vec: 2 10 18 26 3 11 19 27
2717 3rd vec: 4 12 20 28 5 13 21 29
2718 4th vec: 6 14 22 30 7 15 23 31
2719
2720 i.e., we interleave the contents of the four vectors in their order.
2721
2722 We use interleave_high/low instructions to create such output. The input of
2723 each interleave_high/low operation is two vectors:
2724 1st vec 2nd vec
2725 0 1 2 3 4 5 6 7
2726 the even elements of the result vector are obtained left-to-right from the
2727 high/low elements of the first vector. The odd elements of the result are
2728 obtained left-to-right from the high/low elements of the second vector.
2729 The output of interleave_high will be: 0 4 1 5
2730 and of interleave_low: 2 6 3 7
2731
2732
2733 The permutation is done in log LENGTH stages. In each stage interleave_high
2734 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2735 where the first argument is taken from the first half of DR_CHAIN and the
2736 second argument from its second half.
2737 In our example,
2738
2739 I1: interleave_high (1st vec, 3rd vec)
2740 I2: interleave_low (1st vec, 3rd vec)
2741 I3: interleave_high (2nd vec, 4th vec)
2742 I4: interleave_low (2nd vec, 4th vec)
2743
2744 The output for the first stage is:
2745
2746 I1: 0 16 1 17 2 18 3 19
2747 I2: 4 20 5 21 6 22 7 23
2748 I3: 8 24 9 25 10 26 11 27
2749 I4: 12 28 13 29 14 30 15 31
2750
2751 The output of the second stage, i.e. the final result is:
2752
2753 I1: 0 8 16 24 1 9 17 25
2754 I2: 2 10 18 26 3 11 19 27
2755 I3: 4 12 20 28 5 13 21 29
2756 I4: 6 14 22 30 7 15 23 31. */
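
/* For illustration only: a scalar model of the interleave semantics
   assumed by the example above, for two 4-element vectors of int.
   This is not code used by the vectorizer - the real statements are
   VEC_INTERLEAVE_HIGH_EXPR / VEC_INTERLEAVE_LOW_EXPR and are expanded
   by the target:

     void
     interleave_high (const int a[4], const int b[4], int out[4])
     {
       out[0] = a[0]; out[1] = b[0]; out[2] = a[1]; out[3] = b[1];
     }

     void
     interleave_low (const int a[4], const int b[4], int out[4])
     {
       out[0] = a[2]; out[1] = b[2]; out[2] = a[3]; out[3] = b[3];
     }

   For a = {0 1 2 3} and b = {4 5 6 7} this gives {0 4 1 5} and
   {2 6 3 7}, matching the example above.  Note that the function
   below swaps the two tree codes on little-endian targets.  */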
2757
2758bool
2759vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2760 unsigned int length,
2761 gimple stmt,
2762 gimple_stmt_iterator *gsi,
2763 VEC(tree,heap) **result_chain)
2764{
2765 tree perm_dest, vect1, vect2, high, low;
2766 gimple perm_stmt;
2767 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2768 tree scalar_dest;
2769 int i;
2770 unsigned int j;
2771 enum tree_code high_code, low_code;
2772
2773 scalar_dest = gimple_assign_lhs (stmt);
2774
2775 /* Check that the operation is supported. */
2776 if (!vect_strided_store_supported (vectype))
2777 return false;
2778
2779 *result_chain = VEC_copy (tree, heap, dr_chain);
2780
2781 for (i = 0; i < exact_log2 (length); i++)
2782 {
2783 for (j = 0; j < length/2; j++)
2784 {
2785 vect1 = VEC_index (tree, dr_chain, j);
2786 vect2 = VEC_index (tree, dr_chain, j+length/2);
2787
2788 /* Create interleaving stmt:
2789 in the case of big endian:
2790 high = interleave_high (vect1, vect2)
2791 and in the case of little endian:
2792 high = interleave_low (vect1, vect2). */
2793 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2794 DECL_GIMPLE_REG_P (perm_dest) = 1;
2795 add_referenced_var (perm_dest);
2796 if (BYTES_BIG_ENDIAN)
2797 {
2798 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2799 low_code = VEC_INTERLEAVE_LOW_EXPR;
2800 }
2801 else
2802 {
2803 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2804 high_code = VEC_INTERLEAVE_LOW_EXPR;
2805 }
2806 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2807 vect1, vect2);
2808 high = make_ssa_name (perm_dest, perm_stmt);
2809 gimple_assign_set_lhs (perm_stmt, high);
2810 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2811 VEC_replace (tree, *result_chain, 2*j, high);
2812
2813 /* Create interleaving stmt:
2814 in the case of big endian:
2815 low = interleave_low (vect1, vect2)
2816 and in the case of little endian:
2817 low = interleave_high (vect1, vect2). */
2818 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2819 DECL_GIMPLE_REG_P (perm_dest) = 1;
2820 add_referenced_var (perm_dest);
2821 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2822 vect1, vect2);
2823 low = make_ssa_name (perm_dest, perm_stmt);
2824 gimple_assign_set_lhs (perm_stmt, low);
2825 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2826 VEC_replace (tree, *result_chain, 2*j+1, low);
2827 }
2828 dr_chain = VEC_copy (tree, heap, *result_chain);
2829 }
2830 return true;
2831}
2832
2833/* Function vect_setup_realignment
2834
2835 This function is called when vectorizing an unaligned load using
2836 the dr_explicit_realign[_optimized] scheme.
2837 This function generates the following code at the loop prolog:
2838
2839 p = initial_addr;
2840 x msq_init = *(floor(p)); # prolog load
2841 realignment_token = call target_builtin;
2842 loop:
2843 x msq = phi (msq_init, ---)
2844
2845 The stmts marked with x are generated only for the case of
2846 dr_explicit_realign_optimized.
2847
2848 The code above sets up a new (vector) pointer, pointing to the first
2849 location accessed by STMT, and a "floor-aligned" load using that pointer.
2850 It also generates code to compute the "realignment-token" (if the relevant
2851 target hook was defined), and creates a phi-node at the loop-header bb
2852 whose arguments are the result of the prolog-load (created by this
2853 function) and the result of a load that takes place in the loop (to be
2854 created by the caller to this function).
2855
2856 For the case of dr_explicit_realign_optimized:
2857 The caller to this function uses the phi-result (msq) to create the
2858 realignment code inside the loop, and sets up the missing phi argument,
2859 as follows:
2860 loop:
2861 msq = phi (msq_init, lsq)
2862 lsq = *(floor(p')); # load in loop
2863 result = realign_load (msq, lsq, realignment_token);
2864
2865 For the case of dr_explicit_realign:
2866 loop:
2867 msq = *(floor(p)); # load in loop
2868 p' = p + (VS-1);
2869 lsq = *(floor(p')); # load in loop
2870 result = realign_load (msq, lsq, realignment_token);
2871
2872 Input:
2873 STMT - (scalar) load stmt to be vectorized. This load accesses
2874 a memory location that may be unaligned.
2875 BSI - place where new code is to be inserted.
2876 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2877 is used.
2878
2879 Output:
2880 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2881 target hook, if defined.
2882 Return value - the result of the loop-header phi node. */
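
/* For illustration only: a scalar, byte-level model of the
   realign_load operation used in the pseudo-code above.  Assuming a
   vector size of VS bytes and a misalignment of M bytes
   (0 <= M < VS), the realigned result is pieced together from the two
   aligned loads MSQ and LSQ roughly as

     for (i = 0; i < VS; i++)
       result[i] = i + M < VS ? msq[i + M] : lsq[i + M - VS];

   How M is actually encoded is target-specific; it is carried in
   REALIGNMENT_TOKEN, the result of the builtin_mask_for_load target
   hook applied to INIT_ADDR.  */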
2883
2884tree
2885vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2886 tree *realignment_token,
2887 enum dr_alignment_support alignment_support_scheme,
2888 tree init_addr,
2889 struct loop **at_loop)
2890{
2891 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2892 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2893 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2894 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2895 edge pe;
2896 tree scalar_dest = gimple_assign_lhs (stmt);
2897 tree vec_dest;
2898 gimple inc;
2899 tree ptr;
2900 tree data_ref;
2901 gimple new_stmt;
2902 basic_block new_bb;
2903 tree msq_init = NULL_TREE;
2904 tree new_temp;
2905 gimple phi_stmt;
2906 tree msq = NULL_TREE;
2907 gimple_seq stmts = NULL;
2908 bool inv_p;
2909 bool compute_in_loop = false;
2910 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2911 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2912 struct loop *loop_for_initial_load;
2913
2914 gcc_assert (alignment_support_scheme == dr_explicit_realign
2915 || alignment_support_scheme == dr_explicit_realign_optimized);
2916
2917 /* We need to generate three things:
2918 1. the misalignment computation
2919 2. the extra vector load (for the optimized realignment scheme).
2920 3. the phi node for the two vectors from which the realignment is
2921 done (for the optimized realignment scheme).
2922 */
2923
2924 /* 1. Determine where to generate the misalignment computation.
2925
2926 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2927 calculation will be generated by this function, outside the loop (in the
2928 preheader). Otherwise, INIT_ADDR had already been computed for us by the
2929 caller, inside the loop.
2930
2931 Background: If the misalignment remains fixed throughout the iterations of
2932 the loop, then both realignment schemes are applicable, and also the
2933 misalignment computation can be done outside LOOP. This is because we are
2934 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2935 are a multiple of VS (the Vector Size), and therefore the misalignment in
2936 different vectorized LOOP iterations is always the same.
2937 The problem arises only if the memory access is in an inner-loop nested
2938 inside LOOP, which is now being vectorized using outer-loop vectorization.
2939 This is the only case when the misalignment of the memory access may not
2940 remain fixed throughout the iterations of the inner-loop (as explained in
2941 detail in vect_supportable_dr_alignment). In this case, not only is the
2942 optimized realignment scheme not applicable, but also the misalignment
2943 computation (and generation of the realignment token that is passed to
2944 REALIGN_LOAD) have to be done inside the loop.
2945
2946 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2947 or not, which in turn determines if the misalignment is computed inside
2948 the inner-loop, or outside LOOP. */
2949
2950 if (init_addr != NULL_TREE)
2951 {
2952 compute_in_loop = true;
2953 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2954 }
2955
2956
2957 /* 2. Determine where to generate the extra vector load.
2958
2959 For the optimized realignment scheme, instead of generating two vector
2960 loads in each iteration, we generate a single extra vector load in the
2961 preheader of the loop, and in each iteration reuse the result of the
2962 vector load from the previous iteration. In case the memory access is in
2963 an inner-loop nested inside LOOP, which is now being vectorized using
2964 outer-loop vectorization, we need to determine whether this initial vector
2965 load should be generated at the preheader of the inner-loop, or can be
2966 generated at the preheader of LOOP. If the memory access has no evolution
2967 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2968 to be generated inside LOOP (in the preheader of the inner-loop). */
2969
2970 if (nested_in_vect_loop)
2971 {
2972 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2973 bool invariant_in_outerloop =
2974 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2975 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2976 }
2977 else
2978 loop_for_initial_load = loop;
2979 if (at_loop)
2980 *at_loop = loop_for_initial_load;
2981
2982 /* 3. For the case of the optimized realignment, create the first vector
2983 load at the loop preheader. */
2984
2985 if (alignment_support_scheme == dr_explicit_realign_optimized)
2986 {
2987 /* Create msq_init = *(floor(p1)) in the loop preheader */
2988
2989 gcc_assert (!compute_in_loop);
2990 pe = loop_preheader_edge (loop_for_initial_load);
2991 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2992 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
dd277d48 2993 &init_addr, &inc, true, &inv_p);
fb85abff 2994 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2995 new_stmt = gimple_build_assign (vec_dest, data_ref);
2996 new_temp = make_ssa_name (vec_dest, new_stmt);
2997 gimple_assign_set_lhs (new_stmt, new_temp);
2998 mark_symbols_for_renaming (new_stmt);
2999 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3000 gcc_assert (!new_bb);
3001 msq_init = gimple_assign_lhs (new_stmt);
3002 }
3003
3004 /* 4. Create realignment token using a target builtin, if available.
3005 It is done either inside the containing loop, or before LOOP (as
3006 determined above). */
3007
3008 if (targetm.vectorize.builtin_mask_for_load)
3009 {
3010 tree builtin_decl;
3011
3012 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3013 if (compute_in_loop)
3014 gcc_assert (init_addr); /* already computed by the caller. */
3015 else
3016 {
3017 /* Generate the INIT_ADDR computation outside LOOP. */
3018 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3019 NULL_TREE, loop);
3020 pe = loop_preheader_edge (loop);
3021 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3022 gcc_assert (!new_bb);
3023 }
3024
3025 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3026 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3027 vec_dest =
3028 vect_create_destination_var (scalar_dest,
3029 gimple_call_return_type (new_stmt));
3030 new_temp = make_ssa_name (vec_dest, new_stmt);
3031 gimple_call_set_lhs (new_stmt, new_temp);
3032
3033 if (compute_in_loop)
3034 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3035 else
3036 {
3037 /* Generate the misalignment computation outside LOOP. */
3038 pe = loop_preheader_edge (loop);
3039 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3040 gcc_assert (!new_bb);
3041 }
3042
3043 *realignment_token = gimple_call_lhs (new_stmt);
3044
3045 /* The result of the CALL_EXPR to this builtin is determined from
3046 the value of the parameter and no global variables are touched
3047 which makes the builtin a "const" function. Requiring the
3048 builtin to have the "const" attribute makes it unnecessary
3049 to call mark_call_clobbered. */
3050 gcc_assert (TREE_READONLY (builtin_decl));
3051 }
3052
3053 if (alignment_support_scheme == dr_explicit_realign)
3054 return msq;
3055
3056 gcc_assert (!compute_in_loop);
3057 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3058
3059
3060 /* 5. Create msq = phi <msq_init, lsq> in loop */
3061
3062 pe = loop_preheader_edge (containing_loop);
3063 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3064 msq = make_ssa_name (vec_dest, NULL);
3065 phi_stmt = create_phi_node (msq, containing_loop->header);
3066 SSA_NAME_DEF_STMT (msq) = phi_stmt;
efbcb6de 3067 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
fb85abff 3068
3069 return msq;
3070}
3071
3072
3073/* Function vect_strided_load_supported.
3074
3075 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3076 and FALSE otherwise. */
3077
3078bool
3079vect_strided_load_supported (tree vectype)
3080{
3081 optab perm_even_optab, perm_odd_optab;
3082 int mode;
3083
3084 mode = (int) TYPE_MODE (vectype);
3085
3086 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3087 optab_default);
3088 if (!perm_even_optab)
3089 {
3090 if (vect_print_dump_info (REPORT_DETAILS))
3091 fprintf (vect_dump, "no optab for perm_even.");
3092 return false;
3093 }
3094
3095 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3096 {
3097 if (vect_print_dump_info (REPORT_DETAILS))
3098 fprintf (vect_dump, "perm_even op not supported by target.");
3099 return false;
3100 }
3101
3102 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3103 optab_default);
3104 if (!perm_odd_optab)
3105 {
3106 if (vect_print_dump_info (REPORT_DETAILS))
3107 fprintf (vect_dump, "no optab for perm_odd.");
3108 return false;
3109 }
3110
3111 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3112 {
3113 if (vect_print_dump_info (REPORT_DETAILS))
3114 fprintf (vect_dump, "perm_odd op not supported by target.");
3115 return false;
3116 }
3117 return true;
3118}
3119
3120
3121/* Function vect_permute_load_chain.
3122
3123 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3124 a power of 2, generate extract_even/odd stmts to reorder the input data
3125 correctly. Return the final references for loads in RESULT_CHAIN.
3126
3127 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3128 The input is 4 vectors each containing 8 elements. We assign a number to each
3129 element; the input sequence is:
3130
3131 1st vec: 0 1 2 3 4 5 6 7
3132 2nd vec: 8 9 10 11 12 13 14 15
3133 3rd vec: 16 17 18 19 20 21 22 23
3134 4th vec: 24 25 26 27 28 29 30 31
3135
3136 The output sequence should be:
3137
3138 1st vec: 0 4 8 12 16 20 24 28
3139 2nd vec: 1 5 9 13 17 21 25 29
3140 3rd vec: 2 6 10 14 18 22 26 30
3141 4th vec: 3 7 11 15 19 23 27 31
3142
3143 i.e., the first output vector should contain the first elements of each
3144 interleaving group, etc.
3145
3146 We use extract_even/odd instructions to create such output. The input of each
3147 extract_even/odd operation is two vectors
3148 1st vec 2nd vec
3149 0 1 2 3 4 5 6 7
3150
3151 and the output is the vector of extracted even/odd elements. The output of
3152 extract_even will be: 0 2 4 6
3153 and of extract_odd: 1 3 5 7
3154
3155
3156 The permutation is done in log LENGTH stages. In each stage extract_even and
3157 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3158 order. In our example,
3159
3160 E1: extract_even (1st vec, 2nd vec)
3161 E2: extract_odd (1st vec, 2nd vec)
3162 E3: extract_even (3rd vec, 4th vec)
3163 E4: extract_odd (3rd vec, 4th vec)
3164
3165 The output for the first stage will be:
3166
3167 E1: 0 2 4 6 8 10 12 14
3168 E2: 1 3 5 7 9 11 13 15
3169 E3: 16 18 20 22 24 26 28 30
3170 E4: 17 19 21 23 25 27 29 31
3171
3172 In order to proceed and create the correct sequence for the next stage (or
3173 for the correct output, if the second stage is the last one, as in our
3174 example), we first put the output of extract_even operation and then the
3175 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3176 The input for the second stage is:
3177
3178 1st vec (E1): 0 2 4 6 8 10 12 14
3179 2nd vec (E3): 16 18 20 22 24 26 28 30
3180 3rd vec (E2): 1 3 5 7 9 11 13 15
3181 4th vec (E4): 17 19 21 23 25 27 29 31
3182
3183 The output of the second stage:
3184
3185 E1: 0 4 8 12 16 20 24 28
3186 E2: 2 6 10 14 18 22 26 30
3187 E3: 1 5 9 13 17 21 25 29
3188 E4: 3 7 11 15 19 23 27 31
3189
3190 And RESULT_CHAIN after reordering:
3191
3192 1st vec (E1): 0 4 8 12 16 20 24 28
3193 2nd vec (E3): 1 5 9 13 17 21 25 29
3194 3rd vec (E2): 2 6 10 14 18 22 26 30
3195 4th vec (E4): 3 7 11 15 19 23 27 31. */
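
/* For illustration only: a scalar model of the extract_even/odd
   semantics assumed by the example above, for two 4-element vectors
   of int.  This is not code used by the vectorizer - the real
   statements are VEC_EXTRACT_EVEN_EXPR / VEC_EXTRACT_ODD_EXPR and are
   expanded by the target:

     void
     extract_even (const int a[4], const int b[4], int out[4])
     {
       out[0] = a[0]; out[1] = a[2]; out[2] = b[0]; out[3] = b[2];
     }

     void
     extract_odd (const int a[4], const int b[4], int out[4])
     {
       out[0] = a[1]; out[1] = a[3]; out[2] = b[1]; out[3] = b[3];
     }

   For a = {0 1 2 3} and b = {4 5 6 7} this gives {0 2 4 6} and
   {1 3 5 7}, matching the example above.  */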
3196
3197bool
3198vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3199 unsigned int length,
3200 gimple stmt,
3201 gimple_stmt_iterator *gsi,
3202 VEC(tree,heap) **result_chain)
3203{
3204 tree perm_dest, data_ref, first_vect, second_vect;
3205 gimple perm_stmt;
3206 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3207 int i;
3208 unsigned int j;
3209
3210 /* Check that the operation is supported. */
3211 if (!vect_strided_load_supported (vectype))
3212 return false;
3213
3214 *result_chain = VEC_copy (tree, heap, dr_chain);
3215 for (i = 0; i < exact_log2 (length); i++)
3216 {
3217 for (j = 0; j < length; j +=2)
3218 {
3219 first_vect = VEC_index (tree, dr_chain, j);
3220 second_vect = VEC_index (tree, dr_chain, j+1);
3221
3222 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3223 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3224 DECL_GIMPLE_REG_P (perm_dest) = 1;
3225 add_referenced_var (perm_dest);
3226
3227 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3228 perm_dest, first_vect,
3229 second_vect);
3230
3231 data_ref = make_ssa_name (perm_dest, perm_stmt);
3232 gimple_assign_set_lhs (perm_stmt, data_ref);
3233 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3234 mark_symbols_for_renaming (perm_stmt);
3235
3236 VEC_replace (tree, *result_chain, j/2, data_ref);
3237
3238 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3239 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3240 DECL_GIMPLE_REG_P (perm_dest) = 1;
3241 add_referenced_var (perm_dest);
3242
3243 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3244 perm_dest, first_vect,
3245 second_vect);
3246 data_ref = make_ssa_name (perm_dest, perm_stmt);
3247 gimple_assign_set_lhs (perm_stmt, data_ref);
3248 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3249 mark_symbols_for_renaming (perm_stmt);
3250
3251 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3252 }
3253 dr_chain = VEC_copy (tree, heap, *result_chain);
3254 }
3255 return true;
3256}
3257
3258
3259/* Function vect_transform_strided_load.
3260
3261 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3262 to perform their permutation and ascribe the result vectorized statements to
3263 the scalar statements.
3264*/
3265
3266bool
3267vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3268 gimple_stmt_iterator *gsi)
3269{
3270 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3271 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3272 gimple next_stmt, new_stmt;
3273 VEC(tree,heap) *result_chain = NULL;
3274 unsigned int i, gap_count;
3275 tree tmp_data_ref;
3276
3277 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3278     RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
3279     permuted vectors, which are ready for vector computation. */
3280 result_chain = VEC_alloc (tree, heap, size);
3281 /* Permute. */
3282 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3283 return false;
3284
3285   /* Put a permuted data-ref in the STMT_VINFO_VEC_STMT field of each scalar
3286      statement.  Since we scan the chain starting from its first node, the
3287      statements' order corresponds to the order of data-refs in RESULT_CHAIN. */
3288 next_stmt = first_stmt;
3289 gap_count = 1;
3290 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3291 {
3292 if (!next_stmt)
3293 break;
3294
3295       /* Skip the gaps.  Loads created for the gaps will be removed later by
3296          the dead code elimination pass.  No need to check for the first stmt
3297          in the group, since it always exists.
3298          DR_GROUP_GAP is the number of steps in elements from the previous
3299          access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
3300          correspond to the gaps. */
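      /* For instance (an illustrative sketch, not from a real testcase): if
         the group reads in[4*i], in[4*i + 2] and in[4*i + 3] but never
         in[4*i + 1], the access to in[4*i + 2] has DR_GROUP_GAP == 2, so the
         RESULT_CHAIN vector holding the in[4*i + 1] elements is skipped here
         and later eliminated as dead code.  */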
3302 if (next_stmt != first_stmt
3303 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3304 {
3305 gap_count++;
3306 continue;
3307 }
3308
3309 while (next_stmt)
3310 {
3311 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3312         /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3313            copies, and we put the new vector statement at the end of the
3314            RELATED_STMT chain. */
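          /* E.g. (an illustrative note): when each scalar load needs two
             vector copies, the first copy is recorded in STMT_VINFO_VEC_STMT
             and the second is appended to the end of its RELATED_STMT
             chain.  */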
3315 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3316 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3317 else
3318 {
3319 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3320 {
3321 gimple prev_stmt =
3322 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3323 gimple rel_stmt =
3324 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3325 while (rel_stmt)
3326 {
3327 prev_stmt = rel_stmt;
3328 rel_stmt =
3329 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3330 }
3331
3332 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3333 new_stmt;
3334 }
3335 }
3336
3337 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3338 gap_count = 1;
3339 /* If NEXT_STMT accesses the same DR as the previous statement,
3340 put the same TMP_DATA_REF as its vectorized statement; otherwise
3341 get the next data-ref from RESULT_CHAIN. */
3342 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3343 break;
3344 }
3345 }
3346
3347 VEC_free (tree, heap, result_chain);
3348 return true;
3349}
3350
3351/* Function vect_can_force_dr_alignment_p.
3352
3353   Return whether the alignment of DECL can be forced to be at least
3354   ALIGNMENT bits. */
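/* A sketch of typical use (hypothetical caller; DECL and VECTYPE are assumed
   names): if the predicate holds, the vectorizer may raise the declaration's
   alignment, e.g.

     if (vect_can_force_dr_alignment_p (decl, TYPE_ALIGN (vectype)))
       {
         DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
         DECL_USER_ALIGN (decl) = 1;
       }
*/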
3355
3356bool
3357vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3358{
3359 if (TREE_CODE (decl) != VAR_DECL)
3360 return false;
3361
3362 if (DECL_EXTERNAL (decl))
3363 return false;
3364
3365 if (TREE_ASM_WRITTEN (decl))
3366 return false;
3367
3368 if (TREE_STATIC (decl))
3369 return (alignment <= MAX_OFILE_ALIGNMENT);
3370 else
3371 return (alignment <= MAX_STACK_ALIGNMENT);
3372}
3373
3374/* Function vect_supportable_dr_alignment.
3375
3376   Return how (if at all) the access in data reference DR can be supported
3377   with respect to its alignment, as a value of enum dr_alignment_support. */
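/* The possible results, as used below (the enum itself is defined elsewhere;
   the summaries are a hedged reading of this function):

     dr_aligned                    - the access is known to be aligned.
     dr_unaligned_supported        - the target can perform the misaligned
                                     access directly.
     dr_explicit_realign           - use aligned loads plus REALIGN_LOAD,
                                     recomputing the realignment token for
                                     each access.
     dr_explicit_realign_optimized - as above, but the token can be computed
                                     once in the loop preheader.
     dr_unaligned_unsupported      - the access cannot be vectorized.  */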
3378
3379enum dr_alignment_support
3380vect_supportable_dr_alignment (struct data_reference *dr)
3381{
3382 gimple stmt = DR_STMT (dr);
3383 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3384 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
bc620c5c 3385 enum machine_mode mode = TYPE_MODE (vectype);
fb85abff 3386 bool invariant_in_outerloop = false;
37545e54 3387 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3388 struct loop *vect_loop = NULL;
3389 bool nested_in_vect_loop = false;
fb85abff 3390
3391 if (aligned_access_p (dr))
3392 return dr_aligned;
3393
37545e54 3394 if (!loop_vinfo)
3395 /* FORNOW: Misaligned accesses are supported only in loops. */
3396 return dr_unaligned_unsupported;
3397
3398 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3399 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3400
fb85abff 3401 if (nested_in_vect_loop)
3402 {
3403 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3404 invariant_in_outerloop =
3405 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3406 }
3407
3408 /* Possibly unaligned access. */
3409
3410 /* We can choose between using the implicit realignment scheme (generating
3411 a misaligned_move stmt) and the explicit realignment scheme (generating
3412     aligned loads with a REALIGN_LOAD).  There are two variants of the explicit
3413     realignment scheme: optimized and unoptimized.
3414 We can optimize the realignment only if the step between consecutive
3415 vector loads is equal to the vector size. Since the vector memory
3416 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3417 is guaranteed that the misalignment amount remains the same throughout the
3418 execution of the vectorized loop. Therefore, we can create the
3419 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3420 at the loop preheader.
3421
3422 However, in the case of outer-loop vectorization, when vectorizing a
3423 memory access in the inner-loop nested within the LOOP that is now being
3424 vectorized, while it is guaranteed that the misalignment of the
3425 vectorized memory access will remain the same in different outer-loop
3426     iterations, it is *not* guaranteed that it will remain the same throughout
3427 the execution of the inner-loop. This is because the inner-loop advances
3428 with the original scalar step (and not in steps of VS). If the inner-loop
3429 step happens to be a multiple of VS, then the misalignment remains fixed
3430 and we can use the optimized realignment scheme. For example:
3431
3432 for (i=0; i<N; i++)
3433 for (j=0; j<M; j++)
3434 s += a[i+j];
3435
3436 When vectorizing the i-loop in the above example, the step between
3437 consecutive vector loads is 1, and so the misalignment does not remain
3438 fixed across the execution of the inner-loop, and the realignment cannot
3439 be optimized (as illustrated in the following pseudo vectorized loop):
3440
3441 for (i=0; i<N; i+=4)
3442 for (j=0; j<M; j++){
3443 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3444 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3445 // (assuming that we start from an aligned address).
3446 }
3447
3448 We therefore have to use the unoptimized realignment scheme:
3449
3450 for (i=0; i<N; i+=4)
3451 for (j=k; j<M; j+=4)
3452 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3453 // that the misalignment of the initial address is
3454 // 0).
3455
3456 The loop can then be vectorized as follows:
3457
3458 for (k=0; k<4; k++){
3459 rt = get_realignment_token (&vp[k]);
3460 for (i=0; i<N; i+=4){
3461 v1 = vp[i+k];
3462 for (j=k; j<M; j+=4){
3463 v2 = vp[i+j+VS-1];
3464 va = REALIGN_LOAD <v1,v2,rt>;
3465 vs += va;
3466 v1 = v2;
3467 }
3468 }
3469 } */
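  /* A condensed sketch of the choice made below for a misaligned read,
     assuming the target provides vec_realign_load (and, if it defines one,
     a usable mask_for_load builtin); "vector_size" stands for the size of
     the vector type in bytes:

       if (nested_in_vect_loop && DR_STEP (dr) != vector_size)
         return dr_explicit_realign;            // token recomputed per access
       else
         return dr_explicit_realign_optimized;  // token hoisted to preheader
  */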
3470
3471 if (DR_IS_READ (dr))
3472 {
c6b19c5f 3473 bool is_packed = false;
3474 tree type = (TREE_TYPE (DR_REF (dr)));
3475
fb85abff 3476 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3477 CODE_FOR_nothing
3478 && (!targetm.vectorize.builtin_mask_for_load
3479 || targetm.vectorize.builtin_mask_for_load ()))
3480 {
3481 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3482 if (nested_in_vect_loop
3483 && (TREE_INT_CST_LOW (DR_STEP (dr))
3484 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3485 return dr_explicit_realign;
3486 else
3487 return dr_explicit_realign_optimized;
3488 }
c6b19c5f 3489 if (!known_alignment_for_access_p (dr))
3490 {
3491 tree ba = DR_BASE_OBJECT (dr);
3492
3493 if (ba)
3494 is_packed = contains_packed_reference (ba);
3495 }
3496
3497 if (targetm.vectorize.
3498 builtin_support_vector_misalignment (mode, type,
3499 DR_MISALIGNMENT (dr), is_packed))
fb85abff 3500 /* Can't software pipeline the loads, but can at least do them. */
3501 return dr_unaligned_supported;
3502 }
c6b19c5f 3503 else
3504 {
3505 bool is_packed = false;
3506 tree type = (TREE_TYPE (DR_REF (dr)));
fb85abff 3507
c6b19c5f 3508 if (!known_alignment_for_access_p (dr))
3509 {
3510 tree ba = DR_BASE_OBJECT (dr);
3511
3512 if (ba)
3513 is_packed = contains_packed_reference (ba);
3514 }
3515
3516 if (targetm.vectorize.
3517 builtin_support_vector_misalignment (mode, type,
3518 DR_MISALIGNMENT (dr), is_packed))
3519 return dr_unaligned_supported;
3520 }
3521
fb85abff 3522 /* Unsupported. */
3523 return dr_unaligned_unsupported;
3524}