48e1416a 1/* Data References Analysis and Manipulation Utilities for Vectorization.
fb85abff 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
48e1416a 4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 5 and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
29#include "target.h"
30#include "basic-block.h"
31#include "diagnostic.h"
32#include "tree-flow.h"
33#include "tree-dump.h"
34#include "cfgloop.h"
35#include "expr.h"
36#include "optabs.h"
37#include "tree-chrec.h"
38#include "tree-scalar-evolution.h"
39#include "tree-vectorizer.h"
40#include "toplev.h"
41
42
43/* Return the smallest scalar part of STMT.
48e1416a 44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
fb85abff 46 result-type is different than the type of the arguments (e.g., demotion,
48e1416a 47 promotion), vectype will be reset appropriately (later). Note that we have
fb85abff 48 to visit the smallest datatype in this function, because that determines the
48e1416a 49 VF. If the smallest datatype in the loop is present only as the rhs of a
fb85abff 50 promotion operation - we'd miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
48e1416a 53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
fb85abff 54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
48e1416a 56 check the rhs.
fb85abff 57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
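
/* For example, for the stmt 'int_x = (int) short_y' (assuming a 4-byte int
   and a 2-byte short), the expression type gives an lhs size of 4 bytes
   while the rhs type is only 2 bytes wide, so the function below returns
   the short type, with LHS_SIZE_UNIT == 4 and RHS_SIZE_UNIT == 2. */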
59
60tree
61vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
63{
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
66
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
68
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
73 {
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
75
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
79 }
48e1416a 80
81 *lhs_size_unit = lhs;
fb85abff 82 *rhs_size_unit = rhs;
83 return scalar_type;
84}
85
86
87/* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
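
/* For instance, if the chain that starts at FIRST_STMT is S1 -> S2 -> S3
   and STMT is S3, the function below returns 2; if STMT belongs to a
   different chain (its DR_GROUP_FIRST_DR is not FIRST_STMT), -1 is
   returned. */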
89
48e1416a 90int
fb85abff 91vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92{
93 gimple next_stmt = first_stmt;
94 int result = 0;
95
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
98
99 while (next_stmt && next_stmt != stmt)
100 {
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103 }
104
105 if (next_stmt)
106 return result;
107 else
108 return -1;
109}
110
111
112/* Function vect_insert_into_interleaving_chain.
113
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
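
/* For example, if DRB's chain currently holds data-refs with inits
   {0, 4, 12} and DRA's init is 8, DRA is linked in between the members
   with inits 4 and 12, keeping the chain sorted by increasing init. */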
115
116static void
117vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
119{
120 gimple prev, next;
121 tree next_init;
48e1416a 122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
fb85abff 123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
48e1416a 126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
fb85abff 127 while (next)
128 {
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131 {
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
136 }
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139 }
140
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144}
145
146
147/* Function vect_update_interleaving_chain.
48e1416a 148
149 For two data-refs DRA and DRB that are a part of a chain of interleaved
fb85abff 150 data accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
151
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
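
/* As an illustration of case 4: if DRA's chain has inits {8, 16} and DRB's
   chain has inits {0, 4}, the first init of DRA's chain (8) is the larger
   one, so the nodes of DRA's chain are merged into DRB's chain, giving a
   single chain with inits {0, 4, 8, 16} headed by DRB's FIRST_DR. */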
169
170static void
171vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
173{
48e1416a 174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
fb85abff 175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
180
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183 {
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
188 }
189
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192 {
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
197 }
198
48e1416a 199 /* 3. DRA is a part of a chain and DRB is not. */
fb85abff 200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201 {
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
206
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208 {
48e1416a 209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
fb85abff 211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
48e1416a 214
fb85abff 215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
218 {
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221 }
222 }
223 else
224 {
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
48e1416a 227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
fb85abff 228 }
229 return;
230 }
48e1416a 231
fb85abff 232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241 {
48e1416a 242 /* Insert the nodes of DRA chain into the DRB chain.
fb85abff 243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning). */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
48e1416a 246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
fb85abff 247 first_stmt = first_b;
248 }
249 else
250 {
48e1416a 251 /* Insert the nodes of DRB chain into the DRA chain.
fb85abff 252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning). */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
48e1416a 255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
fb85abff 256 first_stmt = first_a;
257 }
48e1416a 258
fb85abff 259 while (node)
260 {
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
48e1416a 262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
fb85abff 263 while (next)
48e1416a 264 {
fb85abff 265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
267 {
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
273 }
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276 }
277 if (!next)
278 {
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
48e1416a 283 }
fb85abff 284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
48e1416a 285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
fb85abff 286 }
287}
288
289
290/* Function vect_equal_offsets.
291
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
293
294static bool
295vect_equal_offsets (tree offset1, tree offset2)
296{
297 bool res0, res1;
298
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
301
302 if (offset1 == offset2)
303 return true;
304
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || !BINARY_CLASS_P (offset1)
48e1416a 307 || !BINARY_CLASS_P (offset2))
fb85abff 308 return false;
48e1416a 309
310 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
fb85abff 311 TREE_OPERAND (offset2, 0));
48e1416a 312 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
fb85abff 313 TREE_OPERAND (offset2, 1));
314
315 return (res0 && res1);
316}
317
318
319/* Function vect_check_interleaving.
320
321 Check if DRA and DRB are a part of interleaving. In case they are, insert
322 DRA and DRB in an interleaving chain. */
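
/* For example, with 4-byte elements, DR_STEP 16 for both refs, DR_INIT 4
   for DRA and DR_INIT 12 for DRB: the difference 8 is a multiple of the
   type size and does not exceed the step, so the two refs are recorded as
   one interleaving chain in which DRA is accessed before DRB. */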
323
48e1416a 324static bool
fb85abff 325vect_check_interleaving (struct data_reference *dra,
326 struct data_reference *drb)
327{
328 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
329
330 /* Check that the data-refs have the same first location (except init) and
331 that they are both either loads or stores (not one load and one store). */
332 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
48e1416a 333 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
fb85abff 334 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
48e1416a 335 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
fb85abff 336 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
337 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
48e1416a 338 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
fb85abff 339 || DR_IS_READ (dra) != DR_IS_READ (drb))
f083cd24 340 return false;
fb85abff 341
342 /* Check:
343 1. data-refs are of the same type
344 2. their steps are equal
48e1416a 345 3. the step (if greater than zero) is greater than the difference between
f083cd24 346 data-refs' inits. */
fb85abff 347 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
348 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
349
350 if (type_size_a != type_size_b
351 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
48e1416a 352 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
fb85abff 353 TREE_TYPE (DR_REF (drb))))
f083cd24 354 return false;
fb85abff 355
356 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
357 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
358 step = TREE_INT_CST_LOW (DR_STEP (dra));
359
360 if (init_a > init_b)
361 {
48e1416a 362 /* If init_a == init_b + the size of the type * k, we have an interleaving,
fb85abff 363 and DRB is accessed before DRA. */
364 diff_mod_size = (init_a - init_b) % type_size_a;
365
37545e54 366 if (step && (init_a - init_b) > step)
48e1416a 367 return false;
fb85abff 368
369 if (diff_mod_size == 0)
370 {
48e1416a 371 vect_update_interleaving_chain (drb, dra);
fb85abff 372 if (vect_print_dump_info (REPORT_DR_DETAILS))
373 {
374 fprintf (vect_dump, "Detected interleaving ");
375 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
376 fprintf (vect_dump, " and ");
377 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
378 }
f083cd24 379 return true;
48e1416a 380 }
fb85abff 381 }
48e1416a 382 else
fb85abff 383 {
48e1416a 384 /* If init_b == init_a + the size of the type * k, we have an
fb85abff 385 interleaving, and DRA is accessed before DRB. */
386 diff_mod_size = (init_b - init_a) % type_size_a;
387
37545e54 388 if (step && (init_b - init_a) > step)
f083cd24 389 return false;
fb85abff 390
391 if (diff_mod_size == 0)
392 {
48e1416a 393 vect_update_interleaving_chain (dra, drb);
fb85abff 394 if (vect_print_dump_info (REPORT_DR_DETAILS))
395 {
396 fprintf (vect_dump, "Detected interleaving ");
397 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
398 fprintf (vect_dump, " and ");
399 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
400 }
f083cd24 401 return true;
48e1416a 402 }
fb85abff 403 }
48e1416a 404
f083cd24 405 return false;
fb85abff 406}
407
408/* Check if the data references pointed to by DR_I and DR_J are the same or
409 belong to the same interleaving group. Return FALSE if the drs are
410 different, otherwise return TRUE. */
411
412static bool
413vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
414{
415 gimple stmt_i = DR_STMT (dr_i);
416 gimple stmt_j = DR_STMT (dr_j);
417
418 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
419 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
420 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
421 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
422 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
423 return true;
424 else
425 return false;
426}
427
428/* If address ranges represented by DDR_I and DDR_J are equal,
429 return TRUE, otherwise return FALSE. */
430
431static bool
432vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
433{
434 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
435 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
436 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
437 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
438 return true;
439 else
440 return false;
441}
442
443/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
444 tested at run-time. Return TRUE if DDR was successfully inserted.
445 Return false if versioning is not supported. */
446
447static bool
448vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
449{
450 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
451
452 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
453 return false;
454
455 if (vect_print_dump_info (REPORT_DR_DETAILS))
456 {
457 fprintf (vect_dump, "mark for run-time aliasing test between ");
458 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
459 fprintf (vect_dump, " and ");
460 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
461 }
462
463 if (optimize_loop_nest_for_size_p (loop))
464 {
465 if (vect_print_dump_info (REPORT_DR_DETAILS))
466 fprintf (vect_dump, "versioning not supported when optimizing for size.");
467 return false;
468 }
469
470 /* FORNOW: We don't support versioning with outer-loop vectorization. */
471 if (loop->inner)
472 {
473 if (vect_print_dump_info (REPORT_DR_DETAILS))
474 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
475 return false;
476 }
477
478 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
479 return true;
480}
481
37545e54 482
fb85abff 483/* Function vect_analyze_data_ref_dependence.
484
485 Return TRUE if there (might) exist a dependence between a memory-reference
486 DRA and a memory-reference DRB. If versioning for alias can handle the
487 dependence with a run-time check, return FALSE. */
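
/* E.g., for 'a[i] = a[i-1] + x' the dependence distance is 1; with a
   vectorization factor of 4 this is neither a multiple of VF nor >= VF,
   so a possible dependence is reported and the loop is not vectorized.
   A distance of 8 (a multiple of VF and >= VF) would not prevent
   vectorization. */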
48e1416a 488
fb85abff 489static bool
490vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
491 loop_vec_info loop_vinfo)
492{
493 unsigned int i;
37545e54 494 struct loop *loop = NULL;
495 int vectorization_factor = 0;
fb85abff 496 struct data_reference *dra = DDR_A (ddr);
497 struct data_reference *drb = DDR_B (ddr);
48e1416a 498 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
fb85abff 499 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
500 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
501 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
502 lambda_vector dist_v;
503 unsigned int loop_depth;
48e1416a 504
fb85abff 505 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
506 {
507 /* Independent data accesses. */
508 vect_check_interleaving (dra, drb);
509 return false;
510 }
511
37545e54 512 if (loop_vinfo)
513 {
514 loop = LOOP_VINFO_LOOP (loop_vinfo);
515 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
516 }
517
518 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
fb85abff 519 return false;
48e1416a 520
fb85abff 521 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
522 {
48e1416a 523 if (loop_vinfo)
37545e54 524 {
525 if (vect_print_dump_info (REPORT_DR_DETAILS))
526 {
527 fprintf (vect_dump, "versioning for alias required: "
528 "can't determine dependence between ");
529 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
530 fprintf (vect_dump, " and ");
531 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
532 }
48e1416a 533
37545e54 534 /* Add to list of ddrs that need to be tested at run-time. */
535 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
536 }
537
538 /* When vectorizing a basic block, an unknown dependence can still mean
539 a strided access. */
540 if (vect_check_interleaving (dra, drb))
541 return false;
542
fb85abff 543 if (vect_print_dump_info (REPORT_DR_DETAILS))
544 {
37545e54 545 fprintf (vect_dump, "can't determine dependence between ");
fb85abff 546 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
547 fprintf (vect_dump, " and ");
548 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
549 }
37545e54 550
551 return true;
fb85abff 552 }
553
37545e54 554 /* Versioning for alias is not yet supported for basic block SLP, and
48e1416a 555 dependence distance is inapplicable; hence, in case of a known data
37545e54 556 dependence, basic block vectorization is impossible for now. */
557 if (!loop_vinfo)
558 {
559 if (dra != drb && vect_check_interleaving (dra, drb))
560 return false;
48e1416a 561
37545e54 562 if (vect_print_dump_info (REPORT_DR_DETAILS))
563 {
564 fprintf (vect_dump, "determined dependence between ");
565 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
566 fprintf (vect_dump, " and ");
567 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
568 }
569
48e1416a 570 return true;
37545e54 571 }
572
573 /* Loop-based vectorization and known data dependence. */
fb85abff 574 if (DDR_NUM_DIST_VECTS (ddr) == 0)
575 {
576 if (vect_print_dump_info (REPORT_DR_DETAILS))
577 {
578 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
579 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
580 fprintf (vect_dump, " and ");
581 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
582 }
583 /* Add to list of ddrs that need to be tested at run-time. */
584 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
48e1416a 585 }
fb85abff 586
587 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
588 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
589 {
590 int dist = dist_v[loop_depth];
591
592 if (vect_print_dump_info (REPORT_DR_DETAILS))
593 fprintf (vect_dump, "dependence distance = %d.", dist);
594
595 /* Same loop iteration. */
596 if (dist % vectorization_factor == 0 && dra_size == drb_size)
597 {
598 /* Two references with distance zero have the same alignment. */
599 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
600 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
601 if (vect_print_dump_info (REPORT_ALIGNMENT))
602 fprintf (vect_dump, "accesses have the same alignment.");
603 if (vect_print_dump_info (REPORT_DR_DETAILS))
604 {
605 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
606 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
607 fprintf (vect_dump, " and ");
608 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
609 }
610
611 /* For interleaving, mark that there is a read-write dependency if
48e1416a 612 necessary. We have already checked that one of the data-refs is a store. */
fb85abff 613 if (DR_IS_READ (dra))
614 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
615 else
616 {
617 if (DR_IS_READ (drb))
618 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
619 }
48e1416a 620
fb85abff 621 continue;
622 }
623
48e1416a 624 if (abs (dist) >= vectorization_factor
fb85abff 625 || (dist > 0 && DDR_REVERSED_P (ddr)))
626 {
48e1416a 627 /* Dependence distance does not create dependence, as far as
628 vectorization is concerned, in this case. If DDR_REVERSED_P the
fb85abff 629 order of the data-refs in DDR was reversed (to make distance
630 vector positive), and the actual distance is negative. */
631 if (vect_print_dump_info (REPORT_DR_DETAILS))
632 fprintf (vect_dump, "dependence distance >= VF or negative.");
633 continue;
634 }
635
f083cd24 636 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 637 {
37545e54 638 fprintf (vect_dump, "not vectorized, possible dependence "
639 "between data-refs ");
fb85abff 640 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
641 fprintf (vect_dump, " and ");
642 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
643 }
644
645 return true;
646 }
647
648 return false;
649}
650
651/* Function vect_analyze_data_ref_dependences.
48e1416a 652
fb85abff 653 Examine all the data references in the loop, and make sure there do not
654 exist any data dependences between them. */
48e1416a 655
fb85abff 656bool
48e1416a 657vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
37545e54 658 bb_vec_info bb_vinfo)
fb85abff 659{
660 unsigned int i;
37545e54 661 VEC (ddr_p, heap) *ddrs = NULL;
fb85abff 662 struct data_dependence_relation *ddr;
663
48e1416a 664 if (vect_print_dump_info (REPORT_DETAILS))
fb85abff 665 fprintf (vect_dump, "=== vect_analyze_dependences ===");
48e1416a 666
37545e54 667 if (loop_vinfo)
668 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
669 else
670 ddrs = BB_VINFO_DDRS (bb_vinfo);
48e1416a 671
fb85abff 672 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
673 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
674 return false;
675
676 return true;
677}
678
679
680/* Function vect_compute_data_ref_alignment
681
682 Compute the misalignment of the data reference DR.
683
684 Output:
685 1. If during the misalignment computation it is found that the data reference
686 cannot be vectorized then false is returned.
687 2. DR_MISALIGNMENT (DR) is defined.
688
689 FOR NOW: No analysis is actually performed. Misalignment is calculated
690 only for trivial cases. TODO. */
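
/* For example, for an access 'a[i+1]' to a global array of 4-byte ints,
   with vectype V4SI (16-byte alignment) and the base aligned (or forced)
   to 16 bytes, DR_INIT is 4 and the recorded misalignment is
   4 % 16 == 4 bytes. */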
691
692static bool
693vect_compute_data_ref_alignment (struct data_reference *dr)
694{
695 gimple stmt = DR_STMT (dr);
48e1416a 696 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
fb85abff 697 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 698 struct loop *loop = NULL;
fb85abff 699 tree ref = DR_REF (dr);
700 tree vectype;
701 tree base, base_addr;
702 bool base_aligned;
703 tree misalign;
704 tree aligned_to, alignment;
48e1416a 705
fb85abff 706 if (vect_print_dump_info (REPORT_DETAILS))
707 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
708
37545e54 709 if (loop_vinfo)
710 loop = LOOP_VINFO_LOOP (loop_vinfo);
48e1416a 711
fb85abff 712 /* Initialize misalignment to unknown. */
713 SET_DR_MISALIGNMENT (dr, -1);
714
715 misalign = DR_INIT (dr);
716 aligned_to = DR_ALIGNED_TO (dr);
717 base_addr = DR_BASE_ADDRESS (dr);
718 vectype = STMT_VINFO_VECTYPE (stmt_info);
719
720 /* In case the dataref is in an inner-loop of the loop that is being
721 vectorized (LOOP), we use the base and misalignment information
722 relative to the outer-loop (LOOP). This is ok only if the misalignment
723 stays the same throughout the execution of the inner-loop, which is why
724 we have to check that the stride of the dataref in the inner-loop is a
725 multiple of the vector size. */
37545e54 726 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 727 {
728 tree step = DR_STEP (dr);
729 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
48e1416a 730
fb85abff 731 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
732 {
733 if (vect_print_dump_info (REPORT_ALIGNMENT))
734 fprintf (vect_dump, "inner step divides the vector-size.");
735 misalign = STMT_VINFO_DR_INIT (stmt_info);
736 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
737 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
738 }
739 else
740 {
741 if (vect_print_dump_info (REPORT_ALIGNMENT))
742 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
743 misalign = NULL_TREE;
744 }
745 }
746
747 base = build_fold_indirect_ref (base_addr);
748 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
749
750 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
751 || !misalign)
752 {
753 if (vect_print_dump_info (REPORT_ALIGNMENT))
754 {
755 fprintf (vect_dump, "Unknown alignment for access: ");
756 print_generic_expr (vect_dump, base, TDF_SLIM);
757 }
758 return true;
759 }
760
48e1416a 761 if ((DECL_P (base)
fb85abff 762 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
763 alignment) >= 0)
764 || (TREE_CODE (base_addr) == SSA_NAME
765 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
766 TREE_TYPE (base_addr)))),
767 alignment) >= 0))
768 base_aligned = true;
769 else
48e1416a 770 base_aligned = false;
fb85abff 771
48e1416a 772 if (!base_aligned)
fb85abff 773 {
48e1416a 774 /* Do not change the alignment of global variables if
fb85abff 775 flag_section_anchors is enabled. */
776 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
777 || (TREE_STATIC (base) && flag_section_anchors))
778 {
779 if (vect_print_dump_info (REPORT_DETAILS))
780 {
781 fprintf (vect_dump, "can't force alignment of ref: ");
782 print_generic_expr (vect_dump, ref, TDF_SLIM);
783 }
784 return true;
785 }
48e1416a 786
fb85abff 787 /* Force the alignment of the decl.
788 NOTE: This is the only change to the code we make during
789 the analysis phase, before deciding to vectorize the loop. */
790 if (vect_print_dump_info (REPORT_DETAILS))
791 fprintf (vect_dump, "force alignment");
792 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
793 DECL_USER_ALIGN (base) = 1;
794 }
795
796 /* At this point we assume that the base is aligned. */
797 gcc_assert (base_aligned
48e1416a 798 || (TREE_CODE (base) == VAR_DECL
fb85abff 799 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
800
801 /* Modulo alignment. */
802 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
803
804 if (!host_integerp (misalign, 1))
805 {
806 /* Negative or overflowed misalignment value. */
807 if (vect_print_dump_info (REPORT_DETAILS))
808 fprintf (vect_dump, "unexpected misalign value");
809 return false;
810 }
811
812 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
813
814 if (vect_print_dump_info (REPORT_DETAILS))
815 {
816 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
817 print_generic_expr (vect_dump, ref, TDF_SLIM);
818 }
819
820 return true;
821}
822
823
824/* Function vect_compute_data_refs_alignment
825
826 Compute the misalignment of data references in the loop.
827 Return FALSE if a data reference is found that cannot be vectorized. */
828
829static bool
48e1416a 830vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
37545e54 831 bb_vec_info bb_vinfo)
fb85abff 832{
37545e54 833 VEC (data_reference_p, heap) *datarefs;
fb85abff 834 struct data_reference *dr;
835 unsigned int i;
836
37545e54 837 if (loop_vinfo)
838 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
839 else
840 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
48e1416a 841
fb85abff 842 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
843 if (!vect_compute_data_ref_alignment (dr))
844 return false;
845
846 return true;
847}
848
849
850/* Function vect_update_misalignment_for_peel
851
852 DR - the data reference whose misalignment is to be adjusted.
853 DR_PEEL - the data reference whose misalignment is being made
854 zero in the vector loop by the peel.
855 NPEEL - the number of iterations in the peel loop if the misalignment
856 of DR_PEEL is known at compile time. */
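
/* For instance, if DR has a known misalignment of 8 bytes, 4-byte elements
   and NPEEL is 2, then with a 16-byte vector size the updated misalignment
   is (8 + 2 * 4) % 16 == 0. */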
857
858static void
859vect_update_misalignment_for_peel (struct data_reference *dr,
860 struct data_reference *dr_peel, int npeel)
861{
862 unsigned int i;
863 VEC(dr_p,heap) *same_align_drs;
864 struct data_reference *current_dr;
865 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
866 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
867 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
868 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
869
870 /* For interleaved data accesses the step in the loop must be multiplied by
871 the size of the interleaving group. */
872 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
873 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
874 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
875 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
876
877 /* It can be assumed that the data refs with the same alignment as dr_peel
878 are aligned in the vector loop. */
879 same_align_drs
880 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
881 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
882 {
883 if (current_dr != dr)
884 continue;
885 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
886 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
887 SET_DR_MISALIGNMENT (dr, 0);
888 return;
889 }
890
891 if (known_alignment_for_access_p (dr)
892 && known_alignment_for_access_p (dr_peel))
893 {
894 int misal = DR_MISALIGNMENT (dr);
895 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
896 misal += npeel * dr_size;
897 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
898 SET_DR_MISALIGNMENT (dr, misal);
899 return;
900 }
901
902 if (vect_print_dump_info (REPORT_DETAILS))
903 fprintf (vect_dump, "Setting misalignment to -1.");
904 SET_DR_MISALIGNMENT (dr, -1);
905}
906
907
908/* Function vect_verify_datarefs_alignment
909
910 Return TRUE if all data references in the loop can be
911 handled with respect to alignment. */
912
37545e54 913bool
914vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 915{
37545e54 916 VEC (data_reference_p, heap) *datarefs;
fb85abff 917 struct data_reference *dr;
918 enum dr_alignment_support supportable_dr_alignment;
919 unsigned int i;
920
37545e54 921 if (loop_vinfo)
922 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
923 else
924 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
925
fb85abff 926 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
927 {
928 gimple stmt = DR_STMT (dr);
929 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
930
931 /* For interleaving, only the alignment of the first access matters. */
932 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
933 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
934 continue;
935
936 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
937 if (!supportable_dr_alignment)
938 {
f083cd24 939 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 940 {
941 if (DR_IS_READ (dr))
48e1416a 942 fprintf (vect_dump,
fb85abff 943 "not vectorized: unsupported unaligned load.");
944 else
48e1416a 945 fprintf (vect_dump,
fb85abff 946 "not vectorized: unsupported unaligned store.");
947 }
948 return false;
949 }
950 if (supportable_dr_alignment != dr_aligned
951 && vect_print_dump_info (REPORT_ALIGNMENT))
952 fprintf (vect_dump, "Vectorizing an unaligned access.");
953 }
954 return true;
955}
956
957
958/* Function vector_alignment_reachable_p
959
960 Return true if vector alignment for DR is reachable by peeling
961 a few loop iterations. Return false otherwise. */
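
/* E.g., with vectype V4SI (4 elements of 4 bytes) and a known misalignment
   of 8 bytes, the misalignment is 2 elements and peeling 2 iterations
   reaches alignment; but if the access belongs to an interleaving group of
   size 3, (4 - 2) % 3 != 0, so peeling cannot align the group. */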
962
963static bool
964vector_alignment_reachable_p (struct data_reference *dr)
965{
966 gimple stmt = DR_STMT (dr);
967 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
968 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
969
970 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
971 {
972 /* For interleaved accesses we peel only if the number of iterations in
973 the prolog loop ({VF - misalignment}) is a multiple of the
974 number of interleaved accesses. */
975 int elem_size, mis_in_elements;
976 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
977
978 /* FORNOW: handle only known alignment. */
979 if (!known_alignment_for_access_p (dr))
980 return false;
981
982 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
983 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
984
985 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
986 return false;
987 }
988
989 /* If the misalignment is known at compile time then allow peeling
990 only if natural alignment is reachable through peeling. */
991 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
992 {
48e1416a 993 HOST_WIDE_INT elmsize =
fb85abff 994 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
995 if (vect_print_dump_info (REPORT_DETAILS))
996 {
997 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
998 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
999 }
1000 if (DR_MISALIGNMENT (dr) % elmsize)
1001 {
1002 if (vect_print_dump_info (REPORT_DETAILS))
1003 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1004 return false;
1005 }
1006 }
1007
1008 if (!known_alignment_for_access_p (dr))
1009 {
1010 tree type = (TREE_TYPE (DR_REF (dr)));
1011 tree ba = DR_BASE_OBJECT (dr);
1012 bool is_packed = false;
1013
1014 if (ba)
1015 is_packed = contains_packed_reference (ba);
1016
1017 if (vect_print_dump_info (REPORT_DETAILS))
1018 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1019 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1020 return true;
1021 else
1022 return false;
1023 }
1024
1025 return true;
1026}
1027
1028/* Function vect_enhance_data_refs_alignment
1029
1030 This pass will use loop versioning and loop peeling in order to enhance
1031 the alignment of data references in the loop.
1032
1033 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1034 original loop is to be vectorized; any other loops that are created by
1035 the transformations performed in this pass are not supposed to be
1036 vectorized. This restriction will be relaxed.
1037
1038 This pass will require a cost model to guide it whether to apply peeling
1039 or versioning or a combination of the two. For example, the scheme that
1040 Intel uses when given a loop with several memory accesses, is as follows:
1041 choose one memory access ('p') whose alignment you want to force by doing
1042 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1043 other accesses are not necessarily aligned, or (2) use loop versioning to
1044 generate one loop in which all accesses are aligned, and another loop in
1045 which only 'p' is necessarily aligned.
1046
1047 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1048 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1049 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1050
1051 Devising a cost model is the most critical aspect of this work. It will
1052 guide us on which access to peel for, whether to use loop versioning, how
1053 many versions to create, etc. The cost model will probably consist of
1054 generic considerations as well as target specific considerations (on
1055 powerpc for example, misaligned stores are more painful than misaligned
1056 loads).
1057
1058 Here are the general steps involved in alignment enhancements:
1059
1060 -- original loop, before alignment analysis:
1061 for (i=0; i<N; i++){
1062 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1063 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1064 }
1065
1066 -- After vect_compute_data_refs_alignment:
1067 for (i=0; i<N; i++){
1068 x = q[i]; # DR_MISALIGNMENT(q) = 3
1069 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1070 }
1071
1072 -- Possibility 1: we do loop versioning:
1073 if (p is aligned) {
1074 for (i=0; i<N; i++){ # loop 1A
1075 x = q[i]; # DR_MISALIGNMENT(q) = 3
1076 p[i] = y; # DR_MISALIGNMENT(p) = 0
1077 }
1078 }
1079 else {
1080 for (i=0; i<N; i++){ # loop 1B
1081 x = q[i]; # DR_MISALIGNMENT(q) = 3
1082 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1083 }
1084 }
1085
1086 -- Possibility 2: we do loop peeling:
1087 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1088 x = q[i];
1089 p[i] = y;
1090 }
1091 for (i = 3; i < N; i++){ # loop 2A
1092 x = q[i]; # DR_MISALIGNMENT(q) = 0
1093 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1094 }
1095
1096 -- Possibility 3: combination of loop peeling and versioning:
1097 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1098 x = q[i];
1099 p[i] = y;
1100 }
1101 if (p is aligned) {
1102 for (i = 3; i<N; i++){ # loop 3A
1103 x = q[i]; # DR_MISALIGNMENT(q) = 0
1104 p[i] = y; # DR_MISALIGNMENT(p) = 0
1105 }
1106 }
1107 else {
1108 for (i = 3; i<N; i++){ # loop 3B
1109 x = q[i]; # DR_MISALIGNMENT(q) = 0
1110 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1111 }
1112 }
1113
1114 These loops are later passed to loop_transform to be vectorized. The
1115 vectorizer will use the alignment information to guide the transformation
1116 (whether to generate regular loads/stores, or with special handling for
1117 misalignment). */
1118
1119bool
1120vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1121{
1122 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1123 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1124 enum dr_alignment_support supportable_dr_alignment;
1125 struct data_reference *dr0 = NULL;
1126 struct data_reference *dr;
1127 unsigned int i;
1128 bool do_peeling = false;
1129 bool do_versioning = false;
1130 bool stat;
1131 gimple stmt;
1132 stmt_vec_info stmt_info;
1133 int vect_versioning_for_alias_required;
1134
1135 if (vect_print_dump_info (REPORT_DETAILS))
1136 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1137
1138 /* While cost model enhancements are expected in the future, the high level
1139 view of the code at this time is as follows:
1140
ec2886ed 1141 A) If there is a misaligned access then see if peeling to align
1142 this access can make all data references satisfy
454f25be 1143 vect_supportable_dr_alignment. If so, update data structures
1144 as needed and return true.
fb85abff 1145
1146 B) If peeling wasn't possible and there is a data reference with an
1147 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1148 then see if loop versioning checks can be used to make all data
1149 references satisfy vect_supportable_dr_alignment. If so, update
1150 data structures as needed and return true.
1151
1152 C) If neither peeling nor versioning were successful then return false if
1153 any data reference does not satisfy vect_supportable_dr_alignment.
1154
1155 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1156
1157 Note, Possibility 3 above (which is peeling and versioning together) is not
1158 being done at this time. */
1159
1160 /* (1) Peeling to force alignment. */
1161
1162 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1163 Considerations:
1164 + How many accesses will become aligned due to the peeling
1165 - How many accesses will become unaligned due to the peeling,
1166 and the cost of misaligned accesses.
48e1416a 1167 - The cost of peeling (the extra runtime checks, the increase
fb85abff 1168 in code size).
1169
1170 The scheme we use FORNOW: peel to force the alignment of the first
454f25be 1171 unsupported misaligned access in the loop.
fb85abff 1172
1173 TODO: Use a cost model. */
1174
1175 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1176 {
1177 stmt = DR_STMT (dr);
1178 stmt_info = vinfo_for_stmt (stmt);
1179
1180 /* For interleaving, only the alignment of the first access
1181 matters. */
1182 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1183 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1184 continue;
1185
5784a5da 1186 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
fb85abff 1187 {
1188 do_peeling = vector_alignment_reachable_p (dr);
1189 if (do_peeling)
1190 dr0 = dr;
1191 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1192 fprintf (vect_dump, "vector alignment may not be reachable");
1193 break;
1194 }
1195 }
1196
48e1416a 1197 vect_versioning_for_alias_required
10095225 1198 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
fb85abff 1199
1200 /* Temporarily, if versioning for alias is required, we disable peeling
1201 until we support peeling and versioning. Often peeling for alignment
1202 will require peeling for loop-bound, which in turn requires that we
1203 know how to adjust the loop ivs after the loop. */
1204 if (vect_versioning_for_alias_required
10095225 1205 || !vect_can_advance_ivs_p (loop_vinfo)
fb85abff 1206 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1207 do_peeling = false;
1208
1209 if (do_peeling)
1210 {
1211 int mis;
1212 int npeel = 0;
1213 gimple stmt = DR_STMT (dr0);
1214 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1215 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1216 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1217
1218 if (known_alignment_for_access_p (dr0))
1219 {
1220 /* Since it's known at compile time, compute the number of iterations
1221 in the peeled loop (the peeling factor) for use in updating
1222 DR_MISALIGNMENT values. The peeling factor is the vectorization
1223 factor minus the misalignment as an element count. */
1224 mis = DR_MISALIGNMENT (dr0);
1225 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1226 npeel = nelements - mis;
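   /* E.g., with V4SI (nelements == 4) and a known misalignment of 8
      bytes of a 4-byte data-ref, mis becomes 2 elements and npeel is
      4 - 2 == 2 peeled iterations. */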
1227
48e1416a 1228 /* For interleaved data accesses every iteration accesses all the
fb85abff 1229 members of the group; therefore we divide the number of iterations
1230 by the group size. */
48e1416a 1231 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
fb85abff 1232 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1233 npeel /= DR_GROUP_SIZE (stmt_info);
1234
1235 if (vect_print_dump_info (REPORT_DETAILS))
1236 fprintf (vect_dump, "Try peeling by %d", npeel);
1237 }
1238
1239 /* Ensure that all data refs can be vectorized after the peel. */
1240 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1241 {
1242 int save_misalignment;
1243
1244 if (dr == dr0)
1245 continue;
1246
1247 stmt = DR_STMT (dr);
1248 stmt_info = vinfo_for_stmt (stmt);
1249 /* For interleaving, only the alignment of the first access
1250 matters. */
1251 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1252 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1253 continue;
1254
1255 save_misalignment = DR_MISALIGNMENT (dr);
1256 vect_update_misalignment_for_peel (dr, dr0, npeel);
1257 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1258 SET_DR_MISALIGNMENT (dr, save_misalignment);
48e1416a 1259
fb85abff 1260 if (!supportable_dr_alignment)
1261 {
1262 do_peeling = false;
1263 break;
1264 }
1265 }
1266
1267 if (do_peeling)
1268 {
1269 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1270 If the misalignment of DR_i is identical to that of dr0 then set
1271 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1272 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1273 by the peeling factor times the element size of DR_i (MOD the
1274 vectorization factor times the size). Otherwise, the
1275 misalignment of DR_i must be set to unknown. */
1276 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1277 if (dr != dr0)
1278 vect_update_misalignment_for_peel (dr, dr0, npeel);
1279
1280 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1281 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1282 SET_DR_MISALIGNMENT (dr0, 0);
1283 if (vect_print_dump_info (REPORT_ALIGNMENT))
1284 fprintf (vect_dump, "Alignment of access forced using peeling.");
1285
1286 if (vect_print_dump_info (REPORT_DETAILS))
1287 fprintf (vect_dump, "Peeling for alignment will be applied.");
1288
37545e54 1289 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1290 gcc_assert (stat);
1291 return stat;
1292 }
1293 }
1294
1295
1296 /* (2) Versioning to force alignment. */
1297
1298 /* Try versioning if:
1299 1) flag_tree_vect_loop_version is TRUE
1300 2) optimize loop for speed
1301 3) there is at least one unsupported misaligned data ref with an unknown
1302 misalignment, and
1303 4) all misaligned data refs with a known misalignment are supported, and
1304 5) the number of runtime alignment checks is within reason. */
1305
48e1416a 1306 do_versioning =
1307 flag_tree_vect_loop_version
fb85abff 1308 && optimize_loop_nest_for_speed_p (loop)
1309 && (!loop->inner); /* FORNOW */
1310
1311 if (do_versioning)
1312 {
1313 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1314 {
1315 stmt = DR_STMT (dr);
1316 stmt_info = vinfo_for_stmt (stmt);
1317
1318 /* For interleaving, only the alignment of the first access
1319 matters. */
1320 if (aligned_access_p (dr)
1321 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1322 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1323 continue;
1324
1325 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1326
1327 if (!supportable_dr_alignment)
1328 {
1329 gimple stmt;
1330 int mask;
1331 tree vectype;
1332
1333 if (known_alignment_for_access_p (dr)
1334 || VEC_length (gimple,
1335 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1336 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1337 {
1338 do_versioning = false;
1339 break;
1340 }
1341
1342 stmt = DR_STMT (dr);
1343 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1344 gcc_assert (vectype);
48e1416a 1345
fb85abff 1346 /* The rightmost bits of an aligned address must be zeros.
1347 Construct the mask needed for this test. For example,
1348 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1349 mask must be 15 = 0xf. */
1350 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1351
1352 /* FORNOW: use the same mask to test all potentially unaligned
1353 references in the loop. The vectorizer currently supports
1354 a single vector size, see the reference to
1355 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1356 vectorization factor is computed. */
1357 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1358 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1359 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1360 VEC_safe_push (gimple, heap,
1361 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1362 DR_STMT (dr));
1363 }
1364 }
48e1416a 1365
fb85abff 1366 /* Versioning requires at least one misaligned data reference. */
10095225 1367 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
fb85abff 1368 do_versioning = false;
1369 else if (!do_versioning)
1370 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1371 }
1372
1373 if (do_versioning)
1374 {
1375 VEC(gimple,heap) *may_misalign_stmts
1376 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1377 gimple stmt;
1378
1379 /* It can now be assumed that the data references in the statements
1380 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1381 of the loop being vectorized. */
1382 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1383 {
1384 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1385 dr = STMT_VINFO_DATA_REF (stmt_info);
1386 SET_DR_MISALIGNMENT (dr, 0);
1387 if (vect_print_dump_info (REPORT_ALIGNMENT))
1388 fprintf (vect_dump, "Alignment of access forced using versioning.");
1389 }
1390
1391 if (vect_print_dump_info (REPORT_DETAILS))
1392 fprintf (vect_dump, "Versioning for alignment will be applied.");
1393
1394 /* Peeling and versioning can't be done together at this time. */
1395 gcc_assert (! (do_peeling && do_versioning));
1396
37545e54 1397 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1398 gcc_assert (stat);
1399 return stat;
1400 }
1401
1402 /* This point is reached if neither peeling nor versioning is being done. */
1403 gcc_assert (! (do_peeling || do_versioning));
1404
37545e54 1405 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1406 return stat;
1407}
1408
1409
1410/* Function vect_analyze_data_refs_alignment
1411
1412 Analyze the alignment of the data-references in the loop.
1413 Return FALSE if a data reference is found that cannot be vectorized. */
1414
1415bool
48e1416a 1416vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
37545e54 1417 bb_vec_info bb_vinfo)
fb85abff 1418{
1419 if (vect_print_dump_info (REPORT_DETAILS))
1420 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1421
37545e54 1422 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
fb85abff 1423 {
f083cd24 1424 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
48e1416a 1425 fprintf (vect_dump,
fb85abff 1426 "not vectorized: can't calculate alignment for data ref.");
1427 return false;
1428 }
1429
1430 return true;
1431}
1432
1433
1434/* Analyze groups of strided accesses: check that DR belongs to a group of
1435 strided accesses of legal size, step, etc. Detect gaps, single element
1436 interleaving, and other special cases. Set strided access info.
1437 Collect groups of strided stores for further use in SLP analysis. */
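
/* As a simple example, in
     for (i = 0; i < n; i++)
       {
         a[2*i] = x;
         a[2*i+1] = y;
       }
   with 4-byte elements, each store has DR_STEP 8 and type size 4, so
   STRIDE is 2; the two stores form one interleaving group of size 2 with
   no gaps, and the group is collected as a strided store for SLP
   analysis. */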
1438
1439static bool
1440vect_analyze_group_access (struct data_reference *dr)
1441{
1442 tree step = DR_STEP (dr);
1443 tree scalar_type = TREE_TYPE (DR_REF (dr));
1444 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1445 gimple stmt = DR_STMT (dr);
1446 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1447 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 1448 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
fb85abff 1449 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1450 HOST_WIDE_INT stride;
1451 bool slp_impossible = false;
1452
48e1416a 1453 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
fb85abff 1454 interleaving group (including gaps). */
48e1416a 1455 stride = dr_step / type_size;
fb85abff 1456
1457 /* Not consecutive access is possible only if it is a part of interleaving. */
1458 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1459 {
1460 /* Check if this DR is a part of interleaving, and is a single
1461 element of the group that is accessed in the loop. */
48e1416a 1462
fb85abff 1463 /* Gaps are supported only for loads. STEP must be a multiple of the type
1464 size. The size of the group must be a power of 2. */
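      /* E.g., a load from 'a[4*i]' with 4-byte elements has DR_STEP 16;
         the step is a multiple of the type size and 16 / 4 == 4 is a power
         of 2, so the access is treated as single element interleaving with
         a group size of 4. */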
1465 if (DR_IS_READ (dr)
1466 && (dr_step % type_size) == 0
1467 && stride > 0
1468 && exact_log2 (stride) != -1)
1469 {
1470 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1471 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1472 if (vect_print_dump_info (REPORT_DR_DETAILS))
1473 {
37545e54 1474 fprintf (vect_dump, "Detected single element interleaving ");
fb85abff 1475 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1476 fprintf (vect_dump, " step ");
1477 print_generic_expr (vect_dump, step, TDF_SLIM);
1478 }
1479 return true;
1480 }
1481 if (vect_print_dump_info (REPORT_DETAILS))
1482 fprintf (vect_dump, "not consecutive access");
1483 return false;
1484 }
1485
1486 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1487 {
1488 /* First stmt in the interleaving chain. Check the chain. */
1489 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1490 struct data_reference *data_ref = dr;
1a0e7d51 1491 unsigned int count = 1;
fb85abff 1492 tree next_step;
1493 tree prev_init = DR_INIT (data_ref);
1494 gimple prev = stmt;
1a0e7d51 1495 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
fb85abff 1496
1497 while (next)
1498 {
1499 /* Skip same data-refs. In case that two or more stmts share a data-ref
1500 (supported only for loads), we vectorize only the first stmt, and
1501 the rest get their vectorized loads from the first one. */
1502 if (!tree_int_cst_compare (DR_INIT (data_ref),
1503 DR_INIT (STMT_VINFO_DATA_REF (
1504 vinfo_for_stmt (next)))))
1505 {
1506 if (!DR_IS_READ (data_ref))
1507 {
1508 if (vect_print_dump_info (REPORT_DETAILS))
1509 fprintf (vect_dump, "Two store stmts share the same dr.");
1510 return false;
1511 }
1512
1513 /* Check that there are no load-store dependencies for these loads,
1514 to prevent a case of load-store-load to the same location. */
1515 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1516 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1517 {
1518 if (vect_print_dump_info (REPORT_DETAILS))
1519 fprintf (vect_dump,
1520 "READ_WRITE dependence in interleaving.");
1521 return false;
1522 }
1523
1524 /* For load use the same data-ref load. */
1525 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1526
1527 prev = next;
1528 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1529 continue;
1530 }
1531 prev = next;
1532
1533 /* Check that all the accesses have the same STEP. */
1534 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1535 if (tree_int_cst_compare (step, next_step))
1536 {
1537 if (vect_print_dump_info (REPORT_DETAILS))
1538 fprintf (vect_dump, "not consecutive access in interleaving");
1539 return false;
1540 }
1541
1542 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1543 /* Check that the distance between two accesses is equal to the type
1544 size. Otherwise, we have gaps. */
1545 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1546 - TREE_INT_CST_LOW (prev_init)) / type_size;
1547 if (diff != 1)
1548 {
1549 /* FORNOW: SLP of accesses with gaps is not supported. */
1550 slp_impossible = true;
1551 if (!DR_IS_READ (data_ref))
1552 {
1553 if (vect_print_dump_info (REPORT_DETAILS))
1554 fprintf (vect_dump, "interleaved store with gaps");
1555 return false;
1556 }
b11576bf 1557
1558 gaps += diff - 1;
fb85abff 1559 }
1560
1561 /* Store the gap from the previous member of the group. If there is no
1562 gap in the access, DR_GROUP_GAP is always 1. */
1563 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1564
1565 prev_init = DR_INIT (data_ref);
1566 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1567 /* Count the number of data-refs in the chain. */
1568 count++;
1569 }
1570
1571 /* COUNT is the number of accesses found; we multiply it by the size of
1572 the type to get COUNT_IN_BYTES. */
1573 count_in_bytes = type_size * count;
1574
48e1416a 1575 /* Check that the size of the interleaving (including gaps) is not
37545e54 1576 greater than STEP. */
b11576bf 1577 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
fb85abff 1578 {
1579 if (vect_print_dump_info (REPORT_DETAILS))
1580 {
1581 fprintf (vect_dump, "interleaving size is greater than step for ");
1582 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1583 }
1584 return false;
1585 }
1586
1587 /* Check that the size of the interleaving is equal to STEP for stores,
1588 i.e., that there are no gaps. */
37545e54 1589 if (dr_step && dr_step != count_in_bytes)
fb85abff 1590 {
1591 if (DR_IS_READ (dr))
1592 {
1593 slp_impossible = true;
1594 /* There is a gap after the last load in the group. This gap is a
48e1416a 1595 difference between the stride and the number of elements. When
1596 there is no gap, this difference should be 0. */
1597 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
fb85abff 1598 }
1599 else
1600 {
1601 if (vect_print_dump_info (REPORT_DETAILS))
1602 fprintf (vect_dump, "interleaved store with gaps");
1603 return false;
1604 }
1605 }
1606
1607 /* Check that STEP is a multiple of type size. */
37545e54 1608 if (dr_step && (dr_step % type_size) != 0)
fb85abff 1609 {
1610 if (vect_print_dump_info (REPORT_DETAILS))
1611 {
1612 fprintf (vect_dump, "step is not a multiple of type size: step ");
1613 print_generic_expr (vect_dump, step, TDF_SLIM);
1614 fprintf (vect_dump, " size ");
1615 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1616 TDF_SLIM);
1617 }
1618 return false;
1619 }
1620
48e1416a 1621 /* FORNOW: we handle only interleaving that is a power of 2.
fb85abff 1622     We don't fail here if it may still be possible to vectorize the
1623 group using SLP. If not, the size of the group will be checked in
1624 vect_analyze_operations, and the vectorization will fail. */
1625 if (exact_log2 (stride) == -1)
1626 {
1627 if (vect_print_dump_info (REPORT_DETAILS))
1628 fprintf (vect_dump, "interleaving is not a power of 2");
1629
1630 if (slp_impossible)
1631 return false;
1632 }
37545e54 1633
1634 if (stride == 0)
1635 stride = count;
48e1416a 1636
fb85abff 1637 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1638 if (vect_print_dump_info (REPORT_DETAILS))
1639 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1640
48e1416a 1641 /* SLP: create an SLP data structure for every interleaving group of
fb85abff 1642     stores for further analysis in vect_analyze_slp.  */
1643 if (!DR_IS_READ (dr) && !slp_impossible)
37545e54 1644 {
1645 if (loop_vinfo)
1646 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1647 stmt);
1648 if (bb_vinfo)
48e1416a 1649 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
37545e54 1650 stmt);
1651 }
fb85abff 1652 }
1653
1654 return true;
1655}
1656
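/* A minimal illustration (hypothetical loop, not taken from the callers) of
   a store group that the analysis above accepts:

     for (i = 0; i < n; i++)
       {
         a[2*i]     = x[i];     <-- first stmt of the chain (DR_INIT 0)
         a[2*i + 1] = y[i];     <-- second member, diff == 1, so no gap
       }

   Here type_size is sizeof (a[0]), the step of both data-refs is
   2 * sizeof (a[0]), COUNT is 2 and COUNT_IN_BYTES equals the step, so the
   group is recorded with DR_GROUP_SIZE == 2.  */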
1657
1658/* Analyze the access pattern of the data-reference DR.
1659 In case of non-consecutive accesses call vect_analyze_group_access() to
1660 analyze groups of strided accesses. */
1661
1662static bool
1663vect_analyze_data_ref_access (struct data_reference *dr)
1664{
1665 tree step = DR_STEP (dr);
1666 tree scalar_type = TREE_TYPE (DR_REF (dr));
1667 gimple stmt = DR_STMT (dr);
1668 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1669 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 1670 struct loop *loop = NULL;
fb85abff 1671 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1672
37545e54 1673 if (loop_vinfo)
1674 loop = LOOP_VINFO_LOOP (loop_vinfo);
48e1416a 1675
37545e54 1676 if (loop_vinfo && !step)
fb85abff 1677 {
1678 if (vect_print_dump_info (REPORT_DETAILS))
37545e54 1679 fprintf (vect_dump, "bad data-ref access in loop");
fb85abff 1680 return false;
1681 }
1682
37545e54 1683 /* Don't allow invariant accesses in loops. */
1684 if (loop_vinfo && dr_step == 0)
48e1416a 1685 return false;
fb85abff 1686
37545e54 1687 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 1688 {
1689 /* Interleaved accesses are not yet supported within outer-loop
1690 vectorization for references in the inner-loop. */
1691 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1692
1693 /* For the rest of the analysis we use the outer-loop step. */
1694 step = STMT_VINFO_DR_STEP (stmt_info);
1695 dr_step = TREE_INT_CST_LOW (step);
48e1416a 1696
fb85abff 1697 if (dr_step == 0)
1698 {
1699 if (vect_print_dump_info (REPORT_ALIGNMENT))
1700 fprintf (vect_dump, "zero step in outer loop.");
1701 if (DR_IS_READ (dr))
48e1416a 1702 return true;
fb85abff 1703 else
1704 return false;
1705 }
1706 }
1707
1708 /* Consecutive? */
1709 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1710 {
1711 /* Mark that it is not interleaving. */
1712 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1713 return true;
1714 }
1715
37545e54 1716 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 1717 {
1718 if (vect_print_dump_info (REPORT_ALIGNMENT))
1719 fprintf (vect_dump, "strided access in outer loop.");
1720 return false;
1721 }
1722
1723  /* Not a consecutive access - check if it is part of an interleaving group.  */
1724 return vect_analyze_group_access (dr);
1725}
1726
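/* Two illustrative cases (hypothetical loops, not taken from the callers):

     for (i = 0; i < n; i++)
       s += a[i];        <-- DR_STEP == sizeof (a[0]): consecutive access,
                             DR_GROUP_FIRST_DR is cleared and we return true.

     for (i = 0; i < n; i++)
       s += a[3*i];      <-- DR_STEP == 3 * sizeof (a[0]): not consecutive,
                             handed to vect_analyze_group_access.  */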
1727
1728/* Function vect_analyze_data_ref_accesses.
1729
1730 Analyze the access pattern of all the data references in the loop.
1731
1732 FORNOW: the only access pattern that is considered vectorizable is a
1733 simple step 1 (consecutive) access.
1734
1735 FORNOW: handle only arrays and pointer accesses. */
1736
1737bool
37545e54 1738vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 1739{
1740 unsigned int i;
37545e54 1741 VEC (data_reference_p, heap) *datarefs;
fb85abff 1742 struct data_reference *dr;
1743
1744 if (vect_print_dump_info (REPORT_DETAILS))
1745 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1746
37545e54 1747 if (loop_vinfo)
1748 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1749 else
1750 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1751
fb85abff 1752 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1753 if (!vect_analyze_data_ref_access (dr))
1754 {
f083cd24 1755 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1756 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1757 return false;
1758 }
1759
1760 return true;
1761}
1762
1763/* Function vect_prune_runtime_alias_test_list.
1764
1765 Prune a list of ddrs to be tested at run-time by versioning for alias.
1766   Return FALSE if the resulting list of ddrs is longer than allowed by
1767 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1768
1769bool
1770vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1771{
1772 VEC (ddr_p, heap) * ddrs =
1773 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1774 unsigned i, j;
1775
1776 if (vect_print_dump_info (REPORT_DETAILS))
1777 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1778
1779 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1780 {
1781 bool found;
1782 ddr_p ddr_i;
1783
1784 ddr_i = VEC_index (ddr_p, ddrs, i);
1785 found = false;
1786
1787 for (j = 0; j < i; j++)
1788 {
1789 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1790
1791 if (vect_vfa_range_equal (ddr_i, ddr_j))
1792 {
1793 if (vect_print_dump_info (REPORT_DR_DETAILS))
1794 {
1795 fprintf (vect_dump, "found equal ranges ");
1796 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1797 fprintf (vect_dump, ", ");
1798 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1799 fprintf (vect_dump, " and ");
1800 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1801 fprintf (vect_dump, ", ");
1802 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1803 }
1804 found = true;
1805 break;
1806 }
1807 }
48e1416a 1808
fb85abff 1809 if (found)
1810 {
1811 VEC_ordered_remove (ddr_p, ddrs, i);
1812 continue;
1813 }
1814 i++;
1815 }
1816
1817 if (VEC_length (ddr_p, ddrs) >
1818 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1819 {
1820 if (vect_print_dump_info (REPORT_DR_DETAILS))
1821 {
1822 fprintf (vect_dump,
1823 "disable versioning for alias - max number of generated "
1824 "checks exceeded.");
1825 }
1826
1827 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1828
1829 return false;
1830 }
1831
1832 return true;
1833}
1834
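/* An illustrative case (hypothetical loop, not taken from the callers):

     for (i = 0; i < n; i++)
       {
         out[i]  = in[i] + 1;
         out2[i] = in[i] * 2;
       }

   If aliasing cannot be resolved at compile time, the two reads of in[i]
   may each produce a may-alias ddr against the same store; such ddrs cover
   identical address ranges, vect_vfa_range_equal returns true for them, and
   the duplicate is removed so that only one run-time check is emitted.  */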
1835
1836/* Function vect_analyze_data_refs.
1837
37545e54 1838 Find all the data references in the loop or basic block.
fb85abff 1839
1840 The general structure of the analysis of data refs in the vectorizer is as
1841 follows:
48e1416a 1842 1- vect_analyze_data_refs(loop/bb): call
37545e54 1843 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1844 in the loop/bb and their dependences.
fb85abff 1845 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1846 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1847 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1848
1849*/
1850
1851bool
48e1416a 1852vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 1853{
37545e54 1854 struct loop *loop = NULL;
1855 basic_block bb = NULL;
fb85abff 1856 unsigned int i;
1857 VEC (data_reference_p, heap) *datarefs;
1858 struct data_reference *dr;
1859 tree scalar_type;
1860
1861 if (vect_print_dump_info (REPORT_DETAILS))
1862 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
48e1416a 1863
37545e54 1864 if (loop_vinfo)
1865 {
1866 loop = LOOP_VINFO_LOOP (loop_vinfo);
1867 compute_data_dependences_for_loop (loop, true,
1868 &LOOP_VINFO_DATAREFS (loop_vinfo),
1869 &LOOP_VINFO_DDRS (loop_vinfo));
1870 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1871 }
1872 else
1873 {
1874 bb = BB_VINFO_BB (bb_vinfo);
1875 compute_data_dependences_for_bb (bb, true,
1876 &BB_VINFO_DATAREFS (bb_vinfo),
1877 &BB_VINFO_DDRS (bb_vinfo));
1878 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1879 }
fb85abff 1880
1881 /* Go through the data-refs, check that the analysis succeeded. Update pointer
1882 from stmt_vec_info struct to DR and vectype. */
fb85abff 1883
1884 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1885 {
1886 gimple stmt;
1887 stmt_vec_info stmt_info;
1888 basic_block bb;
48e1416a 1889 tree base, offset, init;
1890
fb85abff 1891 if (!dr || !DR_REF (dr))
1892 {
f083cd24 1893 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1894 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1895 return false;
1896 }
1897
1898 stmt = DR_STMT (dr);
1899 stmt_info = vinfo_for_stmt (stmt);
1900
1901 /* Check that analysis of the data-ref succeeded. */
1902 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1903 || !DR_STEP (dr))
1904 {
f083cd24 1905 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1906 {
1907 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1908 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1909 }
1910 return false;
1911 }
1912
1913 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1914 {
f083cd24 1915 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1916 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1917 "constant");
1918 return false;
1919 }
1920
fb85abff 1921 base = unshare_expr (DR_BASE_ADDRESS (dr));
1922 offset = unshare_expr (DR_OFFSET (dr));
1923 init = unshare_expr (DR_INIT (dr));
48e1416a 1924
fb85abff 1925 /* Update DR field in stmt_vec_info struct. */
1926 bb = gimple_bb (stmt);
1927
1928      /* If the dataref is in an inner-loop of the loop that is considered
1929 for vectorization, we also want to analyze the access relative to
48e1416a 1930 the outer-loop (DR contains information only relative to the
fb85abff 1931 inner-most enclosing loop). We do that by building a reference to the
1932 first location accessed by the inner-loop, and analyze it relative to
48e1416a 1933 the outer-loop. */
1934 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 1935 {
1936 tree outer_step, outer_base, outer_init;
1937 HOST_WIDE_INT pbitsize, pbitpos;
1938 tree poffset;
1939 enum machine_mode pmode;
1940 int punsignedp, pvolatilep;
1941 affine_iv base_iv, offset_iv;
1942 tree dinit;
1943
48e1416a 1944 /* Build a reference to the first location accessed by the
fb85abff 1945 inner-loop: *(BASE+INIT). (The first location is actually
1946 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1947 tree inner_base = build_fold_indirect_ref
1948 (fold_build2 (POINTER_PLUS_EXPR,
48e1416a 1949 TREE_TYPE (base), base,
fb85abff 1950 fold_convert (sizetype, init)));
1951
1952 if (vect_print_dump_info (REPORT_DETAILS))
1953 {
1954 fprintf (vect_dump, "analyze in outer-loop: ");
1955 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1956 }
1957
48e1416a 1958 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
fb85abff 1959 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1960 gcc_assert (outer_base != NULL_TREE);
1961
1962 if (pbitpos % BITS_PER_UNIT != 0)
1963 {
1964 if (vect_print_dump_info (REPORT_DETAILS))
1965 fprintf (vect_dump, "failed: bit offset alignment.\n");
1966 return false;
1967 }
1968
1969 outer_base = build_fold_addr_expr (outer_base);
48e1416a 1970 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
fb85abff 1971 &base_iv, false))
1972 {
1973 if (vect_print_dump_info (REPORT_DETAILS))
1974 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1975 return false;
1976 }
1977
1978 if (offset)
1979 {
1980 if (poffset)
48e1416a 1981 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
fb85abff 1982 poffset);
1983 else
1984 poffset = offset;
1985 }
1986
1987 if (!poffset)
1988 {
1989 offset_iv.base = ssize_int (0);
1990 offset_iv.step = ssize_int (0);
1991 }
48e1416a 1992 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
fb85abff 1993 &offset_iv, false))
1994 {
1995 if (vect_print_dump_info (REPORT_DETAILS))
1996 fprintf (vect_dump, "evolution of offset is not affine.\n");
1997 return false;
1998 }
1999
2000 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2001 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2002 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2003 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2004 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2005
2006 outer_step = size_binop (PLUS_EXPR,
2007 fold_convert (ssizetype, base_iv.step),
2008 fold_convert (ssizetype, offset_iv.step));
2009
2010 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2011 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
48e1416a 2012 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
fb85abff 2013 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
48e1416a 2014 STMT_VINFO_DR_OFFSET (stmt_info) =
fb85abff 2015 fold_convert (ssizetype, offset_iv.base);
48e1416a 2016 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
fb85abff 2017 size_int (highest_pow2_factor (offset_iv.base));
2018
2019 if (vect_print_dump_info (REPORT_DETAILS))
2020 {
2021 fprintf (vect_dump, "\touter base_address: ");
2022 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2023 fprintf (vect_dump, "\n\touter offset from base address: ");
2024 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2025 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2026 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2027 fprintf (vect_dump, "\n\touter step: ");
2028 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2029 fprintf (vect_dump, "\n\touter aligned to: ");
2030 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2031 }
2032 }
2033
2034 if (STMT_VINFO_DATA_REF (stmt_info))
2035 {
f083cd24 2036 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 2037 {
2038 fprintf (vect_dump,
2039 "not vectorized: more than one data ref in stmt: ");
2040 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2041 }
2042 return false;
2043 }
f083cd24 2044
fb85abff 2045 STMT_VINFO_DATA_REF (stmt_info) = dr;
48e1416a 2046
fb85abff 2047 /* Set vectype for STMT. */
2048 scalar_type = TREE_TYPE (DR_REF (dr));
2049 STMT_VINFO_VECTYPE (stmt_info) =
2050 get_vectype_for_scalar_type (scalar_type);
48e1416a 2051 if (!STMT_VINFO_VECTYPE (stmt_info))
fb85abff 2052 {
f083cd24 2053 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 2054 {
2055 fprintf (vect_dump,
2056 "not vectorized: no vectype for stmt: ");
2057 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2058 fprintf (vect_dump, " scalar_type: ");
2059 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2060 }
2061 return false;
2062 }
2063 }
48e1416a 2064
fb85abff 2065 return true;
2066}
2067
2068
2069/* Function vect_get_new_vect_var.
2070
48e1416a 2071   Returns a name for a new variable.  The current naming scheme prepends the
2072   prefix "vect_", "stmp_" or "vect_p" (depending on the value of VAR_KIND)
2073   to the name of the vectorizer generated variable; NAME, if provided, is
fb85abff 2074   appended after the prefix.  */
2075
2076tree
2077vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2078{
2079 const char *prefix;
2080 tree new_vect_var;
2081
2082 switch (var_kind)
2083 {
2084 case vect_simple_var:
2085 prefix = "vect_";
2086 break;
2087 case vect_scalar_var:
2088 prefix = "stmp_";
2089 break;
2090 case vect_pointer_var:
2091 prefix = "vect_p";
2092 break;
2093 default:
2094 gcc_unreachable ();
2095 }
2096
2097 if (name)
2098 {
2099 char* tmp = concat (prefix, name, NULL);
2100 new_vect_var = create_tmp_var (type, tmp);
2101 free (tmp);
2102 }
2103 else
2104 new_vect_var = create_tmp_var (type, prefix);
2105
2106 /* Mark vector typed variable as a gimple register variable. */
2107 if (TREE_CODE (type) == VECTOR_TYPE)
2108 DECL_GIMPLE_REG_P (new_vect_var) = true;
2109
2110 return new_vect_var;
2111}
2112
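/* For illustration only (hypothetical calls, not taken from this file):

     vect_get_new_vect_var (vectype, vect_simple_var, "cst_")
       creates a temporary whose name starts with "vect_cst_";
     vect_get_new_vect_var (ptr_type, vect_pointer_var, get_name (base))
       creates one whose name starts with "vect_p" followed by the base name;
     with NAME == NULL the prefix alone ("vect_", "stmp_" or "vect_p") is
     used.  In all cases create_tmp_var makes the final name unique.  */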
2113
2114/* Function vect_create_addr_base_for_vector_ref.
2115
2116 Create an expression that computes the address of the first memory location
2117 that will be accessed for a data reference.
2118
2119 Input:
2120 STMT: The statement containing the data reference.
2121 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2122   OFFSET: Optional. If supplied, it is added to the initial address.
2123 LOOP: Specify relative to which loop-nest should the address be computed.
2124 For example, when the dataref is in an inner-loop nested in an
2125 outer-loop that is now being vectorized, LOOP can be either the
2126 outer-loop, or the inner-loop. The first memory location accessed
2127 by the following dataref ('in' points to short):
2128
2129 for (i=0; i<N; i++)
2130 for (j=0; j<M; j++)
2131 s += in[i+j]
2132
2133 is as follows:
2134 if LOOP=i_loop: &in (relative to i_loop)
2135 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2136
2137 Output:
48e1416a 2138 1. Return an SSA_NAME whose value is the address of the memory location of
fb85abff 2139 the first vector of the data reference.
2140 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2141 these statement(s) which define the returned SSA_NAME.
2142
2143 FORNOW: We are only handling array accesses with step 1. */
2144
2145tree
2146vect_create_addr_base_for_vector_ref (gimple stmt,
2147 gimple_seq *new_stmt_list,
2148 tree offset,
2149 struct loop *loop)
2150{
2151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2152 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
fb85abff 2153 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2154 tree base_name;
2155 tree data_ref_base_var;
2156 tree vec_stmt;
2157 tree addr_base, addr_expr;
2158 tree dest;
2159 gimple_seq seq = NULL;
2160 tree base_offset = unshare_expr (DR_OFFSET (dr));
2161 tree init = unshare_expr (DR_INIT (dr));
f083cd24 2162 tree vect_ptr_type;
fb85abff 2163 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
37545e54 2164 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
fb85abff 2165
37545e54 2166 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
fb85abff 2167 {
37545e54 2168 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 2169
37545e54 2170 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
fb85abff 2171
2172 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2173 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2174 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2175 }
2176
37545e54 2177 if (loop_vinfo)
2178 base_name = build_fold_indirect_ref (data_ref_base);
2179 else
2180 {
2181 base_offset = ssize_int (0);
2182 init = ssize_int (0);
2183 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
48e1416a 2184 }
37545e54 2185
fb85abff 2186 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2187 add_referenced_var (data_ref_base_var);
2188 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2189 data_ref_base_var);
2190 gimple_seq_add_seq (new_stmt_list, seq);
2191
2192 /* Create base_offset */
2193 base_offset = size_binop (PLUS_EXPR,
2194 fold_convert (sizetype, base_offset),
2195 fold_convert (sizetype, init));
2196 dest = create_tmp_var (sizetype, "base_off");
2197 add_referenced_var (dest);
2198 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2199 gimple_seq_add_seq (new_stmt_list, seq);
2200
2201 if (offset)
2202 {
2203 tree tmp = create_tmp_var (sizetype, "offset");
2204
2205 add_referenced_var (tmp);
2206 offset = fold_build2 (MULT_EXPR, sizetype,
2207 fold_convert (sizetype, offset), step);
2208 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2209 base_offset, offset);
2210 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2211 gimple_seq_add_seq (new_stmt_list, seq);
2212 }
2213
2214 /* base + base_offset */
37545e54 2215 if (loop_vinfo)
2216 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2217 data_ref_base, base_offset);
2218 else
2219 {
2220 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2221 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2222 else
48e1416a 2223 addr_base = build1 (ADDR_EXPR,
37545e54 2224 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2225 unshare_expr (DR_REF (dr)));
2226 }
48e1416a 2227
fb85abff 2228 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2229
f083cd24 2230 vec_stmt = fold_convert (vect_ptr_type, addr_base);
fb85abff 2231 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2232 get_name (base_name));
2233 add_referenced_var (addr_expr);
f083cd24 2234 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
fb85abff 2235 gimple_seq_add_seq (new_stmt_list, seq);
2236
2237 if (vect_print_dump_info (REPORT_DETAILS))
2238 {
2239 fprintf (vect_dump, "created ");
2240 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2241 }
f083cd24 2242
fb85abff 2243 return vec_stmt;
2244}
2245
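/* A rough illustration (simplified, not the exact statements) of what gets
   appended to NEW_STMT_LIST for a loop data-ref 'a[i]' with no extra OFFSET,
   using the temporaries created above:

     batmp    = (base address of 'a');
     base_off = DR_OFFSET + DR_INIT;
     vect_pa  = (vectype *) (batmp + base_off);

   The SSA_NAME holding the final converted pointer ('vect_pa' here, a
   hypothetical name) is what the function returns.  */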
2246
2247/* Function vect_create_data_ref_ptr.
2248
2249   Create a new pointer to vector type (vp) that points to the first location
48e1416a 2250 accessed in the loop by STMT, along with the def-use update chain to
fb85abff 2251 appropriately advance the pointer through the loop iterations. Also set
2252 aliasing information for the pointer. This vector pointer is used by the
2253 callers to this function to create a memory reference expression for vector
2254 load/store access.
2255
2256 Input:
2257 1. STMT: a stmt that references memory. Expected to be of the form
2258 GIMPLE_ASSIGN <name, data-ref> or
2259 GIMPLE_ASSIGN <data-ref, name>.
2260 2. AT_LOOP: the loop where the vector memref is to be created.
2261 3. OFFSET (optional): an offset to be added to the initial address accessed
2262 by the data-ref in STMT.
2263 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2264 pointing to the initial address.
2265 5. TYPE: if not NULL indicates the required type of the data-ref.
2266
2267 Output:
2268 1. Declare a new ptr to vector_type, and have it point to the base of the
2269      data reference (initial address accessed by the data reference).
2270 For example, for vector of type V8HI, the following code is generated:
2271
2272 v8hi *vp;
2273 vp = (v8hi *)initial_address;
2274
2275 if OFFSET is not supplied:
2276 initial_address = &a[init];
2277 if OFFSET is supplied:
2278 initial_address = &a[init + OFFSET];
2279
2280 Return the initial_address in INITIAL_ADDRESS.
2281
2282 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
48e1416a 2283 update the pointer in each iteration of the loop.
fb85abff 2284
2285 Return the increment stmt that updates the pointer in PTR_INCR.
2286
48e1416a 2287 3. Set INV_P to true if the access pattern of the data reference in the
fb85abff 2288 vectorized loop is invariant. Set it to false otherwise.
2289
2290 4. Return the pointer. */
2291
2292tree
2293vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2294 tree offset, tree *initial_address, gimple *ptr_incr,
dd277d48 2295 bool only_init, bool *inv_p)
fb85abff 2296{
2297 tree base_name;
2298 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2299 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 2300 struct loop *loop = NULL;
2301 bool nested_in_vect_loop = false;
2302 struct loop *containing_loop = NULL;
fb85abff 2303 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2304 tree vect_ptr_type;
2305 tree vect_ptr;
fb85abff 2306 tree new_temp;
2307 gimple vec_stmt;
2308 gimple_seq new_stmt_list = NULL;
37545e54 2309 edge pe = NULL;
fb85abff 2310 basic_block new_bb;
2311 tree vect_ptr_init;
2312 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2313 tree vptr;
2314 gimple_stmt_iterator incr_gsi;
2315 bool insert_after;
2316 tree indx_before_incr, indx_after_incr;
2317 gimple incr;
2318 tree step;
37545e54 2319 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2320 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
48e1416a 2321
37545e54 2322 if (loop_vinfo)
2323 {
2324 loop = LOOP_VINFO_LOOP (loop_vinfo);
2325 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2326 containing_loop = (gimple_bb (stmt))->loop_father;
2327 pe = loop_preheader_edge (loop);
2328 }
2329 else
2330 {
2331 gcc_assert (bb_vinfo);
2332 only_init = true;
2333 *ptr_incr = NULL;
2334 }
48e1416a 2335
fb85abff 2336 /* Check the step (evolution) of the load in LOOP, and record
2337 whether it's invariant. */
2338 if (nested_in_vect_loop)
2339 step = STMT_VINFO_DR_STEP (stmt_info);
2340 else
2341 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
48e1416a 2342
fb85abff 2343 if (tree_int_cst_compare (step, size_zero_node) == 0)
2344 *inv_p = true;
2345 else
2346 *inv_p = false;
2347
2348 /* Create an expression for the first address accessed by this load
48e1416a 2349 in LOOP. */
fb85abff 2350 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2351
2352 if (vect_print_dump_info (REPORT_DETAILS))
2353 {
2354 tree data_ref_base = base_name;
2355 fprintf (vect_dump, "create vector-pointer variable to type: ");
2356 print_generic_expr (vect_dump, vectype, TDF_SLIM);
48e1416a 2357 if (TREE_CODE (data_ref_base) == VAR_DECL
10095225 2358 || TREE_CODE (data_ref_base) == ARRAY_REF)
2359 fprintf (vect_dump, " vectorizing an array ref: ");
fb85abff 2360 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2361 fprintf (vect_dump, " vectorizing a record based array ref: ");
2362 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2363 fprintf (vect_dump, " vectorizing a pointer ref: ");
2364 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2365 }
2366
2367 /** (1) Create the new vector-pointer variable: **/
dd277d48 2368 vect_ptr_type = build_pointer_type (vectype);
fb85abff 2369 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2370 get_name (base_name));
a34701c9 2371
2372 /* Vector types inherit the alias set of their component type by default so
2373 we need to use a ref-all pointer if the data reference does not conflict
2374 with the created vector data reference because it is not addressable. */
2375 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2376 get_alias_set (DR_REF (dr))))
2377 {
98155838 2378 vect_ptr_type
2379 = build_pointer_type_for_mode (vectype,
2380 TYPE_MODE (vect_ptr_type), true);
a34701c9 2381 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2382 get_name (base_name));
2383 }
2384
2385 /* Likewise for any of the data references in the stmt group. */
2386 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
fb85abff 2387 {
dd277d48 2388 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2389 do
2390 {
2391 tree lhs = gimple_assign_lhs (orig_stmt);
2392 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2393 get_alias_set (lhs)))
2394 {
a34701c9 2395 vect_ptr_type
98155838 2396 = build_pointer_type_for_mode (vectype,
2397 TYPE_MODE (vect_ptr_type), true);
a34701c9 2398 vect_ptr
2399 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2400 get_name (base_name));
dd277d48 2401 break;
2402 }
2403
2404 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2405 }
2406 while (orig_stmt);
fb85abff 2407 }
2408
2409 add_referenced_var (vect_ptr);
2410
fb85abff 2411 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2412 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2413 def-use update cycles for the pointer: One relative to the outer-loop
2414 (LOOP), which is what steps (3) and (4) below do. The other is relative
2415 to the inner-loop (which is the inner-most loop containing the dataref),
48e1416a 2416	 and this is done by step (5) below.
fb85abff 2417
2418 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2419 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2420 redundant. Steps (3),(4) create the following:
2421
2422 vp0 = &base_addr;
2423 LOOP: vp1 = phi(vp0,vp2)
48e1416a 2424 ...
fb85abff 2425 ...
2426 vp2 = vp1 + step
2427 goto LOOP
48e1416a 2428
fb85abff 2429 If there is an inner-loop nested in loop, then step (5) will also be
2430 applied, and an additional update in the inner-loop will be created:
2431
2432 vp0 = &base_addr;
2433 LOOP: vp1 = phi(vp0,vp2)
2434 ...
2435 inner: vp3 = phi(vp1,vp4)
2436 vp4 = vp3 + inner_step
2437 if () goto inner
2438 ...
2439 vp2 = vp1 + step
2440 if () goto LOOP */
2441
2442  /** (3) Calculate the initial address of the vector-pointer, and set
2443 the vector-pointer to point to it before the loop: **/
2444
2445  /* Create: &(base[init_val+offset]) in the loop preheader.  */
2446
2447 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2448 offset, loop);
fb85abff 2449 if (new_stmt_list)
2450 {
37545e54 2451 if (pe)
2452 {
2453 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2454 gcc_assert (!new_bb);
2455 }
2456 else
2457 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
fb85abff 2458 }
2459
2460 *initial_address = new_temp;
2461
2462 /* Create: p = (vectype *) initial_base */
2463 vec_stmt = gimple_build_assign (vect_ptr,
2464 fold_convert (vect_ptr_type, new_temp));
2465 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2466 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
37545e54 2467 if (pe)
2468 {
2469 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2470 gcc_assert (!new_bb);
2471 }
2472 else
2473 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
fb85abff 2474
2475 /** (4) Handle the updating of the vector-pointer inside the loop.
2476 This is needed when ONLY_INIT is false, and also when AT_LOOP
2477 is the inner-loop nested in LOOP (during outer-loop vectorization).
2478 **/
2479
37545e54 2480 /* No update in loop is required. */
48e1416a 2481 if (only_init && (!loop_vinfo || at_loop == loop))
fb85abff 2482 {
2483 /* Copy the points-to information if it exists. */
2484 if (DR_PTR_INFO (dr))
2485 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2486 vptr = vect_ptr_init;
2487 }
2488 else
2489 {
2490 /* The step of the vector pointer is the Vector Size. */
2491 tree step = TYPE_SIZE_UNIT (vectype);
48e1416a 2492 /* One exception to the above is when the scalar step of the load in
fb85abff 2493 LOOP is zero. In this case the step here is also zero. */
2494 if (*inv_p)
2495 step = size_zero_node;
2496
2497 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2498
2499 create_iv (vect_ptr_init,
2500 fold_convert (vect_ptr_type, step),
2501 vect_ptr, loop, &incr_gsi, insert_after,
2502 &indx_before_incr, &indx_after_incr);
2503 incr = gsi_stmt (incr_gsi);
37545e54 2504 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
fb85abff 2505
2506 /* Copy the points-to information if it exists. */
2507 if (DR_PTR_INFO (dr))
2508 {
2509 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2510 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2511 }
fb85abff 2512 if (ptr_incr)
2513 *ptr_incr = incr;
2514
2515 vptr = indx_before_incr;
2516 }
2517
2518 if (!nested_in_vect_loop || only_init)
2519 return vptr;
2520
2521
2522 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2523      nested in LOOP, if it exists: **/
2524
2525 gcc_assert (nested_in_vect_loop);
2526 if (!only_init)
2527 {
2528 standard_iv_increment_position (containing_loop, &incr_gsi,
2529 &insert_after);
48e1416a 2530 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
fb85abff 2531 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2532 &indx_after_incr);
2533 incr = gsi_stmt (incr_gsi);
37545e54 2534 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
fb85abff 2535
2536 /* Copy the points-to information if it exists. */
2537 if (DR_PTR_INFO (dr))
2538 {
2539 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2540 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2541 }
fb85abff 2542 if (ptr_incr)
2543 *ptr_incr = incr;
2544
48e1416a 2545 return indx_before_incr;
fb85abff 2546 }
2547 else
2548 gcc_unreachable ();
2549}
2550
2551
2552/* Function bump_vector_ptr
2553
2554 Increment a pointer (to a vector type) by vector-size. If requested,
48e1416a 2555   i.e. if PTR_INCR is given, then also connect the new increment stmt
fb85abff 2556 to the existing def-use update-chain of the pointer, by modifying
2557 the PTR_INCR as illustrated below:
2558
2559 The pointer def-use update-chain before this function:
2560 DATAREF_PTR = phi (p_0, p_2)
2561 ....
48e1416a 2562 PTR_INCR: p_2 = DATAREF_PTR + step
fb85abff 2563
2564 The pointer def-use update-chain after this function:
2565 DATAREF_PTR = phi (p_0, p_2)
2566 ....
2567 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2568 ....
2569 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2570
2571 Input:
48e1416a 2572 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
fb85abff 2573 in the loop.
48e1416a 2574 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
fb85abff 2575 the loop. The increment amount across iterations is expected
48e1416a 2576 to be vector_size.
fb85abff 2577 BSI - location where the new update stmt is to be placed.
2578 STMT - the original scalar memory-access stmt that is being vectorized.
2579 BUMP - optional. The offset by which to bump the pointer. If not given,
2580 the offset is assumed to be vector_size.
2581
2582 Output: Return NEW_DATAREF_PTR as illustrated above.
48e1416a 2583
fb85abff 2584*/
2585
2586tree
2587bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2588 gimple stmt, tree bump)
2589{
2590 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2591 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2592 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2593 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2594 tree update = TYPE_SIZE_UNIT (vectype);
2595 gimple incr_stmt;
2596 ssa_op_iter iter;
2597 use_operand_p use_p;
2598 tree new_dataref_ptr;
2599
2600 if (bump)
2601 update = bump;
48e1416a 2602
fb85abff 2603 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2604 dataref_ptr, update);
2605 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2606 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2607 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2608
2609 /* Copy the points-to information if it exists. */
2610 if (DR_PTR_INFO (dr))
2611 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
fb85abff 2612
2613 if (!ptr_incr)
2614 return new_dataref_ptr;
2615
2616 /* Update the vector-pointer's cross-iteration increment. */
2617 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2618 {
2619 tree use = USE_FROM_PTR (use_p);
2620
2621 if (use == dataref_ptr)
2622 SET_USE (use_p, new_dataref_ptr);
2623 else
2624 gcc_assert (tree_int_cst_compare (use, update) == 0);
2625 }
2626
2627 return new_dataref_ptr;
2628}
2629
2630
2631/* Function vect_create_destination_var.
2632
2633 Create a new temporary of type VECTYPE. */
2634
2635tree
2636vect_create_destination_var (tree scalar_dest, tree vectype)
2637{
2638 tree vec_dest;
2639 const char *new_name;
2640 tree type;
2641 enum vect_var_kind kind;
2642
2643 kind = vectype ? vect_simple_var : vect_scalar_var;
2644 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2645
2646 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2647
2648 new_name = get_name (scalar_dest);
2649 if (!new_name)
2650 new_name = "var_";
2651 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2652 add_referenced_var (vec_dest);
2653
2654 return vec_dest;
2655}
2656
2657/* Function vect_strided_store_supported.
2658
2659   Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2660 and FALSE otherwise. */
2661
2662bool
2663vect_strided_store_supported (tree vectype)
2664{
2665 optab interleave_high_optab, interleave_low_optab;
2666 int mode;
2667
2668 mode = (int) TYPE_MODE (vectype);
48e1416a 2669
fb85abff 2670 /* Check that the operation is supported. */
48e1416a 2671 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
fb85abff 2672 vectype, optab_default);
48e1416a 2673 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
fb85abff 2674 vectype, optab_default);
2675 if (!interleave_high_optab || !interleave_low_optab)
2676 {
2677 if (vect_print_dump_info (REPORT_DETAILS))
2678 fprintf (vect_dump, "no optab for interleave.");
2679 return false;
2680 }
2681
48e1416a 2682 if (optab_handler (interleave_high_optab, mode)->insn_code
fb85abff 2683 == CODE_FOR_nothing
48e1416a 2684 || optab_handler (interleave_low_optab, mode)->insn_code
fb85abff 2685 == CODE_FOR_nothing)
2686 {
2687 if (vect_print_dump_info (REPORT_DETAILS))
2688 fprintf (vect_dump, "interleave op not supported by target.");
2689 return false;
2690 }
2691
2692 return true;
2693}
2694
2695
2696/* Function vect_permute_store_chain.
2697
2698 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
48e1416a 2699 a power of 2, generate interleave_high/low stmts to reorder the data
fb85abff 2700 correctly for the stores. Return the final references for stores in
2701 RESULT_CHAIN.
2702
2703 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2704   The input is 4 vectors each containing 8 elements.  We assign a number to
2705   each element; the input sequence is:
2706
2707 1st vec: 0 1 2 3 4 5 6 7
2708 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 2709 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 2710 4th vec: 24 25 26 27 28 29 30 31
2711
2712 The output sequence should be:
2713
2714 1st vec: 0 8 16 24 1 9 17 25
2715 2nd vec: 2 10 18 26 3 11 19 27
2716   3rd vec: 4 12 20 28 5 13 21 29
2717 4th vec: 6 14 22 30 7 15 23 31
2718
2719 i.e., we interleave the contents of the four vectors in their order.
2720
48e1416a 2721 We use interleave_high/low instructions to create such output. The input of
fb85abff 2722 each interleave_high/low operation is two vectors:
48e1416a 2723 1st vec 2nd vec
2724 0 1 2 3 4 5 6 7
2725 the even elements of the result vector are obtained left-to-right from the
2726 high/low elements of the first vector. The odd elements of the result are
fb85abff 2727 obtained left-to-right from the high/low elements of the second vector.
2728 The output of interleave_high will be: 0 4 1 5
2729 and of interleave_low: 2 6 3 7
2730
48e1416a 2731
fb85abff 2732 The permutation is done in log LENGTH stages. In each stage interleave_high
48e1416a 2733 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2734 where the first argument is taken from the first half of DR_CHAIN and the
2735     second argument from its second half.
2736 In our example,
fb85abff 2737
2738 I1: interleave_high (1st vec, 3rd vec)
2739 I2: interleave_low (1st vec, 3rd vec)
2740 I3: interleave_high (2nd vec, 4th vec)
2741 I4: interleave_low (2nd vec, 4th vec)
2742
2743 The output for the first stage is:
2744
2745 I1: 0 16 1 17 2 18 3 19
2746 I2: 4 20 5 21 6 22 7 23
2747 I3: 8 24 9 25 10 26 11 27
2748 I4: 12 28 13 29 14 30 15 31
2749
2750 The output of the second stage, i.e. the final result is:
2751
2752 I1: 0 8 16 24 1 9 17 25
2753 I2: 2 10 18 26 3 11 19 27
2754   I3: 4 12 20 28 5 13 21 29
2755 I4: 6 14 22 30 7 15 23 31. */
48e1416a 2756
fb85abff 2757bool
48e1416a 2758vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2759 unsigned int length,
fb85abff 2760 gimple stmt,
2761 gimple_stmt_iterator *gsi,
2762 VEC(tree,heap) **result_chain)
2763{
2764 tree perm_dest, vect1, vect2, high, low;
2765 gimple perm_stmt;
2766 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2767 tree scalar_dest;
2768 int i;
2769 unsigned int j;
2770 enum tree_code high_code, low_code;
48e1416a 2771
fb85abff 2772 scalar_dest = gimple_assign_lhs (stmt);
2773
2774 /* Check that the operation is supported. */
2775 if (!vect_strided_store_supported (vectype))
2776 return false;
2777
2778 *result_chain = VEC_copy (tree, heap, dr_chain);
2779
2780 for (i = 0; i < exact_log2 (length); i++)
2781 {
2782 for (j = 0; j < length/2; j++)
2783 {
2784 vect1 = VEC_index (tree, dr_chain, j);
2785 vect2 = VEC_index (tree, dr_chain, j+length/2);
2786
2787 /* Create interleaving stmt:
48e1416a 2788 in the case of big endian:
2789 high = interleave_high (vect1, vect2)
2790 and in the case of little endian:
fb85abff 2791 high = interleave_low (vect1, vect2). */
2792 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2793 DECL_GIMPLE_REG_P (perm_dest) = 1;
2794 add_referenced_var (perm_dest);
2795 if (BYTES_BIG_ENDIAN)
2796 {
2797 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2798 low_code = VEC_INTERLEAVE_LOW_EXPR;
2799 }
2800 else
2801 {
2802 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2803 high_code = VEC_INTERLEAVE_LOW_EXPR;
2804 }
2805 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2806 vect1, vect2);
2807 high = make_ssa_name (perm_dest, perm_stmt);
2808 gimple_assign_set_lhs (perm_stmt, high);
2809 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2810 VEC_replace (tree, *result_chain, 2*j, high);
2811
2812 /* Create interleaving stmt:
2813 in the case of big endian:
48e1416a 2814 low = interleave_low (vect1, vect2)
fb85abff 2815 and in the case of little endian:
48e1416a 2816 low = interleave_high (vect1, vect2). */
fb85abff 2817 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2818 DECL_GIMPLE_REG_P (perm_dest) = 1;
2819 add_referenced_var (perm_dest);
2820 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2821 vect1, vect2);
2822 low = make_ssa_name (perm_dest, perm_stmt);
2823 gimple_assign_set_lhs (perm_stmt, low);
2824 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2825 VEC_replace (tree, *result_chain, 2*j+1, low);
2826 }
2827 dr_chain = VEC_copy (tree, heap, *result_chain);
2828 }
2829 return true;
2830}
2831
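/* A quick illustrative check of the scheme above for LENGTH == 2 with
   4-element vectors (numbers are element labels, as in the comment above):

     dr_chain     = { (0 1 2 3), (4 5 6 7) }

   A single stage produces

     result_chain = { (0 4 1 5), (2 6 3 7) }

   which is exactly the element order needed to store the two interleaved
   scalar sequences back to memory.  */
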
2832/* Function vect_setup_realignment
48e1416a 2833
fb85abff 2834 This function is called when vectorizing an unaligned load using
2835 the dr_explicit_realign[_optimized] scheme.
2836 This function generates the following code at the loop prolog:
2837
2838 p = initial_addr;
2839 x msq_init = *(floor(p)); # prolog load
48e1416a 2840 realignment_token = call target_builtin;
fb85abff 2841 loop:
2842 x msq = phi (msq_init, ---)
2843
48e1416a 2844 The stmts marked with x are generated only for the case of
fb85abff 2845 dr_explicit_realign_optimized.
2846
48e1416a 2847 The code above sets up a new (vector) pointer, pointing to the first
fb85abff 2848 location accessed by STMT, and a "floor-aligned" load using that pointer.
2849 It also generates code to compute the "realignment-token" (if the relevant
2850 target hook was defined), and creates a phi-node at the loop-header bb
2851 whose arguments are the result of the prolog-load (created by this
2852 function) and the result of a load that takes place in the loop (to be
2853 created by the caller to this function).
2854
2855 For the case of dr_explicit_realign_optimized:
48e1416a 2856 The caller to this function uses the phi-result (msq) to create the
fb85abff 2857 realignment code inside the loop, and sets up the missing phi argument,
2858 as follows:
48e1416a 2859 loop:
fb85abff 2860 msq = phi (msq_init, lsq)
2861 lsq = *(floor(p')); # load in loop
2862 result = realign_load (msq, lsq, realignment_token);
2863
2864 For the case of dr_explicit_realign:
2865 loop:
2866 msq = *(floor(p)); # load in loop
2867 p' = p + (VS-1);
2868 lsq = *(floor(p')); # load in loop
2869 result = realign_load (msq, lsq, realignment_token);
2870
2871 Input:
2872 STMT - (scalar) load stmt to be vectorized. This load accesses
2873 a memory location that may be unaligned.
2874 BSI - place where new code is to be inserted.
2875 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
48e1416a 2876 is used.
2877
fb85abff 2878 Output:
2879 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2880 target hook, if defined.
2881 Return value - the result of the loop-header phi node. */
2882
2883tree
2884vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2885 tree *realignment_token,
2886 enum dr_alignment_support alignment_support_scheme,
2887 tree init_addr,
2888 struct loop **at_loop)
2889{
2890 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2891 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2892 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2893 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2894 edge pe;
2895 tree scalar_dest = gimple_assign_lhs (stmt);
2896 tree vec_dest;
2897 gimple inc;
2898 tree ptr;
2899 tree data_ref;
2900 gimple new_stmt;
2901 basic_block new_bb;
2902 tree msq_init = NULL_TREE;
2903 tree new_temp;
2904 gimple phi_stmt;
2905 tree msq = NULL_TREE;
2906 gimple_seq stmts = NULL;
2907 bool inv_p;
2908 bool compute_in_loop = false;
2909 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2910 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2911 struct loop *loop_for_initial_load;
2912
2913 gcc_assert (alignment_support_scheme == dr_explicit_realign
2914 || alignment_support_scheme == dr_explicit_realign_optimized);
2915
2916 /* We need to generate three things:
2917 1. the misalignment computation
2918 2. the extra vector load (for the optimized realignment scheme).
2919 3. the phi node for the two vectors from which the realignment is
2920 done (for the optimized realignment scheme).
2921 */
2922
2923 /* 1. Determine where to generate the misalignment computation.
2924
2925 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2926 calculation will be generated by this function, outside the loop (in the
2927     preheader). Otherwise, INIT_ADDR has already been computed for us by the
2928 caller, inside the loop.
2929
2930 Background: If the misalignment remains fixed throughout the iterations of
2931 the loop, then both realignment schemes are applicable, and also the
2932 misalignment computation can be done outside LOOP. This is because we are
2933 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2934 are a multiple of VS (the Vector Size), and therefore the misalignment in
2935 different vectorized LOOP iterations is always the same.
2936 The problem arises only if the memory access is in an inner-loop nested
2937 inside LOOP, which is now being vectorized using outer-loop vectorization.
2938 This is the only case when the misalignment of the memory access may not
2939 remain fixed throughout the iterations of the inner-loop (as explained in
2940 detail in vect_supportable_dr_alignment). In this case, not only is the
2941 optimized realignment scheme not applicable, but also the misalignment
2942 computation (and generation of the realignment token that is passed to
2943 REALIGN_LOAD) have to be done inside the loop.
2944
2945 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2946 or not, which in turn determines if the misalignment is computed inside
2947 the inner-loop, or outside LOOP. */
2948
2949 if (init_addr != NULL_TREE)
2950 {
2951 compute_in_loop = true;
2952 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2953 }
2954
2955
2956 /* 2. Determine where to generate the extra vector load.
2957
2958 For the optimized realignment scheme, instead of generating two vector
2959 loads in each iteration, we generate a single extra vector load in the
2960 preheader of the loop, and in each iteration reuse the result of the
2961 vector load from the previous iteration. In case the memory access is in
2962 an inner-loop nested inside LOOP, which is now being vectorized using
2963 outer-loop vectorization, we need to determine whether this initial vector
2964 load should be generated at the preheader of the inner-loop, or can be
2965 generated at the preheader of LOOP. If the memory access has no evolution
2966 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2967 to be generated inside LOOP (in the preheader of the inner-loop). */
2968
2969 if (nested_in_vect_loop)
2970 {
2971 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2972 bool invariant_in_outerloop =
2973 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2974 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2975 }
2976 else
2977 loop_for_initial_load = loop;
2978 if (at_loop)
2979 *at_loop = loop_for_initial_load;
2980
2981 /* 3. For the case of the optimized realignment, create the first vector
2982 load at the loop preheader. */
2983
2984 if (alignment_support_scheme == dr_explicit_realign_optimized)
2985 {
2986 /* Create msq_init = *(floor(p1)) in the loop preheader */
2987
2988 gcc_assert (!compute_in_loop);
2989 pe = loop_preheader_edge (loop_for_initial_load);
2990 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2991 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
dd277d48 2992 &init_addr, &inc, true, &inv_p);
fb85abff 2993 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2994 new_stmt = gimple_build_assign (vec_dest, data_ref);
2995 new_temp = make_ssa_name (vec_dest, new_stmt);
2996 gimple_assign_set_lhs (new_stmt, new_temp);
2997 mark_symbols_for_renaming (new_stmt);
2998 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2999 gcc_assert (!new_bb);
3000 msq_init = gimple_assign_lhs (new_stmt);
3001 }
3002
3003 /* 4. Create realignment token using a target builtin, if available.
3004 It is done either inside the containing loop, or before LOOP (as
3005 determined above). */
3006
3007 if (targetm.vectorize.builtin_mask_for_load)
3008 {
3009 tree builtin_decl;
3010
3011      /* Compute INIT_ADDR - the initial address accessed by this memref. */
3012 if (compute_in_loop)
3013 gcc_assert (init_addr); /* already computed by the caller. */
3014 else
3015 {
3016 /* Generate the INIT_ADDR computation outside LOOP. */
3017 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3018 NULL_TREE, loop);
3019 pe = loop_preheader_edge (loop);
3020 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3021 gcc_assert (!new_bb);
3022 }
3023
3024 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3025 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3026 vec_dest =
3027 vect_create_destination_var (scalar_dest,
3028 gimple_call_return_type (new_stmt));
3029 new_temp = make_ssa_name (vec_dest, new_stmt);
3030 gimple_call_set_lhs (new_stmt, new_temp);
3031
3032 if (compute_in_loop)
3033 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3034 else
3035 {
3036 /* Generate the misalignment computation outside LOOP. */
3037 pe = loop_preheader_edge (loop);
3038 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3039 gcc_assert (!new_bb);
3040 }
3041
3042 *realignment_token = gimple_call_lhs (new_stmt);
3043
3044 /* The result of the CALL_EXPR to this builtin is determined from
3045 the value of the parameter and no global variables are touched
3046 which makes the builtin a "const" function. Requiring the
3047 builtin to have the "const" attribute makes it unnecessary
3048 to call mark_call_clobbered. */
3049 gcc_assert (TREE_READONLY (builtin_decl));
3050 }
3051
3052 if (alignment_support_scheme == dr_explicit_realign)
3053 return msq;
3054
3055 gcc_assert (!compute_in_loop);
3056 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3057
3058
3059 /* 5. Create msq = phi <msq_init, lsq> in loop */
3060
3061 pe = loop_preheader_edge (containing_loop);
3062 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3063 msq = make_ssa_name (vec_dest, NULL);
3064 phi_stmt = create_phi_node (msq, containing_loop->header);
3065 SSA_NAME_DEF_STMT (msq) = phi_stmt;
efbcb6de 3066 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
fb85abff 3067
3068 return msq;
3069}
3070
3071
3072/* Function vect_strided_load_supported.
3073
3074   Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3075 and FALSE otherwise. */
3076
3077bool
3078vect_strided_load_supported (tree vectype)
3079{
3080 optab perm_even_optab, perm_odd_optab;
3081 int mode;
3082
3083 mode = (int) TYPE_MODE (vectype);
3084
3085 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3086 optab_default);
3087 if (!perm_even_optab)
3088 {
3089 if (vect_print_dump_info (REPORT_DETAILS))
3090 fprintf (vect_dump, "no optab for perm_even.");
3091 return false;
3092 }
3093
3094 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3095 {
3096 if (vect_print_dump_info (REPORT_DETAILS))
3097 fprintf (vect_dump, "perm_even op not supported by target.");
3098 return false;
3099 }
3100
3101 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3102 optab_default);
3103 if (!perm_odd_optab)
3104 {
3105 if (vect_print_dump_info (REPORT_DETAILS))
3106 fprintf (vect_dump, "no optab for perm_odd.");
3107 return false;
3108 }
3109
3110 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3111 {
3112 if (vect_print_dump_info (REPORT_DETAILS))
3113 fprintf (vect_dump, "perm_odd op not supported by target.");
3114 return false;
3115 }
3116 return true;
3117}
3118
3119
3120/* Function vect_permute_load_chain.
3121
3122 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
48e1416a 3123 a power of 2, generate extract_even/odd stmts to reorder the input data
fb85abff 3124 correctly. Return the final references for loads in RESULT_CHAIN.
3125
3126 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3127   The input is 4 vectors each containing 8 elements.  We assign a number to
3128   each element; the input sequence is:
3129
3130 1st vec: 0 1 2 3 4 5 6 7
3131 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 3132 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 3133 4th vec: 24 25 26 27 28 29 30 31
3134
3135 The output sequence should be:
3136
3137 1st vec: 0 4 8 12 16 20 24 28
3138 2nd vec: 1 5 9 13 17 21 25 29
48e1416a 3139 3rd vec: 2 6 10 14 18 22 26 30
fb85abff 3140 4th vec: 3 7 11 15 19 23 27 31
3141
3142 i.e., the first output vector should contain the first elements of each
3143 interleaving group, etc.
3144
3145 We use extract_even/odd instructions to create such output. The input of each
3146 extract_even/odd operation is two vectors
48e1416a 3147 1st vec 2nd vec
3148 0 1 2 3 4 5 6 7
fb85abff 3149
48e1416a 3150 and the output is the vector of extracted even/odd elements. The output of
fb85abff 3151 extract_even will be: 0 2 4 6
3152 and of extract_odd: 1 3 5 7
3153
48e1416a 3154
fb85abff 3155 The permutation is done in log LENGTH stages. In each stage extract_even and
48e1416a 3156 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3157 order. In our example,
fb85abff 3158
3159 E1: extract_even (1st vec, 2nd vec)
3160 E2: extract_odd (1st vec, 2nd vec)
3161 E3: extract_even (3rd vec, 4th vec)
3162 E4: extract_odd (3rd vec, 4th vec)
3163
3164 The output for the first stage will be:
3165
3166 E1: 0 2 4 6 8 10 12 14
3167 E2: 1 3 5 7 9 11 13 15
48e1416a 3168 E3: 16 18 20 22 24 26 28 30
fb85abff 3169 E4: 17 19 21 23 25 27 29 31
3170
3171 In order to proceed and create the correct sequence for the next stage (or
48e1416a 3172 for the correct output, if the second stage is the last one, as in our
3173   example), we first put the output of the extract_even operation and then the
fb85abff 3174 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3175 The input for the second stage is:
3176
3177 1st vec (E1): 0 2 4 6 8 10 12 14
48e1416a 3178 2nd vec (E3): 16 18 20 22 24 26 28 30
3179 3rd vec (E2): 1 3 5 7 9 11 13 15
fb85abff 3180 4th vec (E4): 17 19 21 23 25 27 29 31
3181
3182 The output of the second stage:
3183
3184 E1: 0 4 8 12 16 20 24 28
3185 E2: 2 6 10 14 18 22 26 30
3186 E3: 1 5 9 13 17 21 25 29
3187 E4: 3 7 11 15 19 23 27 31
3188
3189 And RESULT_CHAIN after reordering:
3190
3191 1st vec (E1): 0 4 8 12 16 20 24 28
3192 2nd vec (E3): 1 5 9 13 17 21 25 29
48e1416a 3193 3rd vec (E2): 2 6 10 14 18 22 26 30
fb85abff 3194 4th vec (E4): 3 7 11 15 19 23 27 31. */
3195
3196bool
48e1416a 3197vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3198 unsigned int length,
fb85abff 3199 gimple stmt,
3200 gimple_stmt_iterator *gsi,
3201 VEC(tree,heap) **result_chain)
3202{
3203 tree perm_dest, data_ref, first_vect, second_vect;
3204 gimple perm_stmt;
3205 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3206 int i;
3207 unsigned int j;
3208
3209 /* Check that the operation is supported. */
3210 if (!vect_strided_load_supported (vectype))
3211 return false;
3212
3213 *result_chain = VEC_copy (tree, heap, dr_chain);
3214 for (i = 0; i < exact_log2 (length); i++)
3215 {
3216      for (j = 0; j < length; j += 2)
3217 {
3218 first_vect = VEC_index (tree, dr_chain, j);
3219 second_vect = VEC_index (tree, dr_chain, j+1);
3220
3221 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3222 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3223 DECL_GIMPLE_REG_P (perm_dest) = 1;
3224 add_referenced_var (perm_dest);
3225
3226 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3227 perm_dest, first_vect,
3228 second_vect);
3229
3230 data_ref = make_ssa_name (perm_dest, perm_stmt);
3231 gimple_assign_set_lhs (perm_stmt, data_ref);
3232 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3233 mark_symbols_for_renaming (perm_stmt);
3234
48e1416a 3235 VEC_replace (tree, *result_chain, j/2, data_ref);
3236
fb85abff 3237 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3238 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3239 DECL_GIMPLE_REG_P (perm_dest) = 1;
3240 add_referenced_var (perm_dest);
3241
3242 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3243 perm_dest, first_vect,
3244 second_vect);
3245 data_ref = make_ssa_name (perm_dest, perm_stmt);
3246 gimple_assign_set_lhs (perm_stmt, data_ref);
3247 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3248 mark_symbols_for_renaming (perm_stmt);
3249
3250 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3251 }
3252 dr_chain = VEC_copy (tree, heap, *result_chain);
3253 }
3254 return true;
3255}
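
/* A standalone scalar sketch of the permutation performed by
   vect_permute_load_chain above, kept under "#if 0" so it is never compiled.
   It is illustrative only (the array sizes, names and the printing are
   hypothetical, not part of the vectorizer); running it reproduces the
   numbered example in the function comment.  */
#if 0
#include <stdio.h>

#define LEN  4   /* Number of vectors in the chain (a power of 2).  */
#define NELT 8   /* Elements per vector.  */

/* Simulate the log2 (LEN) extract_even/extract_odd stages.  CHAIN plays the
   role of DR_CHAIN and is overwritten with the stage result, just as the
   real function copies RESULT_CHAIN back into DR_CHAIN.  */
static void
permute_load_chain_sketch (int chain[LEN][NELT])
{
  int result[LEN][NELT];
  int stages = 0;

  for (int l = LEN; l > 1; l >>= 1)
    stages++;

  for (int s = 0; s < stages; s++)
    {
      for (int j = 0; j < LEN; j += 2)
        {
          /* extract_even (chain[j], chain[j+1]) -> result[j/2].  */
          for (int e = 0; e < NELT; e++)
            result[j / 2][e] = (e < NELT / 2
                                ? chain[j][2 * e]
                                : chain[j + 1][2 * (e - NELT / 2)]);

          /* extract_odd (chain[j], chain[j+1]) -> result[j/2 + LEN/2].  */
          for (int e = 0; e < NELT; e++)
            result[j / 2 + LEN / 2][e] = (e < NELT / 2
                                          ? chain[j][2 * e + 1]
                                          : chain[j + 1][2 * (e - NELT / 2) + 1]);
        }

      /* The result of this stage is the input of the next one.  */
      for (int v = 0; v < LEN; v++)
        for (int e = 0; e < NELT; e++)
          chain[v][e] = result[v][e];
    }
}

int
main (void)
{
  int chain[LEN][NELT];

  /* Number the elements 0..31 as in the comment above.  */
  for (int v = 0; v < LEN; v++)
    for (int e = 0; e < NELT; e++)
      chain[v][e] = v * NELT + e;

  permute_load_chain_sketch (chain);

  /* Expected output: 0 4 8 ... 28 / 1 5 9 ... 29 / 2 6 10 ... 30 /
     3 7 11 ... 31, matching RESULT_CHAIN after reordering.  */
  for (int v = 0; v < LEN; v++)
    {
      for (int e = 0; e < NELT; e++)
        printf (" %2d", chain[v][e]);
      printf ("\n");
    }
  return 0;
}
#endif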
3256
3257
3258/* Function vect_transform_strided_load.
3259
3260 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3261 to perform their permutation and ascribe the resulting vectorized statements to
3262 the scalar statements.
3263*/
3264
3265bool
3266vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3267 gimple_stmt_iterator *gsi)
3268{
3269 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3270 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3271 gimple next_stmt, new_stmt;
3272 VEC(tree,heap) *result_chain = NULL;
3273 unsigned int i, gap_count;
3274 tree tmp_data_ref;
3275
48e1416a 3276 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3277 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
fb85abff 3278 vectors that are ready for vector computation. */
3279 result_chain = VEC_alloc (tree, heap, size);
3280 /* Permute. */
3281 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3282 return false;
3283
48e1416a 3284 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3285 Since we scan the chain starting from its first node, their order
fb85abff 3286 corresponds to the order of data-refs in RESULT_CHAIN. */
3287 next_stmt = first_stmt;
3288 gap_count = 1;
3289 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3290 {
3291 if (!next_stmt)
3292 break;
3293
3294      /* Skip the gaps. Loads created for the gaps will be removed by the dead
3295 code elimination pass later. No need to check for the first stmt in
3296 the group, since it always exists.
3297 DR_GROUP_GAP is the number of steps in elements from the previous
3298         access (if there is no gap, DR_GROUP_GAP is 1). We skip loads that
3299 correspond to the gaps.
3300 */
48e1416a 3301 if (next_stmt != first_stmt
fb85abff 3302 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3303 {
3304 gap_count++;
3305 continue;
3306 }
3307
3308 while (next_stmt)
3309 {
3310 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3311 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3312 copies, and we put the new vector statement in the first available
3313 RELATED_STMT. */
3314 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3315 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3316 else
3317 {
3318 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3319 {
3320 gimple prev_stmt =
3321 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3322 gimple rel_stmt =
3323 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3324 while (rel_stmt)
3325 {
3326 prev_stmt = rel_stmt;
48e1416a 3327 rel_stmt =
fb85abff 3328 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3329 }
3330
48e1416a 3331 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
fb85abff 3332 new_stmt;
3333 }
3334 }
3335
3336 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3337 gap_count = 1;
3338 /* If NEXT_STMT accesses the same DR as the previous statement,
3339 put the same TMP_DATA_REF as its vectorized statement; otherwise
3340 get the next data-ref from RESULT_CHAIN. */
3341 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3342 break;
3343 }
3344 }
3345
3346 VEC_free (tree, heap, result_chain);
3347 return true;
3348}
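
/* A hypothetical source-level example of the gap handling above (not taken
   from any testcase; whether it actually vectorizes depends on the target
   and on the group analysis done earlier in this file).  */
#if 0
/* The two loads from B form one interleaving group of size 4 with gaps at
   positions 1 and 3: DR_GROUP_GAP of the b[4*i+2] access is 2.  In that
   case vect_transform_strided_load attaches RESULT_CHAIN[0] to the
   vectorized b[4*i] statement, skips RESULT_CHAIN[1] because of the gap
   (that load is left for dead code elimination), attaches RESULT_CHAIN[2]
   to the b[4*i+2] statement, and never consumes RESULT_CHAIN[3].  */
void
strided_load_with_gap (int *a, int *b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = b[4 * i] + b[4 * i + 2];
}
#endif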
3349
3350/* Function vect_can_force_dr_alignment_p.
3351
3352 Returns whether DECL can be forced to be aligned
3353 on an ALIGNMENT-bit boundary. */
3354
48e1416a 3355bool
fb85abff 3356vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3357{
3358 if (TREE_CODE (decl) != VAR_DECL)
3359 return false;
3360
3361 if (DECL_EXTERNAL (decl))
3362 return false;
3363
3364 if (TREE_ASM_WRITTEN (decl))
3365 return false;
3366
3367 if (TREE_STATIC (decl))
3368 return (alignment <= MAX_OFILE_ALIGNMENT);
3369 else
3370 return (alignment <= MAX_STACK_ALIGNMENT);
3371}
3372
3373/* Function vect_supportable_dr_alignment.
3374
3375 Return whether the data reference DR is supported with respect to its
3376 alignment. */
3377
3378enum dr_alignment_support
3379vect_supportable_dr_alignment (struct data_reference *dr)
3380{
3381 gimple stmt = DR_STMT (dr);
3382 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3383 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
bc620c5c 3384 enum machine_mode mode = TYPE_MODE (vectype);
fb85abff 3385 bool invariant_in_outerloop = false;
37545e54 3386 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3387 struct loop *vect_loop = NULL;
3388 bool nested_in_vect_loop = false;
fb85abff 3389
3390 if (aligned_access_p (dr))
3391 return dr_aligned;
3392
37545e54 3393 if (!loop_vinfo)
3394 /* FORNOW: Misaligned accesses are supported only in loops. */
3395 return dr_unaligned_unsupported;
3396
3397 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3398 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3399
fb85abff 3400 if (nested_in_vect_loop)
3401 {
3402 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3403 invariant_in_outerloop =
3404 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3405 }
3406
3407 /* Possibly unaligned access. */
3408
3409 /* We can choose between using the implicit realignment scheme (generating
3410 a misaligned_move stmt) and the explicit realignment scheme (generating
3411 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3412 realignment scheme: optimized, and unoptimized.
3413 We can optimize the realignment only if the step between consecutive
3414 vector loads is equal to the vector size. Since the vector memory
3415 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3416 is guaranteed that the misalignment amount remains the same throughout the
3417 execution of the vectorized loop. Therefore, we can create the
3418 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3419 at the loop preheader.
3420
3421 However, in the case of outer-loop vectorization, when vectorizing a
3422 memory access in the inner-loop nested within the LOOP that is now being
3423 vectorized, while it is guaranteed that the misalignment of the
3424 vectorized memory access will remain the same in different outer-loop
3425 iterations, it is *not* guaranteed that it will remain the same throughout
3426 the execution of the inner-loop. This is because the inner-loop advances
3427 with the original scalar step (and not in steps of VS). If the inner-loop
3428 step happens to be a multiple of VS, then the misalignment remains fixed
3429 and we can use the optimized realignment scheme. For example:
3430
3431 for (i=0; i<N; i++)
3432 for (j=0; j<M; j++)
3433 s += a[i+j];
3434
3435 When vectorizing the i-loop in the above example, the step between
3436 consecutive vector loads is 1, and so the misalignment does not remain
3437 fixed across the execution of the inner-loop, and the realignment cannot
3438 be optimized (as illustrated in the following pseudo vectorized loop):
3439
3440 for (i=0; i<N; i+=4)
3441 for (j=0; j<M; j++){
3442 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3443 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3444 // (assuming that we start from an aligned address).
3445 }
3446
3447 We therefore have to use the unoptimized realignment scheme:
3448
3449 for (i=0; i<N; i+=4)
3450 for (j=k; j<M; j+=4)
3451 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3452 // that the misalignment of the initial address is
3453 // 0).
3454
3455 The loop can then be vectorized as follows:
3456
3457 for (k=0; k<4; k++){
3458 rt = get_realignment_token (&vp[k]);
3459 for (i=0; i<N; i+=4){
3460 v1 = vp[i+k];
3461 for (j=k; j<M; j+=4){
3462 v2 = vp[i+j+VS-1];
3463 va = REALIGN_LOAD <v1,v2,rt>;
3464 vs += va;
3465 v1 = v2;
3466 }
3467 }
3468 } */
3469
3470 if (DR_IS_READ (dr))
3471 {
c6b19c5f 3472 bool is_packed = false;
3473 tree type = (TREE_TYPE (DR_REF (dr)));
3474
48e1416a 3475 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
fb85abff 3476 CODE_FOR_nothing
3477 && (!targetm.vectorize.builtin_mask_for_load
3478 || targetm.vectorize.builtin_mask_for_load ()))
3479 {
3480 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3481 if (nested_in_vect_loop
3482 && (TREE_INT_CST_LOW (DR_STEP (dr))
3483 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3484 return dr_explicit_realign;
3485 else
3486 return dr_explicit_realign_optimized;
3487 }
c6b19c5f 3488 if (!known_alignment_for_access_p (dr))
3489 {
3490 tree ba = DR_BASE_OBJECT (dr);
48e1416a 3491
c6b19c5f 3492 if (ba)
3493 is_packed = contains_packed_reference (ba);
3494 }
48e1416a 3495
c6b19c5f 3496 if (targetm.vectorize.
3497 builtin_support_vector_misalignment (mode, type,
3498 DR_MISALIGNMENT (dr), is_packed))
fb85abff 3499 /* Can't software pipeline the loads, but can at least do them. */
3500 return dr_unaligned_supported;
3501 }
c6b19c5f 3502 else
3503 {
3504 bool is_packed = false;
3505 tree type = (TREE_TYPE (DR_REF (dr)));
fb85abff 3506
c6b19c5f 3507 if (!known_alignment_for_access_p (dr))
3508 {
3509 tree ba = DR_BASE_OBJECT (dr);
48e1416a 3510
c6b19c5f 3511 if (ba)
3512 is_packed = contains_packed_reference (ba);
3513 }
48e1416a 3514
c6b19c5f 3515 if (targetm.vectorize.
48e1416a 3516 builtin_support_vector_misalignment (mode, type,
c6b19c5f 3517 DR_MISALIGNMENT (dr), is_packed))
3518 return dr_unaligned_supported;
3519 }
48e1416a 3520
fb85abff 3521 /* Unsupported. */
3522 return dr_unaligned_unsupported;
3523}
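
/* A tiny standalone sketch of the misalignment arithmetic discussed in the
   outer-loop comment of vect_supportable_dr_alignment: assuming an aligned
   base address, the misalignment of &vp[i+j] is (i + j) % VS elements.  The
   names and loop bounds are hypothetical; the point is only to show when the
   misalignment stays fixed across the inner loop.  Kept under "#if 0" so it
   is never compiled.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const int VS = 4;             /* Elements per vector.  */

  /* Inner-loop step 1: the misalignment cycles 0,1,2,3,... so the
     realignment token cannot be computed once in the preheader and the
     unoptimized explicit realignment scheme (dr_explicit_realign) is
     needed.  */
  for (int i = 0; i < 8; i += VS)
    for (int j = 0; j < 8; j++)
      printf ("step 1:  i=%d j=%d misalign=%d\n", i, j, (i + j) % VS);

  /* Inner-loop step VS: the misalignment is the same in every iteration,
     so the optimized scheme (dr_explicit_realign_optimized) applies.  */
  for (int i = 0; i < 8; i += VS)
    for (int j = 0; j < 8; j += VS)
      printf ("step VS: i=%d j=%d misalign=%d\n", i, j, (i + j) % VS);

  return 0;
}
#endif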