48e1416a 1/* Data References Analysis and Manipulation Utilities for Vectorization.
570e2049 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
7cf0dbf3 3 Free Software Foundation, Inc.
48e1416a 4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 5 and Ira Rosen <irar@il.ibm.com>
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
11Software Foundation; either version 3, or (at your option) any later
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
26#include "tm.h"
27#include "ggc.h"
28#include "tree.h"
218a4f88 29#include "tm_p.h"
fb85abff 30#include "target.h"
31#include "basic-block.h"
ce084dfc 32#include "tree-pretty-print.h"
33#include "gimple-pretty-print.h"
fb85abff 34#include "tree-flow.h"
35#include "tree-dump.h"
36#include "cfgloop.h"
fb85abff 37#include "tree-chrec.h"
38#include "tree-scalar-evolution.h"
39#include "tree-vectorizer.h"
0b205f4c 40#include "diagnostic-core.h"
fb85abff 41
8e3cb73b 42/* Need to include rtl.h, expr.h, etc. for optabs. */
43#include "expr.h"
44#include "optabs.h"
fb85abff 45
94b7b4dd 46/* Return true if load- or store-lanes optab OPTAB is implemented for
47 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
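/* For example, with a 128-bit VECTYPE and COUNT == 3 the check below asks
   for a 384-bit integer mode to serve as the array mode (unless the target
   already claims support through array_mode_supported_p), and then requires
   OPTAB to have a handler for that (array mode, vector mode) pair.  */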
48
49static bool
50vect_lanes_optab_supported_p (const char *name, convert_optab optab,
51 tree vectype, unsigned HOST_WIDE_INT count)
52{
53 enum machine_mode mode, array_mode;
54 bool limit_p;
55
56 mode = TYPE_MODE (vectype);
57 limit_p = !targetm.array_mode_supported_p (mode, count);
58 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
59 MODE_INT, limit_p);
60
61 if (array_mode == BLKmode)
62 {
63 if (vect_print_dump_info (REPORT_DETAILS))
64 fprintf (vect_dump, "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]",
65 GET_MODE_NAME (mode), count);
66 return false;
67 }
68
69 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
70 {
71 if (vect_print_dump_info (REPORT_DETAILS))
72 fprintf (vect_dump, "cannot use %s<%s><%s>",
73 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
74 return false;
75 }
76
77 if (vect_print_dump_info (REPORT_DETAILS))
78 fprintf (vect_dump, "can use %s<%s><%s>",
79 name, GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
80
81 return true;
82}
83
84
fb85abff 85/* Return the smallest scalar part of STMT.
282bf14c 86 This is used to determine the vectype of the stmt. We generally set the
87 vectype according to the type of the result (lhs). For stmts whose
fb85abff 88 result-type is different from the type of the arguments (e.g., demotion,
48e1416a 89 promotion), vectype will be reset appropriately (later). Note that we have
fb85abff 90 to visit the smallest datatype in this function, because that determines the
282bf14c 91 VF. If the smallest datatype in the loop is present only as the rhs of a
fb85abff 92 promotion operation, we'd miss it.
93 Such a case, where a variable of this datatype does not appear in the lhs
94 anywhere in the loop, can only occur if it's an invariant: e.g.:
48e1416a 95 'int_x = (int) short_inv', which we'd expect to have been optimized away by
282bf14c 96 invariant motion. However, we cannot rely on invariant motion to always
97 take invariants out of the loop, and so in the case of promotion we also
98 have to check the rhs.
fb85abff 99 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
100 types. */
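/* For example, for 'int_x = (int) short_y' (assuming a 4-byte int and a
   2-byte short) the lhs type is 'int' but the rhs type is 'short int', so
   'short int' is returned with *LHS_SIZE_UNIT == 4 and *RHS_SIZE_UNIT == 2.  */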
101
102tree
103vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
104 HOST_WIDE_INT *rhs_size_unit)
105{
106 tree scalar_type = gimple_expr_type (stmt);
107 HOST_WIDE_INT lhs, rhs;
108
109 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
110
111 if (is_gimple_assign (stmt)
112 && (gimple_assign_cast_p (stmt)
113 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
1c928a0c 114 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
fb85abff 115 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
116 {
117 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
118
119 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
120 if (rhs < lhs)
121 scalar_type = rhs_type;
122 }
48e1416a 123
124 *lhs_size_unit = lhs;
fb85abff 125 *rhs_size_unit = rhs;
126 return scalar_type;
127}
128
129
130/* Find the place of the data-ref in STMT in the interleaving chain that starts
282bf14c 131 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
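/* For example, in an interleaving chain built from the accesses a[i],
   a[i+1] and a[i+2] (with the access to a[i] as FIRST_STMT), the place of
   the access to a[i+2] is 2; a stmt that belongs to a different chain
   yields -1.  */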
fb85abff 132
48e1416a 133int
fb85abff 134vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
135{
136 gimple next_stmt = first_stmt;
137 int result = 0;
138
21009880 139 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
fb85abff 140 return -1;
141
142 while (next_stmt && next_stmt != stmt)
143 {
144 result++;
21009880 145 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
fb85abff 146 }
147
148 if (next_stmt)
149 return result;
150 else
151 return -1;
152}
153
154
155/* Function vect_insert_into_interleaving_chain.
156
157 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
158
159static void
160vect_insert_into_interleaving_chain (struct data_reference *dra,
161 struct data_reference *drb)
162{
163 gimple prev, next;
164 tree next_init;
48e1416a 165 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
fb85abff 166 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
167
21009880 168 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
169 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
fb85abff 170 while (next)
171 {
172 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
173 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
174 {
175 /* Insert here. */
21009880 176 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
177 GROUP_NEXT_ELEMENT (stmtinfo_a) = next;
fb85abff 178 return;
179 }
180 prev = next;
21009880 181 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
fb85abff 182 }
183
184 /* We got to the end of the list. Insert here. */
21009880 185 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = DR_STMT (dra);
186 GROUP_NEXT_ELEMENT (stmtinfo_a) = NULL;
fb85abff 187}
188
189
190/* Function vect_update_interleaving_chain.
48e1416a 191
 192 For two data-refs DRA and DRB that are a part of a chain of interleaved data
282bf14c 193 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
fb85abff 194
195 There are four possible cases:
196 1. New stmts - both DRA and DRB are not a part of any chain:
197 FIRST_DR = DRB
198 NEXT_DR (DRB) = DRA
199 2. DRB is a part of a chain and DRA is not:
200 no need to update FIRST_DR
201 no need to insert DRB
202 insert DRA according to init
203 3. DRA is a part of a chain and DRB is not:
204 if (init of FIRST_DR > init of DRB)
205 FIRST_DR = DRB
206 NEXT(FIRST_DR) = previous FIRST_DR
207 else
208 insert DRB according to its init
209 4. both DRA and DRB are in some interleaving chains:
210 choose the chain with the smallest init of FIRST_DR
211 insert the nodes of the second chain into the first one. */
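/* Illustration of case 3 with 4-byte elements: DRA's chain currently holds
   the accesses to a[i+1] and a[i+2] (inits 4 and 8) and DRB accesses a[i]
   (init 0).  Since DRB's init is smaller than the init of the current
   FIRST_DR, DRB becomes the new head of the chain and every stmt in the
   chain is updated to point to it.  */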
212
213static void
214vect_update_interleaving_chain (struct data_reference *drb,
215 struct data_reference *dra)
216{
48e1416a 217 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
fb85abff 218 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
219 tree next_init, init_dra_chain, init_drb_chain;
220 gimple first_a, first_b;
221 tree node_init;
222 gimple node, prev, next, first_stmt;
223
224 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
21009880 225 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
fb85abff 226 {
21009880 227 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (drb);
228 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
229 GROUP_NEXT_ELEMENT (stmtinfo_b) = DR_STMT (dra);
fb85abff 230 return;
231 }
232
233 /* 2. DRB is a part of a chain and DRA is not. */
21009880 234 if (!GROUP_FIRST_ELEMENT (stmtinfo_a) && GROUP_FIRST_ELEMENT (stmtinfo_b))
fb85abff 235 {
21009880 236 GROUP_FIRST_ELEMENT (stmtinfo_a) = GROUP_FIRST_ELEMENT (stmtinfo_b);
fb85abff 237 /* Insert DRA into the chain of DRB. */
238 vect_insert_into_interleaving_chain (dra, drb);
239 return;
240 }
241
48e1416a 242 /* 3. DRA is a part of a chain and DRB is not. */
21009880 243 if (GROUP_FIRST_ELEMENT (stmtinfo_a) && !GROUP_FIRST_ELEMENT (stmtinfo_b))
fb85abff 244 {
21009880 245 gimple old_first_stmt = GROUP_FIRST_ELEMENT (stmtinfo_a);
fb85abff 246 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
247 old_first_stmt)));
248 gimple tmp;
249
250 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
251 {
48e1416a 252 /* DRB's init is smaller than the init of the stmt previously marked
282bf14c 253 as the first stmt of the interleaving chain of DRA. Therefore, we
fb85abff 254 update FIRST_STMT and put DRB in the head of the list. */
21009880 255 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (drb);
256 GROUP_NEXT_ELEMENT (stmtinfo_b) = old_first_stmt;
48e1416a 257
fb85abff 258 /* Update all the stmts in the list to point to the new FIRST_STMT. */
259 tmp = old_first_stmt;
260 while (tmp)
261 {
21009880 262 GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) = DR_STMT (drb);
263 tmp = GROUP_NEXT_ELEMENT (vinfo_for_stmt (tmp));
fb85abff 264 }
265 }
266 else
267 {
268 /* Insert DRB in the list of DRA. */
269 vect_insert_into_interleaving_chain (drb, dra);
21009880 270 GROUP_FIRST_ELEMENT (stmtinfo_b) = GROUP_FIRST_ELEMENT (stmtinfo_a);
fb85abff 271 }
272 return;
273 }
48e1416a 274
fb85abff 275 /* 4. both DRA and DRB are in some interleaving chains. */
21009880 276 first_a = GROUP_FIRST_ELEMENT (stmtinfo_a);
277 first_b = GROUP_FIRST_ELEMENT (stmtinfo_b);
fb85abff 278 if (first_a == first_b)
279 return;
280 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
281 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
282
283 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
284 {
48e1416a 285 /* Insert the nodes of DRA chain into the DRB chain.
fb85abff 286 After inserting a node, continue from this node of the DRB chain (don't
 287 start from the beginning). */
21009880 288 node = GROUP_FIRST_ELEMENT (stmtinfo_a);
289 prev = GROUP_FIRST_ELEMENT (stmtinfo_b);
fb85abff 290 first_stmt = first_b;
291 }
292 else
293 {
48e1416a 294 /* Insert the nodes of DRB chain into the DRA chain.
fb85abff 295 After inserting a node, continue from this node of the DRA chain (don't
 296 start from the beginning). */
21009880 297 node = GROUP_FIRST_ELEMENT (stmtinfo_b);
298 prev = GROUP_FIRST_ELEMENT (stmtinfo_a);
fb85abff 299 first_stmt = first_a;
300 }
48e1416a 301
fb85abff 302 while (node)
303 {
304 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
21009880 305 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
fb85abff 306 while (next)
48e1416a 307 {
fb85abff 308 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
309 if (tree_int_cst_compare (next_init, node_init) > 0)
310 {
311 /* Insert here. */
21009880 312 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
313 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = next;
fb85abff 314 prev = node;
315 break;
316 }
317 prev = next;
21009880 318 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev));
fb85abff 319 }
320 if (!next)
321 {
322 /* We got to the end of the list. Insert here. */
21009880 323 GROUP_NEXT_ELEMENT (vinfo_for_stmt (prev)) = node;
324 GROUP_NEXT_ELEMENT (vinfo_for_stmt (node)) = NULL;
fb85abff 325 prev = node;
48e1416a 326 }
21009880 327 GROUP_FIRST_ELEMENT (vinfo_for_stmt (node)) = first_stmt;
328 node = GROUP_NEXT_ELEMENT (vinfo_for_stmt (node));
fb85abff 329 }
330}
331
282bf14c 332/* Check dependence between DRA and DRB for basic block vectorization.
 333 If the accesses share the same bases and offsets, we can compare their initial
334 constant offsets to decide whether they differ or not. In case of a read-
335 write dependence we check that the load is before the store to ensure that
336 vectorization will not change the order of the accesses. */
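/* For example, for 'x = a[0]; a[0] = y;' in a single basic block the load
   is the earlier stmt, so vectorization preserves the order of the two
   accesses and we return false; for 'a[0] = y; x = a[0];' the earlier stmt
   is the store and we return true.  */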
d4b21757 337
338static bool
339vect_drs_dependent_in_basic_block (struct data_reference *dra,
340 struct data_reference *drb)
341{
342 HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
343 gimple earlier_stmt;
344
345 /* We only call this function for pairs of loads and stores, but we verify
346 it here. */
347 if (DR_IS_READ (dra) == DR_IS_READ (drb))
348 {
349 if (DR_IS_READ (dra))
350 return false;
351 else
352 return true;
353 }
354
282bf14c 355 /* Check that the data-refs have the same bases and offsets. If not, we can't
d4b21757 356 determine if they are dependent. */
27b32feb 357 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
ec611e12 358 || !dr_equal_offsets_p (dra, drb))
d4b21757 359 return true;
360
361 /* Check the types. */
362 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
363 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
364
365 if (type_size_a != type_size_b
366 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
367 TREE_TYPE (DR_REF (drb))))
368 return true;
369
370 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
371 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
372
373 /* Two different locations - no dependence. */
374 if (init_a != init_b)
375 return false;
376
282bf14c 377 /* We have a read-write dependence. Check that the load is before the store.
d4b21757 378 When we vectorize basic blocks, vector load can be only before
379 corresponding scalar load, and vector store can be only after its
282bf14c 380 corresponding scalar store. So the order of the accesses is preserved in
d4b21757 381 case the load is before the store. */
382 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
383 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
384 return false;
385
386 return true;
387}
388
389
fb85abff 390/* Function vect_check_interleaving.
391
282bf14c 392 Check if DRA and DRB are a part of interleaving. In case they are, insert
fb85abff 393 DRA and DRB in an interleaving chain. */
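/* For example, the loads a[2*i] and a[2*i+1] (4-byte elements) have the
   same base, a common step of 8, and inits 0 and 4: the difference of the
   inits is a multiple of the type size and does not exceed the step, so
   the two data-refs are recorded as one interleaving chain.  */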
394
48e1416a 395static bool
fb85abff 396vect_check_interleaving (struct data_reference *dra,
397 struct data_reference *drb)
398{
399 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
400
 401 /* Check that the data-refs have the same first location (except init) and
 402 that they are both either stores or loads (not a load and a store). */
27b32feb 403 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
ec611e12 404 || !dr_equal_offsets_p (dra, drb)
48e1416a 405 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
fb85abff 406 || DR_IS_READ (dra) != DR_IS_READ (drb))
f083cd24 407 return false;
fb85abff 408
409 /* Check:
410 1. data-refs are of the same type
411 2. their steps are equal
48e1416a 412 3. the step (if greater than zero) is greater than the difference between
f083cd24 413 data-refs' inits. */
fb85abff 414 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
415 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
416
417 if (type_size_a != type_size_b
418 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
48e1416a 419 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
fb85abff 420 TREE_TYPE (DR_REF (drb))))
f083cd24 421 return false;
fb85abff 422
423 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
424 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
425 step = TREE_INT_CST_LOW (DR_STEP (dra));
426
427 if (init_a > init_b)
428 {
48e1416a 429 /* If init_a == init_b + the size of the type * k, we have an interleaving,
fb85abff 430 and DRB is accessed before DRA. */
431 diff_mod_size = (init_a - init_b) % type_size_a;
432
37545e54 433 if (step && (init_a - init_b) > step)
48e1416a 434 return false;
fb85abff 435
436 if (diff_mod_size == 0)
437 {
48e1416a 438 vect_update_interleaving_chain (drb, dra);
fb85abff 439 if (vect_print_dump_info (REPORT_DR_DETAILS))
440 {
441 fprintf (vect_dump, "Detected interleaving ");
442 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
443 fprintf (vect_dump, " and ");
444 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
445 }
f083cd24 446 return true;
48e1416a 447 }
fb85abff 448 }
48e1416a 449 else
fb85abff 450 {
48e1416a 451 /* If init_b == init_a + the size of the type * k, we have an
fb85abff 452 interleaving, and DRA is accessed before DRB. */
453 diff_mod_size = (init_b - init_a) % type_size_a;
454
37545e54 455 if (step && (init_b - init_a) > step)
f083cd24 456 return false;
fb85abff 457
458 if (diff_mod_size == 0)
459 {
48e1416a 460 vect_update_interleaving_chain (dra, drb);
fb85abff 461 if (vect_print_dump_info (REPORT_DR_DETAILS))
462 {
463 fprintf (vect_dump, "Detected interleaving ");
464 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
465 fprintf (vect_dump, " and ");
466 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
467 }
f083cd24 468 return true;
48e1416a 469 }
fb85abff 470 }
48e1416a 471
f083cd24 472 return false;
fb85abff 473}
474
 475/* Check if the data references pointed to by DR_I and DR_J are the same or
 476 belong to the same interleaving group. Return FALSE if drs are
477 different, otherwise return TRUE. */
478
479static bool
480vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
481{
482 gimple stmt_i = DR_STMT (dr_i);
483 gimple stmt_j = DR_STMT (dr_j);
484
485 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
21009880 486 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
487 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j))
488 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_i))
489 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_j)))))
fb85abff 490 return true;
491 else
492 return false;
493}
494
495/* If address ranges represented by DDR_I and DDR_J are equal,
496 return TRUE, otherwise return FALSE. */
497
498static bool
499vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
500{
501 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
502 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
503 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
504 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
505 return true;
506 else
507 return false;
508}
509
510/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
511 tested at run-time. Return TRUE if DDR was successfully inserted.
512 Return false if versioning is not supported. */
513
514static bool
515vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
516{
517 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
518
519 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
520 return false;
521
522 if (vect_print_dump_info (REPORT_DR_DETAILS))
523 {
524 fprintf (vect_dump, "mark for run-time aliasing test between ");
525 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
526 fprintf (vect_dump, " and ");
527 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
528 }
529
530 if (optimize_loop_nest_for_size_p (loop))
531 {
532 if (vect_print_dump_info (REPORT_DR_DETAILS))
533 fprintf (vect_dump, "versioning not supported when optimizing for size.");
534 return false;
535 }
536
537 /* FORNOW: We don't support versioning with outer-loop vectorization. */
538 if (loop->inner)
539 {
540 if (vect_print_dump_info (REPORT_DR_DETAILS))
541 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
542 return false;
543 }
544
f634c3e9 545 /* FORNOW: We don't support creating runtime alias tests for non-constant
546 step. */
547 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
548 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
549 {
550 if (vect_print_dump_info (REPORT_DR_DETAILS))
551 fprintf (vect_dump, "versioning not yet supported for non-constant "
552 "step");
553 return false;
554 }
555
fb85abff 556 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
557 return true;
558}
559
37545e54 560
fb85abff 561/* Function vect_analyze_data_ref_dependence.
562
563 Return TRUE if there (might) exist a dependence between a memory-reference
 564 DRA and a memory-reference DRB. When versioning for alias can handle the
91a74fc6 565 dependence with a run-time check, return FALSE. Adjust *MAX_VF according to
566 the data dependence. */
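/* For example, in 'for (i = 0; i < N; i++) a[i + 3] = a[i] + 1;' the value
   stored in iteration i is read back in iteration i + 3, a dependence of
   distance 3; *MAX_VF is therefore reduced to 3 and FALSE is returned,
   since vectorization remains possible as long as the vectorization factor
   does not exceed the distance.  */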
48e1416a 567
fb85abff 568static bool
569vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
80508571 570 loop_vec_info loop_vinfo, int *max_vf)
fb85abff 571{
572 unsigned int i;
37545e54 573 struct loop *loop = NULL;
fb85abff 574 struct data_reference *dra = DDR_A (ddr);
575 struct data_reference *drb = DDR_B (ddr);
48e1416a 576 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
fb85abff 577 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
fb85abff 578 lambda_vector dist_v;
579 unsigned int loop_depth;
48e1416a 580
6ea6a380 581 /* Don't bother to analyze statements marked as unvectorizable. */
582 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
583 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
584 return false;
585
fb85abff 586 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
587 {
588 /* Independent data accesses. */
589 vect_check_interleaving (dra, drb);
590 return false;
591 }
592
37545e54 593 if (loop_vinfo)
91a74fc6 594 loop = LOOP_VINFO_LOOP (loop_vinfo);
37545e54 595
596 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
fb85abff 597 return false;
48e1416a 598
fb85abff 599 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
600 {
80508571 601 gimple earlier_stmt;
602
48e1416a 603 if (loop_vinfo)
37545e54 604 {
605 if (vect_print_dump_info (REPORT_DR_DETAILS))
606 {
607 fprintf (vect_dump, "versioning for alias required: "
608 "can't determine dependence between ");
609 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610 fprintf (vect_dump, " and ");
611 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
612 }
48e1416a 613
37545e54 614 /* Add to list of ddrs that need to be tested at run-time. */
615 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
616 }
617
 618 /* When vectorizing a basic block, an unknown dependence can still mean
ee612634 619 a grouped access. */
37545e54 620 if (vect_check_interleaving (dra, drb))
621 return false;
622
7d1099cb 623 /* Read-read is OK (we need this check here, after checking for
624 interleaving). */
625 if (DR_IS_READ (dra) && DR_IS_READ (drb))
626 return false;
627
fb85abff 628 if (vect_print_dump_info (REPORT_DR_DETAILS))
629 {
37545e54 630 fprintf (vect_dump, "can't determine dependence between ");
fb85abff 631 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
632 fprintf (vect_dump, " and ");
633 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
634 }
37545e54 635
d4b21757 636 /* We do not vectorize basic blocks with write-write dependencies. */
9ff25603 637 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
d4b21757 638 return true;
6ea6a380 639
80508571 640 /* Check that it's not a load-after-store dependence. */
641 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
642 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
643 return true;
644
6ea6a380 645 return false;
fb85abff 646 }
647
37545e54 648 /* Versioning for alias is not yet supported for basic block SLP, and
48e1416a 649 dependence distance is not applicable, hence, in case of known data
37545e54 650 dependence, basic block vectorization is impossible for now. */
651 if (!loop_vinfo)
652 {
653 if (dra != drb && vect_check_interleaving (dra, drb))
654 return false;
48e1416a 655
37545e54 656 if (vect_print_dump_info (REPORT_DR_DETAILS))
657 {
658 fprintf (vect_dump, "determined dependence between ");
659 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
660 fprintf (vect_dump, " and ");
661 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
662 }
663
d4b21757 664 /* Do not vectorize basic blocks with write-write dependences. */
9ff25603 665 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
d4b21757 666 return true;
667
668 /* Check if this dependence is allowed in basic block vectorization. */
669 return vect_drs_dependent_in_basic_block (dra, drb);
37545e54 670 }
671
672 /* Loop-based vectorization and known data dependence. */
fb85abff 673 if (DDR_NUM_DIST_VECTS (ddr) == 0)
674 {
675 if (vect_print_dump_info (REPORT_DR_DETAILS))
676 {
677 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
678 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
679 fprintf (vect_dump, " and ");
680 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
681 }
682 /* Add to list of ddrs that need to be tested at run-time. */
683 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
48e1416a 684 }
fb85abff 685
686 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
48148244 687 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
fb85abff 688 {
689 int dist = dist_v[loop_depth];
690
691 if (vect_print_dump_info (REPORT_DR_DETAILS))
692 fprintf (vect_dump, "dependence distance = %d.", dist);
693
91a74fc6 694 if (dist == 0)
fb85abff 695 {
fb85abff 696 if (vect_print_dump_info (REPORT_DR_DETAILS))
697 {
91a74fc6 698 fprintf (vect_dump, "dependence distance == 0 between ");
fb85abff 699 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
700 fprintf (vect_dump, " and ");
701 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
702 }
703
704 /* For interleaving, mark that there is a read-write dependency if
48e1416a 705 necessary. We have already checked that one of the data-refs is a store. */
fb85abff 706 if (DR_IS_READ (dra))
21009880 707 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
fb85abff 708 else
709 {
710 if (DR_IS_READ (drb))
21009880 711 GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
fb85abff 712 }
48e1416a 713
91a74fc6 714 continue;
715 }
716
717 if (dist > 0 && DDR_REVERSED_P (ddr))
718 {
719 /* If DDR_REVERSED_P the order of the data-refs in DDR was
720 reversed (to make distance vector positive), and the actual
721 distance is negative. */
722 if (vect_print_dump_info (REPORT_DR_DETAILS))
723 fprintf (vect_dump, "dependence distance negative.");
724 continue;
725 }
726
727 if (abs (dist) >= 2
728 && abs (dist) < *max_vf)
729 {
730 /* The dependence distance requires reduction of the maximal
731 vectorization factor. */
732 *max_vf = abs (dist);
733 if (vect_print_dump_info (REPORT_DR_DETAILS))
734 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
735 *max_vf);
fb85abff 736 }
737
91a74fc6 738 if (abs (dist) >= *max_vf)
fb85abff 739 {
48e1416a 740 /* Dependence distance does not create dependence, as far as
91a74fc6 741 vectorization is concerned, in this case. */
fb85abff 742 if (vect_print_dump_info (REPORT_DR_DETAILS))
91a74fc6 743 fprintf (vect_dump, "dependence distance >= VF.");
fb85abff 744 continue;
745 }
746
f083cd24 747 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 748 {
37545e54 749 fprintf (vect_dump, "not vectorized, possible dependence "
750 "between data-refs ");
fb85abff 751 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
752 fprintf (vect_dump, " and ");
753 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
754 }
755
756 return true;
757 }
758
759 return false;
760}
761
762/* Function vect_analyze_data_ref_dependences.
48e1416a 763
fb85abff 764 Examine all the data references in the loop, and make sure there do not
91a74fc6 765 exist any data dependences between them. Set *MAX_VF according to
766 the maximum vectorization factor the data dependences allow. */
48e1416a 767
fb85abff 768bool
48e1416a 769vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
80508571 770 bb_vec_info bb_vinfo, int *max_vf)
fb85abff 771{
772 unsigned int i;
37545e54 773 VEC (ddr_p, heap) *ddrs = NULL;
fb85abff 774 struct data_dependence_relation *ddr;
775
48e1416a 776 if (vect_print_dump_info (REPORT_DETAILS))
fb85abff 777 fprintf (vect_dump, "=== vect_analyze_dependences ===");
48e1416a 778
37545e54 779 if (loop_vinfo)
780 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
781 else
782 ddrs = BB_VINFO_DDRS (bb_vinfo);
48e1416a 783
48148244 784 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
80508571 785 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
fb85abff 786 return false;
787
788 return true;
789}
790
791
792/* Function vect_compute_data_ref_alignment
793
794 Compute the misalignment of the data reference DR.
795
796 Output:
797 1. If during the misalignment computation it is found that the data reference
798 cannot be vectorized then false is returned.
799 2. DR_MISALIGNMENT (DR) is defined.
800
801 FOR NOW: No analysis is actually performed. Misalignment is calculated
802 only for trivial cases. TODO. */
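/* For example, with a 16-byte aligned V4SI vectype, a data-ref whose base
   is known to be 16-byte aligned and whose DR_INIT is 4 gets
   DR_MISALIGNMENT (DR) == 4 (the init modulo the vector alignment); if the
   base alignment is unknown and cannot be forced, the misalignment is left
   as -1.  */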
803
804static bool
805vect_compute_data_ref_alignment (struct data_reference *dr)
806{
807 gimple stmt = DR_STMT (dr);
48e1416a 808 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
fb85abff 809 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 810 struct loop *loop = NULL;
fb85abff 811 tree ref = DR_REF (dr);
812 tree vectype;
813 tree base, base_addr;
814 bool base_aligned;
815 tree misalign;
816 tree aligned_to, alignment;
48e1416a 817
fb85abff 818 if (vect_print_dump_info (REPORT_DETAILS))
819 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
820
37545e54 821 if (loop_vinfo)
822 loop = LOOP_VINFO_LOOP (loop_vinfo);
48e1416a 823
fb85abff 824 /* Initialize misalignment to unknown. */
825 SET_DR_MISALIGNMENT (dr, -1);
826
a72a38f4 827 /* Strided loads perform only component accesses, misalignment information
828 is irrelevant for them. */
829 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
830 return true;
831
fb85abff 832 misalign = DR_INIT (dr);
833 aligned_to = DR_ALIGNED_TO (dr);
834 base_addr = DR_BASE_ADDRESS (dr);
835 vectype = STMT_VINFO_VECTYPE (stmt_info);
836
837 /* In case the dataref is in an inner-loop of the loop that is being
838 vectorized (LOOP), we use the base and misalignment information
282bf14c 839 relative to the outer-loop (LOOP). This is ok only if the misalignment
fb85abff 840 stays the same throughout the execution of the inner-loop, which is why
 841 we have to check that the stride of the dataref in the inner-loop is evenly
 842 divisible by the vector size. */
37545e54 843 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 844 {
845 tree step = DR_STEP (dr);
846 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
48e1416a 847
fb85abff 848 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
849 {
850 if (vect_print_dump_info (REPORT_ALIGNMENT))
851 fprintf (vect_dump, "inner step divides the vector-size.");
852 misalign = STMT_VINFO_DR_INIT (stmt_info);
853 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
854 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
855 }
856 else
857 {
858 if (vect_print_dump_info (REPORT_ALIGNMENT))
859 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
860 misalign = NULL_TREE;
861 }
862 }
863
38682b67 864 /* Similarly, if we're doing basic-block vectorization, we can only use
865 base and misalignment information relative to an innermost loop if the
866 misalignment stays the same throughout the execution of the loop.
 867 As above, this is the case if the stride of the dataref is evenly divisible
 868 by the vector size. */
869 if (!loop)
870 {
871 tree step = DR_STEP (dr);
872 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
873
874 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
875 {
876 if (vect_print_dump_info (REPORT_ALIGNMENT))
877 fprintf (vect_dump, "SLP: step doesn't divide the vector-size.");
878 misalign = NULL_TREE;
879 }
880 }
881
fb85abff 882 base = build_fold_indirect_ref (base_addr);
883 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
884
885 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
886 || !misalign)
887 {
888 if (vect_print_dump_info (REPORT_ALIGNMENT))
889 {
890 fprintf (vect_dump, "Unknown alignment for access: ");
891 print_generic_expr (vect_dump, base, TDF_SLIM);
892 }
893 return true;
894 }
895
48e1416a 896 if ((DECL_P (base)
fb85abff 897 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
898 alignment) >= 0)
899 || (TREE_CODE (base_addr) == SSA_NAME
900 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
901 TREE_TYPE (base_addr)))),
607ce623 902 alignment) >= 0)
957d0361 903 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
fb85abff 904 base_aligned = true;
905 else
48e1416a 906 base_aligned = false;
fb85abff 907
48e1416a 908 if (!base_aligned)
fb85abff 909 {
08f83b65 910 /* Do not change the alignment of global variables here if
911 flag_section_anchors is enabled as we already generated
912 RTL for other functions. Most global variables should
913 have been aligned during the IPA increase_alignment pass. */
914 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
915 || (TREE_STATIC (base) && flag_section_anchors))
fb85abff 916 {
917 if (vect_print_dump_info (REPORT_DETAILS))
918 {
919 fprintf (vect_dump, "can't force alignment of ref: ");
920 print_generic_expr (vect_dump, ref, TDF_SLIM);
921 }
922 return true;
923 }
48e1416a 924
fb85abff 925 /* Force the alignment of the decl.
926 NOTE: This is the only change to the code we make during
927 the analysis phase, before deciding to vectorize the loop. */
928 if (vect_print_dump_info (REPORT_DETAILS))
0822b158 929 {
930 fprintf (vect_dump, "force alignment of ");
931 print_generic_expr (vect_dump, ref, TDF_SLIM);
932 }
933
fb85abff 934 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
935 DECL_USER_ALIGN (base) = 1;
936 }
937
938 /* At this point we assume that the base is aligned. */
939 gcc_assert (base_aligned
48e1416a 940 || (TREE_CODE (base) == VAR_DECL
fb85abff 941 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
942
85a846a2 943 /* If this is a backward running DR then the first access in the larger
944 vectype actually is N-1 elements before the address in the DR.
945 Adjust misalign accordingly. */
946 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
947 {
948 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
949 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
950 otherwise we wouldn't be here. */
951 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
952 /* PLUS because DR_STEP was negative. */
953 misalign = size_binop (PLUS_EXPR, misalign, offset);
954 }
955
fb85abff 956 /* Modulo alignment. */
957 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
958
959 if (!host_integerp (misalign, 1))
960 {
961 /* Negative or overflowed misalignment value. */
962 if (vect_print_dump_info (REPORT_DETAILS))
963 fprintf (vect_dump, "unexpected misalign value");
964 return false;
965 }
966
967 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
968
969 if (vect_print_dump_info (REPORT_DETAILS))
970 {
971 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
972 print_generic_expr (vect_dump, ref, TDF_SLIM);
973 }
974
975 return true;
976}
977
978
979/* Function vect_compute_data_refs_alignment
980
981 Compute the misalignment of data references in the loop.
982 Return FALSE if a data reference is found that cannot be vectorized. */
983
984static bool
48e1416a 985vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
37545e54 986 bb_vec_info bb_vinfo)
fb85abff 987{
37545e54 988 VEC (data_reference_p, heap) *datarefs;
fb85abff 989 struct data_reference *dr;
990 unsigned int i;
991
37545e54 992 if (loop_vinfo)
993 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
994 else
995 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
48e1416a 996
48148244 997 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
6ea6a380 998 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
999 && !vect_compute_data_ref_alignment (dr))
1000 {
1001 if (bb_vinfo)
1002 {
1003 /* Mark unsupported statement as unvectorizable. */
1004 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1005 continue;
1006 }
1007 else
1008 return false;
1009 }
fb85abff 1010
1011 return true;
1012}
1013
1014
1015/* Function vect_update_misalignment_for_peel
1016
1017 DR - the data reference whose misalignment is to be adjusted.
1018 DR_PEEL - the data reference whose misalignment is being made
1019 zero in the vector loop by the peel.
1020 NPEEL - the number of iterations in the peel loop if the misalignment
1021 of DR_PEEL is known at compile time. */
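/* For example, with 4-byte elements and a 16-byte vector type, peeling
   NPEEL == 3 iterations to align DR_PEEL changes a DR whose misalignment
   was 8 into one with misalignment (8 + 3 * 4) mod 16 == 4, while a DR
   known to have the same alignment as DR_PEEL simply becomes aligned.  */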
1022
1023static void
1024vect_update_misalignment_for_peel (struct data_reference *dr,
1025 struct data_reference *dr_peel, int npeel)
1026{
1027 unsigned int i;
1028 VEC(dr_p,heap) *same_align_drs;
1029 struct data_reference *current_dr;
1030 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
1031 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
1032 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
1033 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
1034
1035 /* For interleaved data accesses the step in the loop must be multiplied by
1036 the size of the interleaving group. */
ee612634 1037 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
21009880 1038 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
ee612634 1039 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
21009880 1040 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
fb85abff 1041
1042 /* It can be assumed that the data refs with the same alignment as dr_peel
1043 are aligned in the vector loop. */
1044 same_align_drs
1045 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
48148244 1046 FOR_EACH_VEC_ELT (dr_p, same_align_drs, i, current_dr)
fb85abff 1047 {
1048 if (current_dr != dr)
1049 continue;
1050 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
1051 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
1052 SET_DR_MISALIGNMENT (dr, 0);
1053 return;
1054 }
1055
1056 if (known_alignment_for_access_p (dr)
1057 && known_alignment_for_access_p (dr_peel))
1058 {
f1b8c740 1059 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
fb85abff 1060 int misal = DR_MISALIGNMENT (dr);
1061 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
f1b8c740 1062 misal += negative ? -npeel * dr_size : npeel * dr_size;
1063 misal &= GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
fb85abff 1064 SET_DR_MISALIGNMENT (dr, misal);
1065 return;
1066 }
1067
1068 if (vect_print_dump_info (REPORT_DETAILS))
1069 fprintf (vect_dump, "Setting misalignment to -1.");
1070 SET_DR_MISALIGNMENT (dr, -1);
1071}
1072
1073
1074/* Function vect_verify_datarefs_alignment
1075
1076 Return TRUE if all data references in the loop can be
1077 handled with respect to alignment. */
1078
37545e54 1079bool
1080vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 1081{
37545e54 1082 VEC (data_reference_p, heap) *datarefs;
fb85abff 1083 struct data_reference *dr;
1084 enum dr_alignment_support supportable_dr_alignment;
1085 unsigned int i;
1086
37545e54 1087 if (loop_vinfo)
1088 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1089 else
1090 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1091
48148244 1092 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
fb85abff 1093 {
1094 gimple stmt = DR_STMT (dr);
1095 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1096
1ad41595 1097 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1098 continue;
1099
6ea6a380 1100 /* For interleaving, only the alignment of the first access matters.
1101 Skip statements marked as not vectorizable. */
ee612634 1102 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1103 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
6ea6a380 1104 || !STMT_VINFO_VECTORIZABLE (stmt_info))
fb85abff 1105 continue;
1106
506aa8fc 1107 /* Strided loads perform only component accesses, alignment is
1108 irrelevant for them. */
1109 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1110 continue;
1111
0822b158 1112 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
fb85abff 1113 if (!supportable_dr_alignment)
1114 {
f083cd24 1115 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 1116 {
1117 if (DR_IS_READ (dr))
48e1416a 1118 fprintf (vect_dump,
fb85abff 1119 "not vectorized: unsupported unaligned load.");
1120 else
48e1416a 1121 fprintf (vect_dump,
fb85abff 1122 "not vectorized: unsupported unaligned store.");
6ea6a380 1123
1124 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
fb85abff 1125 }
1126 return false;
1127 }
1128 if (supportable_dr_alignment != dr_aligned
1129 && vect_print_dump_info (REPORT_ALIGNMENT))
1130 fprintf (vect_dump, "Vectorizing an unaligned access.");
1131 }
1132 return true;
1133}
1134
1135
1136/* Function vector_alignment_reachable_p
1137
1138 Return true if vector alignment for DR is reachable by peeling
1139 a few loop iterations. Return false otherwise. */
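/* For example, a non-interleaved V4SI access (16-byte vectors, 4-byte
   elements) with a known misalignment of 8 bytes becomes aligned after
   peeling two scalar iterations, so true is returned; a known misalignment
   of 6 bytes is not a multiple of the element size, so peeling cannot
   reach alignment.  */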
1140
1141static bool
1142vector_alignment_reachable_p (struct data_reference *dr)
1143{
1144 gimple stmt = DR_STMT (dr);
1145 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1146 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1147
ee612634 1148 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
fb85abff 1149 {
 1150 /* For interleaved access we peel only if the number of iterations in
 1151 the prolog loop ({VF - misalignment}) is a multiple of the
 1152 number of interleaved accesses. */
1153 int elem_size, mis_in_elements;
1154 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1155
1156 /* FORNOW: handle only known alignment. */
1157 if (!known_alignment_for_access_p (dr))
1158 return false;
1159
1160 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1161 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1162
21009880 1163 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
fb85abff 1164 return false;
1165 }
1166
 1167 /* If misalignment is known at compile time then allow peeling
1168 only if natural alignment is reachable through peeling. */
1169 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1170 {
48e1416a 1171 HOST_WIDE_INT elmsize =
fb85abff 1172 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1173 if (vect_print_dump_info (REPORT_DETAILS))
1174 {
1175 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1176 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1177 }
1178 if (DR_MISALIGNMENT (dr) % elmsize)
1179 {
1180 if (vect_print_dump_info (REPORT_DETAILS))
1181 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1182 return false;
1183 }
1184 }
1185
1186 if (!known_alignment_for_access_p (dr))
1187 {
1188 tree type = (TREE_TYPE (DR_REF (dr)));
95539e1d 1189 bool is_packed = contains_packed_reference (DR_REF (dr));
fb85abff 1190
570e2049 1191 if (compare_tree_int (TYPE_SIZE (type), TYPE_ALIGN (type)) > 0)
1192 is_packed = true;
1193
fb85abff 1194 if (vect_print_dump_info (REPORT_DETAILS))
1195 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1196 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1197 return true;
1198 else
1199 return false;
1200 }
1201
1202 return true;
1203}
1204
0822b158 1205
1206/* Calculate the cost of the memory access represented by DR. */
1207
4db2b577 1208static stmt_vector_for_cost
0822b158 1209vect_get_data_access_cost (struct data_reference *dr,
1210 unsigned int *inside_cost,
1211 unsigned int *outside_cost)
1212{
1213 gimple stmt = DR_STMT (dr);
1214 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1215 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1216 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1217 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1218 int ncopies = vf / nunits;
4db2b577 1219 stmt_vector_for_cost stmt_cost_vec = VEC_alloc (stmt_info_for_cost, heap, 2);
0822b158 1220
1ad41595 1221 if (DR_IS_READ (dr))
4db2b577 1222 vect_get_load_cost (dr, ncopies, true, inside_cost,
1223 outside_cost, &stmt_cost_vec);
0822b158 1224 else
4db2b577 1225 vect_get_store_cost (dr, ncopies, inside_cost, &stmt_cost_vec);
0822b158 1226
1227 if (vect_print_dump_info (REPORT_COST))
1228 fprintf (vect_dump, "vect_get_data_access_cost: inside_cost = %d, "
1229 "outside_cost = %d.", *inside_cost, *outside_cost);
4db2b577 1230
1231 return stmt_cost_vec;
0822b158 1232}
1233
1234
1235static hashval_t
1236vect_peeling_hash (const void *elem)
1237{
1238 const struct _vect_peel_info *peel_info;
1239
1240 peel_info = (const struct _vect_peel_info *) elem;
1241 return (hashval_t) peel_info->npeel;
1242}
1243
1244
1245static int
1246vect_peeling_hash_eq (const void *elem1, const void *elem2)
1247{
1248 const struct _vect_peel_info *a, *b;
1249
1250 a = (const struct _vect_peel_info *) elem1;
1251 b = (const struct _vect_peel_info *) elem2;
1252 return (a->npeel == b->npeel);
1253}
1254
1255
1256/* Insert DR into peeling hash table with NPEEL as key. */
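/* For example, if three data-refs each become aligned by peeling two
   scalar iterations, they all share the slot with key 2 and its count
   reaches 3; when the cost model is disabled, a data-ref whose alignment
   is otherwise unsupported additionally bumps the count by VECT_MAX_COST,
   strongly favoring that peeling amount.  */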
1257
1258static void
1259vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1260 int npeel)
1261{
1262 struct _vect_peel_info elem, *slot;
1263 void **new_slot;
1264 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1265
1266 elem.npeel = npeel;
1267 slot = (vect_peel_info) htab_find (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1268 &elem);
1269 if (slot)
1270 slot->count++;
1271 else
1272 {
1273 slot = XNEW (struct _vect_peel_info);
1274 slot->npeel = npeel;
1275 slot->dr = dr;
1276 slot->count = 1;
1277 new_slot = htab_find_slot (LOOP_VINFO_PEELING_HTAB (loop_vinfo), slot,
1278 INSERT);
1279 *new_slot = slot;
1280 }
1281
1282 if (!supportable_dr_alignment && !flag_vect_cost_model)
1283 slot->count += VECT_MAX_COST;
1284}
1285
1286
1287/* Traverse peeling hash table to find peeling option that aligns maximum
1288 number of data accesses. */
1289
1290static int
1291vect_peeling_hash_get_most_frequent (void **slot, void *data)
1292{
1293 vect_peel_info elem = (vect_peel_info) *slot;
1294 vect_peel_extended_info max = (vect_peel_extended_info) data;
1295
593fa4d1 1296 if (elem->count > max->peel_info.count
1297 || (elem->count == max->peel_info.count
1298 && max->peel_info.npeel > elem->npeel))
0822b158 1299 {
1300 max->peel_info.npeel = elem->npeel;
1301 max->peel_info.count = elem->count;
1302 max->peel_info.dr = elem->dr;
1303 }
1304
1305 return 1;
1306}
1307
1308
282bf14c 1309/* Traverse peeling hash table and calculate cost for each peeling option.
1310 Find the one with the lowest cost. */
0822b158 1311
1312static int
1313vect_peeling_hash_get_lowest_cost (void **slot, void *data)
1314{
1315 vect_peel_info elem = (vect_peel_info) *slot;
1316 vect_peel_extended_info min = (vect_peel_extended_info) data;
1317 int save_misalignment, dummy;
1318 unsigned int inside_cost = 0, outside_cost = 0, i;
1319 gimple stmt = DR_STMT (elem->dr);
1320 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1321 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1322 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1323 struct data_reference *dr;
4db2b577 1324 stmt_vector_for_cost stmt_cost_vec = NULL;
0822b158 1325
48148244 1326 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
0822b158 1327 {
1328 stmt = DR_STMT (dr);
1329 stmt_info = vinfo_for_stmt (stmt);
1330 /* For interleaving, only the alignment of the first access
1331 matters. */
ee612634 1332 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1333 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
0822b158 1334 continue;
1335
1336 save_misalignment = DR_MISALIGNMENT (dr);
1337 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
4db2b577 1338 stmt_cost_vec = vect_get_data_access_cost (dr, &inside_cost,
1339 &outside_cost);
0822b158 1340 SET_DR_MISALIGNMENT (dr, save_misalignment);
1341 }
1342
1343 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel, &dummy,
f4ac3f3e 1344 vect_get_single_scalar_iteration_cost (loop_vinfo));
0822b158 1345
1346 if (inside_cost < min->inside_cost
1347 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1348 {
1349 min->inside_cost = inside_cost;
1350 min->outside_cost = outside_cost;
4db2b577 1351 min->stmt_cost_vec = stmt_cost_vec;
0822b158 1352 min->peel_info.dr = elem->dr;
1353 min->peel_info.npeel = elem->npeel;
1354 }
1355
1356 return 1;
1357}
1358
1359
1360/* Choose best peeling option by traversing peeling hash table and either
1361 choosing an option with the lowest cost (if cost model is enabled) or the
1362 option that aligns as many accesses as possible. */
1363
1364static struct data_reference *
1365vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
4db2b577 1366 unsigned int *npeel,
1367 stmt_vector_for_cost *stmt_cost_vec)
0822b158 1368{
1369 struct _vect_peel_extended_info res;
1370
1371 res.peel_info.dr = NULL;
4db2b577 1372 res.stmt_cost_vec = NULL;
0822b158 1373
1374 if (flag_vect_cost_model)
1375 {
1376 res.inside_cost = INT_MAX;
1377 res.outside_cost = INT_MAX;
1378 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1379 vect_peeling_hash_get_lowest_cost, &res);
1380 }
1381 else
1382 {
1383 res.peel_info.count = 0;
1384 htab_traverse (LOOP_VINFO_PEELING_HTAB (loop_vinfo),
1385 vect_peeling_hash_get_most_frequent, &res);
1386 }
1387
1388 *npeel = res.peel_info.npeel;
4db2b577 1389 *stmt_cost_vec = res.stmt_cost_vec;
0822b158 1390 return res.peel_info.dr;
1391}
1392
1393
fb85abff 1394/* Function vect_enhance_data_refs_alignment
1395
1396 This pass will use loop versioning and loop peeling in order to enhance
1397 the alignment of data references in the loop.
1398
1399 FOR NOW: we assume that whatever versioning/peeling takes place, only the
282bf14c 1400 original loop is to be vectorized. Any other loops that are created by
fb85abff 1401 the transformations performed in this pass are not supposed to be
282bf14c 1402 vectorized. This restriction will be relaxed.
fb85abff 1403
1404 This pass will require a cost model to guide it whether to apply peeling
282bf14c 1405 or versioning or a combination of the two. For example, the scheme that
fb85abff 1406 Intel uses when given a loop with several memory accesses is as follows:
 1407 choose one memory access ('p') whose alignment you want to force by doing
282bf14c 1408 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
fb85abff 1409 other accesses are not necessarily aligned, or (2) use loop versioning to
1410 generate one loop in which all accesses are aligned, and another loop in
1411 which only 'p' is necessarily aligned.
1412
1413 ("Automatic Intra-Register Vectorization for the Intel Architecture",
 1414 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1415 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1416
282bf14c 1417 Devising a cost model is the most critical aspect of this work. It will
fb85abff 1418 guide us on which access to peel for, whether to use loop versioning, how
282bf14c 1419 many versions to create, etc. The cost model will probably consist of
fb85abff 1420 generic considerations as well as target specific considerations (on
1421 powerpc for example, misaligned stores are more painful than misaligned
1422 loads).
1423
1424 Here are the general steps involved in alignment enhancements:
1425
1426 -- original loop, before alignment analysis:
1427 for (i=0; i<N; i++){
1428 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1429 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1430 }
1431
1432 -- After vect_compute_data_refs_alignment:
1433 for (i=0; i<N; i++){
1434 x = q[i]; # DR_MISALIGNMENT(q) = 3
1435 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1436 }
1437
1438 -- Possibility 1: we do loop versioning:
1439 if (p is aligned) {
1440 for (i=0; i<N; i++){ # loop 1A
1441 x = q[i]; # DR_MISALIGNMENT(q) = 3
1442 p[i] = y; # DR_MISALIGNMENT(p) = 0
1443 }
1444 }
1445 else {
1446 for (i=0; i<N; i++){ # loop 1B
1447 x = q[i]; # DR_MISALIGNMENT(q) = 3
1448 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1449 }
1450 }
1451
1452 -- Possibility 2: we do loop peeling:
1453 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1454 x = q[i];
1455 p[i] = y;
1456 }
1457 for (i = 3; i < N; i++){ # loop 2A
1458 x = q[i]; # DR_MISALIGNMENT(q) = 0
1459 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1460 }
1461
1462 -- Possibility 3: combination of loop peeling and versioning:
1463 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1464 x = q[i];
1465 p[i] = y;
1466 }
1467 if (p is aligned) {
1468 for (i = 3; i<N; i++){ # loop 3A
1469 x = q[i]; # DR_MISALIGNMENT(q) = 0
1470 p[i] = y; # DR_MISALIGNMENT(p) = 0
1471 }
1472 }
1473 else {
1474 for (i = 3; i<N; i++){ # loop 3B
1475 x = q[i]; # DR_MISALIGNMENT(q) = 0
1476 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1477 }
1478 }
1479
282bf14c 1480 These loops are later passed to loop_transform to be vectorized. The
fb85abff 1481 vectorizer will use the alignment information to guide the transformation
1482 (whether to generate regular loads/stores, or with special handling for
1483 misalignment). */
1484
1485bool
1486vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1487{
1488 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1489 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1490 enum dr_alignment_support supportable_dr_alignment;
0822b158 1491 struct data_reference *dr0 = NULL, *first_store = NULL;
fb85abff 1492 struct data_reference *dr;
0822b158 1493 unsigned int i, j;
fb85abff 1494 bool do_peeling = false;
1495 bool do_versioning = false;
1496 bool stat;
1497 gimple stmt;
1498 stmt_vec_info stmt_info;
1499 int vect_versioning_for_alias_required;
0822b158 1500 unsigned int npeel = 0;
1501 bool all_misalignments_unknown = true;
1502 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1503 unsigned possible_npeel_number = 1;
1504 tree vectype;
1505 unsigned int nelements, mis, same_align_drs_max = 0;
4db2b577 1506 stmt_vector_for_cost stmt_cost_vec = NULL;
fb85abff 1507
1508 if (vect_print_dump_info (REPORT_DETAILS))
1509 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1510
1511 /* While cost model enhancements are expected in the future, the high level
1512 view of the code at this time is as follows:
1513
ec2886ed 1514 A) If there is a misaligned access then see if peeling to align
1515 this access can make all data references satisfy
454f25be 1516 vect_supportable_dr_alignment. If so, update data structures
1517 as needed and return true.
fb85abff 1518
1519 B) If peeling wasn't possible and there is a data reference with an
1520 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1521 then see if loop versioning checks can be used to make all data
1522 references satisfy vect_supportable_dr_alignment. If so, update
1523 data structures as needed and return true.
1524
1525 C) If neither peeling nor versioning were successful then return false if
1526 any data reference does not satisfy vect_supportable_dr_alignment.
1527
1528 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1529
1530 Note, Possibility 3 above (which is peeling and versioning together) is not
1531 being done at this time. */
1532
1533 /* (1) Peeling to force alignment. */
1534
1535 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1536 Considerations:
1537 + How many accesses will become aligned due to the peeling
1538 - How many accesses will become unaligned due to the peeling,
1539 and the cost of misaligned accesses.
48e1416a 1540 - The cost of peeling (the extra runtime checks, the increase
0822b158 1541 in code size). */
fb85abff 1542
48148244 1543 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
fb85abff 1544 {
1545 stmt = DR_STMT (dr);
1546 stmt_info = vinfo_for_stmt (stmt);
1547
1ad41595 1548 if (!STMT_VINFO_RELEVANT_P (stmt_info))
b04940e7 1549 continue;
1550
fb85abff 1551 /* For interleaving, only the alignment of the first access
1552 matters. */
ee612634 1553 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1554 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
fb85abff 1555 continue;
1556
8faf5d30 1557 /* FORNOW: Any strided load prevents peeling. The induction
1558 variable analysis will fail when the prologue loop is generated,
1559 and so we can't generate the new base for the pointer. */
1560 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1561 {
1562 if (vect_print_dump_info (REPORT_DETAILS))
1563 fprintf (vect_dump, "strided load prevents peeling");
1564 do_peeling = false;
1565 break;
1566 }
1567
b04940e7 1568 /* For invariant accesses there is nothing to enhance. */
1569 if (integer_zerop (DR_STEP (dr)))
1570 continue;
1571
f634c3e9 1572 /* Strided loads perform only component accesses, alignment is
1573 irrelevant for them. */
1574 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1575 continue;
1576
0822b158 1577 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1578 do_peeling = vector_alignment_reachable_p (dr);
1579 if (do_peeling)
fb85abff 1580 {
0822b158 1581 if (known_alignment_for_access_p (dr))
1582 {
1583 unsigned int npeel_tmp;
f1b8c740 1584 bool negative = tree_int_cst_compare (DR_STEP (dr),
1585 size_zero_node) < 0;
0822b158 1586
1587 /* Save info about DR in the hash table. */
1588 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo))
1589 LOOP_VINFO_PEELING_HTAB (loop_vinfo) =
1590 htab_create (1, vect_peeling_hash,
1591 vect_peeling_hash_eq, free);
1592
1593 vectype = STMT_VINFO_VECTYPE (stmt_info);
1594 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1595 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1596 TREE_TYPE (DR_REF (dr))));
f1b8c740 1597 npeel_tmp = (negative
184a9a0f 1598 ? (mis - nelements) : (nelements - mis))
1599 & (nelements - 1);
0822b158 1600
1601 /* For multiple types, it is possible that the bigger type access
282bf14c 1602 will have more than one peeling option. E.g., a loop with two
0822b158 1603 types: one of size (vector size / 4), and the other one of
 282bf14c 1604                 size (vector size / 8).  The vectorization factor will be 8.  If both
 0822b158 1605                 accesses are misaligned by 3, the first one needs one scalar
 282bf14c 1606                 iteration to be aligned, and the second one needs 5.  But the
 0822b158 1607                 first one will also be aligned by peeling 5 scalar
 1608                 iterations, and in that case both accesses will be aligned.
 1609                 Hence, except for the immediate peeling amount, we also want
 1610                 to try to add a full vector size, as long as we don't exceed the
 1611                 vectorization factor.
 1612                 We do this automatically for the cost model, since we calculate the
 1613                 cost for every peeling option.  */
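              /* Worked example (illustrative numbers only, not tied to any
                 particular target): for a V4SI access (nelements == 4,
                 element size 4 bytes) with DR_MISALIGNMENT == 8 bytes and a
                 positive step, mis == 8 / 4 == 2 and
                 npeel_tmp == (4 - 2) & 3 == 2.  With vf == 8 and
                 possible_npeel_number == 8 / 4 == 2, the loop below records
                 the candidate peeling amounts 2 and 2 + 4 == 6.  */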
1614 if (!flag_vect_cost_model)
 1615                possible_npeel_number = vf / nelements;
1616
1617 /* Handle the aligned case. We may decide to align some other
1618 access, making DR unaligned. */
1619 if (DR_MISALIGNMENT (dr) == 0)
1620 {
1621 npeel_tmp = 0;
1622 if (!flag_vect_cost_model)
1623 possible_npeel_number++;
1624 }
1625
1626 for (j = 0; j < possible_npeel_number; j++)
1627 {
1628 gcc_assert (npeel_tmp <= vf);
1629 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1630 npeel_tmp += nelements;
1631 }
1632
1633 all_misalignments_unknown = false;
1634 /* Data-ref that was chosen for the case that all the
1635 misalignments are unknown is not relevant anymore, since we
1636 have a data-ref with known alignment. */
1637 dr0 = NULL;
1638 }
1639 else
1640 {
 1641              /* If we don't know all the misalignment values, we prefer
 1642                 peeling for the data-ref that has the maximum number of
 1643                 data-refs with the same alignment, unless the target prefers
 1644                 to align stores over loads.  */
1645 if (all_misalignments_unknown)
1646 {
1647 if (same_align_drs_max < VEC_length (dr_p,
1648 STMT_VINFO_SAME_ALIGN_REFS (stmt_info))
1649 || !dr0)
1650 {
1651 same_align_drs_max = VEC_length (dr_p,
1652 STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
1653 dr0 = dr;
1654 }
1655
9ff25603 1656 if (!first_store && DR_IS_WRITE (dr))
0822b158 1657 first_store = dr;
1658 }
1659
1660 /* If there are both known and unknown misaligned accesses in the
 1661                 loop, we choose the peeling amount according to the known
1662 accesses. */
1663
1664
1665 if (!supportable_dr_alignment)
1666 {
1667 dr0 = dr;
9ff25603 1668 if (!first_store && DR_IS_WRITE (dr))
0822b158 1669 first_store = dr;
1670 }
1671 }
1672 }
1673 else
1674 {
1675 if (!aligned_access_p (dr))
1676 {
1677 if (vect_print_dump_info (REPORT_DETAILS))
1678 fprintf (vect_dump, "vector alignment may not be reachable");
1679
1680 break;
1681 }
1682 }
fb85abff 1683 }
1684
48e1416a 1685 vect_versioning_for_alias_required
10095225 1686 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
fb85abff 1687
1688 /* Temporarily, if versioning for alias is required, we disable peeling
1689 until we support peeling and versioning. Often peeling for alignment
1690 will require peeling for loop-bound, which in turn requires that we
1691 know how to adjust the loop ivs after the loop. */
1692 if (vect_versioning_for_alias_required
10095225 1693 || !vect_can_advance_ivs_p (loop_vinfo)
fb85abff 1694 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1695 do_peeling = false;
1696
0822b158 1697 if (do_peeling && all_misalignments_unknown
1698 && vect_supportable_dr_alignment (dr0, false))
1699 {
1700
 1701      /* Check if the target requires preferring stores over loads, i.e., if
1702 misaligned stores are more expensive than misaligned loads (taking
1703 drs with same alignment into account). */
1704 if (first_store && DR_IS_READ (dr0))
1705 {
1706 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1707 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1708 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1709 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
1710
4db2b577 1711 (void) vect_get_data_access_cost (dr0, &load_inside_cost,
1712 &load_outside_cost);
1713 (void) vect_get_data_access_cost (first_store, &store_inside_cost,
1714 &store_outside_cost);
0822b158 1715
1716 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1717 aligning the load DR0). */
1718 load_inside_penalty = store_inside_cost;
1719 load_outside_penalty = store_outside_cost;
1720 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1721 (vinfo_for_stmt (DR_STMT (first_store))),
1722 i, dr);
1723 i++)
1724 if (DR_IS_READ (dr))
1725 {
1726 load_inside_penalty += load_inside_cost;
1727 load_outside_penalty += load_outside_cost;
1728 }
1729 else
1730 {
1731 load_inside_penalty += store_inside_cost;
1732 load_outside_penalty += store_outside_cost;
1733 }
1734
1735 /* Calculate the penalty for leaving DR0 unaligned (by
1736 aligning the FIRST_STORE). */
1737 store_inside_penalty = load_inside_cost;
1738 store_outside_penalty = load_outside_cost;
1739 for (i = 0; VEC_iterate (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1740 (vinfo_for_stmt (DR_STMT (dr0))),
1741 i, dr);
1742 i++)
1743 if (DR_IS_READ (dr))
1744 {
1745 store_inside_penalty += load_inside_cost;
1746 store_outside_penalty += load_outside_cost;
1747 }
1748 else
1749 {
1750 store_inside_penalty += store_inside_cost;
1751 store_outside_penalty += store_outside_cost;
1752 }
1753
1754 if (load_inside_penalty > store_inside_penalty
1755 || (load_inside_penalty == store_inside_penalty
1756 && load_outside_penalty > store_outside_penalty))
1757 dr0 = first_store;
1758 }
1759
1760 /* In case there are only loads with different unknown misalignments, use
1761 peeling only if it may help to align other accesses in the loop. */
1762 if (!first_store && !VEC_length (dr_p, STMT_VINFO_SAME_ALIGN_REFS
1763 (vinfo_for_stmt (DR_STMT (dr0))))
1764 && vect_supportable_dr_alignment (dr0, false)
1765 != dr_unaligned_supported)
1766 do_peeling = false;
1767 }
1768
1769 if (do_peeling && !dr0)
1770 {
 1771      /* Peeling is possible, but every data access is supported even when
 1772         unaligned.  So we try to choose the best possible peeling amount.  */
1773
1774 /* We should get here only if there are drs with known misalignment. */
1775 gcc_assert (!all_misalignments_unknown);
1776
1777 /* Choose the best peeling from the hash table. */
4db2b577 1778 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
1779 &stmt_cost_vec);
0822b158 1780 if (!dr0 || !npeel)
1781 do_peeling = false;
1782 }
1783
fb85abff 1784 if (do_peeling)
1785 {
0822b158 1786 stmt = DR_STMT (dr0);
1787 stmt_info = vinfo_for_stmt (stmt);
1788 vectype = STMT_VINFO_VECTYPE (stmt_info);
1789 nelements = TYPE_VECTOR_SUBPARTS (vectype);
fb85abff 1790
1791 if (known_alignment_for_access_p (dr0))
1792 {
f1b8c740 1793 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1794 size_zero_node) < 0;
0822b158 1795 if (!npeel)
1796 {
1797 /* Since it's known at compile time, compute the number of
1798 iterations in the peeled loop (the peeling factor) for use in
1799 updating DR_MISALIGNMENT values. The peeling factor is the
1800 vectorization factor minus the misalignment as an element
1801 count. */
1802 mis = DR_MISALIGNMENT (dr0);
1803 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
184a9a0f 1804 npeel = ((negative ? mis - nelements : nelements - mis)
1805 & (nelements - 1));
0822b158 1806 }
fb85abff 1807
48e1416a 1808 /* For interleaved data access every iteration accesses all the
fb85abff 1809 members of the group, therefore we divide the number of iterations
1810 by the group size. */
48e1416a 1811 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
ee612634 1812 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
21009880 1813 npeel /= GROUP_SIZE (stmt_info);
fb85abff 1814
1815 if (vect_print_dump_info (REPORT_DETAILS))
1816 fprintf (vect_dump, "Try peeling by %d", npeel);
1817 }
1818
1819 /* Ensure that all data refs can be vectorized after the peel. */
48148244 1820 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
fb85abff 1821 {
1822 int save_misalignment;
1823
1824 if (dr == dr0)
1825 continue;
1826
1827 stmt = DR_STMT (dr);
1828 stmt_info = vinfo_for_stmt (stmt);
1829 /* For interleaving, only the alignment of the first access
1830 matters. */
ee612634 1831 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1832 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
fb85abff 1833 continue;
1834
f634c3e9 1835 /* Strided loads perform only component accesses, alignment is
1836 irrelevant for them. */
1837 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1838 continue;
1839
fb85abff 1840 save_misalignment = DR_MISALIGNMENT (dr);
1841 vect_update_misalignment_for_peel (dr, dr0, npeel);
0822b158 1842 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
fb85abff 1843 SET_DR_MISALIGNMENT (dr, save_misalignment);
48e1416a 1844
fb85abff 1845 if (!supportable_dr_alignment)
1846 {
1847 do_peeling = false;
1848 break;
1849 }
1850 }
1851
0822b158 1852 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1853 {
1854 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1855 if (!stat)
1856 do_peeling = false;
1857 else
1858 return stat;
1859 }
1860
fb85abff 1861 if (do_peeling)
1862 {
4db2b577 1863 stmt_info_for_cost *si;
1864
fb85abff 1865 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1866 If the misalignment of DR_i is identical to that of dr0 then set
1867 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1868 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1869 by the peeling factor times the element size of DR_i (MOD the
1870 vectorization factor times the size). Otherwise, the
1871 misalignment of DR_i must be set to unknown. */
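	  /* Illustrative numbers (hypothetical, for clarity of the rule
	     above): with npeel == 2 and 4-byte elements in a 16-byte vector,
	     a DR_i whose misalignment equals dr0's becomes 0; a DR_i with a
	     known misalignment of 4 bytes becomes (4 + 2 * 4) % 16 == 12;
	     a DR_i whose misalignment is unknown stays unknown.  */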
48148244 1872 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
fb85abff 1873 if (dr != dr0)
1874 vect_update_misalignment_for_peel (dr, dr0, npeel);
1875
1876 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
0822b158 1877 if (npeel)
1878 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1879 else
1880 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
fb85abff 1881 SET_DR_MISALIGNMENT (dr0, 0);
1882 if (vect_print_dump_info (REPORT_ALIGNMENT))
1883 fprintf (vect_dump, "Alignment of access forced using peeling.");
1884
1885 if (vect_print_dump_info (REPORT_DETAILS))
1886 fprintf (vect_dump, "Peeling for alignment will be applied.");
1887
4db2b577 1888 /* We've delayed passing the inside-loop peeling costs to the
1889 target cost model until we were sure peeling would happen.
1890 Do so now. */
1891 if (stmt_cost_vec)
1892 {
1893 FOR_EACH_VEC_ELT (stmt_info_for_cost, stmt_cost_vec, i, si)
1894 (void) add_stmt_cost (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo),
1895 si->count, si->kind,
1896 vinfo_for_stmt (si->stmt), si->misalign);
1897 VEC_free (stmt_info_for_cost, heap, stmt_cost_vec);
1898 }
1899
37545e54 1900 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1901 gcc_assert (stat);
1902 return stat;
1903 }
1904 }
1905
1906
1907 /* (2) Versioning to force alignment. */
1908
1909 /* Try versioning if:
1910 1) flag_tree_vect_loop_version is TRUE
1911 2) optimize loop for speed
1912 3) there is at least one unsupported misaligned data ref with an unknown
1913 misalignment, and
1914 4) all misaligned data refs with a known misalignment are supported, and
1915 5) the number of runtime alignment checks is within reason. */
1916
48e1416a 1917 do_versioning =
1918 flag_tree_vect_loop_version
fb85abff 1919 && optimize_loop_nest_for_speed_p (loop)
1920 && (!loop->inner); /* FORNOW */
1921
1922 if (do_versioning)
1923 {
48148244 1924 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
fb85abff 1925 {
1926 stmt = DR_STMT (dr);
1927 stmt_info = vinfo_for_stmt (stmt);
1928
1929 /* For interleaving, only the alignment of the first access
1930 matters. */
1931 if (aligned_access_p (dr)
ee612634 1932 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1933 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
fb85abff 1934 continue;
1935
f634c3e9 1936 /* Strided loads perform only component accesses, alignment is
1937 irrelevant for them. */
1938 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1939 continue;
1940
0822b158 1941 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
fb85abff 1942
1943 if (!supportable_dr_alignment)
1944 {
1945 gimple stmt;
1946 int mask;
1947 tree vectype;
1948
1949 if (known_alignment_for_access_p (dr)
1950 || VEC_length (gimple,
1951 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1952 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1953 {
1954 do_versioning = false;
1955 break;
1956 }
1957
1958 stmt = DR_STMT (dr);
1959 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1960 gcc_assert (vectype);
48e1416a 1961
fb85abff 1962 /* The rightmost bits of an aligned address must be zeros.
1963 Construct the mask needed for this test. For example,
1964 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1965 mask must be 15 = 0xf. */
1966 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1967
1968 /* FORNOW: use the same mask to test all potentially unaligned
1969 references in the loop. The vectorizer currently supports
1970 a single vector size, see the reference to
1971 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1972 vectorization factor is computed. */
1973 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1974 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1975 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1976 VEC_safe_push (gimple, heap,
1977 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1978 DR_STMT (dr));
1979 }
1980 }
48e1416a 1981
fb85abff 1982 /* Versioning requires at least one misaligned data reference. */
10095225 1983 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
fb85abff 1984 do_versioning = false;
1985 else if (!do_versioning)
1986 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1987 }
1988
1989 if (do_versioning)
1990 {
1991 VEC(gimple,heap) *may_misalign_stmts
1992 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1993 gimple stmt;
1994
1995 /* It can now be assumed that the data references in the statements
1996 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1997 of the loop being vectorized. */
48148244 1998 FOR_EACH_VEC_ELT (gimple, may_misalign_stmts, i, stmt)
fb85abff 1999 {
2000 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2001 dr = STMT_VINFO_DATA_REF (stmt_info);
2002 SET_DR_MISALIGNMENT (dr, 0);
2003 if (vect_print_dump_info (REPORT_ALIGNMENT))
2004 fprintf (vect_dump, "Alignment of access forced using versioning.");
2005 }
2006
2007 if (vect_print_dump_info (REPORT_DETAILS))
2008 fprintf (vect_dump, "Versioning for alignment will be applied.");
2009
2010 /* Peeling and versioning can't be done together at this time. */
2011 gcc_assert (! (do_peeling && do_versioning));
2012
37545e54 2013 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 2014 gcc_assert (stat);
2015 return stat;
2016 }
2017
2018 /* This point is reached if neither peeling nor versioning is being done. */
2019 gcc_assert (! (do_peeling || do_versioning));
2020
37545e54 2021 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 2022 return stat;
2023}
2024
2025
91a74fc6 2026/* Function vect_find_same_alignment_drs.
2027
2028 Update group and alignment relations according to the chosen
2029 vectorization factor. */
2030
2031static void
2032vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
2033 loop_vec_info loop_vinfo)
2034{
2035 unsigned int i;
2036 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2037 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2038 struct data_reference *dra = DDR_A (ddr);
2039 struct data_reference *drb = DDR_B (ddr);
2040 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2041 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2042 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
2043 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
2044 lambda_vector dist_v;
2045 unsigned int loop_depth;
2046
2047 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2048 return;
2049
0822b158 2050 if (dra == drb)
91a74fc6 2051 return;
2052
2053 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
2054 return;
2055
2056 /* Loop-based vectorization and known data dependence. */
2057 if (DDR_NUM_DIST_VECTS (ddr) == 0)
2058 return;
2059
85a846a2 2060 /* Data-dependence analysis reports a distance vector of zero
2061 for data-references that overlap only in the first iteration
2062 but have different sign step (see PR45764).
2063 So as a sanity check require equal DR_STEP. */
2064 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2065 return;
2066
91a74fc6 2067 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
48148244 2068 FOR_EACH_VEC_ELT (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v)
91a74fc6 2069 {
2070 int dist = dist_v[loop_depth];
2071
2072 if (vect_print_dump_info (REPORT_DR_DETAILS))
2073 fprintf (vect_dump, "dependence distance = %d.", dist);
2074
2075 /* Same loop iteration. */
2076 if (dist == 0
2077 || (dist % vectorization_factor == 0 && dra_size == drb_size))
2078 {
2079 /* Two references with distance zero have the same alignment. */
2080 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
2081 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
2082 if (vect_print_dump_info (REPORT_ALIGNMENT))
2083 fprintf (vect_dump, "accesses have the same alignment.");
2084 if (vect_print_dump_info (REPORT_DR_DETAILS))
2085 {
2086 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
2087 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
2088 fprintf (vect_dump, " and ");
2089 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
2090 }
2091 }
2092 }
2093}
2094
2095
fb85abff 2096/* Function vect_analyze_data_refs_alignment
2097
2098 Analyze the alignment of the data-references in the loop.
2099 Return FALSE if a data reference is found that cannot be vectorized. */
2100
2101bool
48e1416a 2102vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
37545e54 2103 bb_vec_info bb_vinfo)
fb85abff 2104{
2105 if (vect_print_dump_info (REPORT_DETAILS))
2106 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
2107
91a74fc6 2108 /* Mark groups of data references with same alignment using
2109 data dependence information. */
2110 if (loop_vinfo)
2111 {
2112 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
2113 struct data_dependence_relation *ddr;
2114 unsigned int i;
2115
48148244 2116 FOR_EACH_VEC_ELT (ddr_p, ddrs, i, ddr)
91a74fc6 2117 vect_find_same_alignment_drs (ddr, loop_vinfo);
2118 }
2119
37545e54 2120 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
fb85abff 2121 {
f083cd24 2122 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
48e1416a 2123 fprintf (vect_dump,
fb85abff 2124 "not vectorized: can't calculate alignment for data ref.");
2125 return false;
2126 }
2127
2128 return true;
2129}
2130
2131
ee612634 2132/* Analyze groups of accesses: check that DR belongs to a group of
2133 accesses of legal size, step, etc. Detect gaps, single element
2134 interleaving, and other special cases. Set grouped access info.
fb85abff 2135 Collect groups of strided stores for further use in SLP analysis. */
2136
2137static bool
2138vect_analyze_group_access (struct data_reference *dr)
2139{
2140 tree step = DR_STEP (dr);
2141 tree scalar_type = TREE_TYPE (DR_REF (dr));
2142 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2143 gimple stmt = DR_STMT (dr);
2144 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2145 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 2146 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
fb85abff 2147 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
ee612634 2148 HOST_WIDE_INT groupsize, last_accessed_element = 1;
fb85abff 2149 bool slp_impossible = false;
d88ecd95 2150 struct loop *loop = NULL;
2151
2152 if (loop_vinfo)
2153 loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 2154
ee612634 2155 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2156 size of the interleaving group (including gaps). */
2157 groupsize = dr_step / type_size;
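  /* For illustration (hypothetical loop, not taken from any caller):
     in
	for (i = 0; i < n; i++)
	  {
	    x += a[2 * i];
	    y += a[2 * i + 1];
	  }
     each data-ref has DR_STEP == 2 * sizeof (a[0]), so groupsize == 2.  */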
fb85abff 2158
 2159  /* A non-consecutive access is possible only if it is part of an interleaving group.  */
21009880 2160 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
fb85abff 2161 {
 2162      /* Check if this DR is part of an interleaving group, and is the single
 2163	 element of that group that is accessed in the loop.  */
48e1416a 2164
fb85abff 2165 /* Gaps are supported only for loads. STEP must be a multiple of the type
2166 size. The size of the group must be a power of 2. */
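      /* Hypothetical example: a read  x += a[4 * i]  has
	 DR_STEP == 4 * sizeof (a[0]) and no other group members, so it is a
	 single-element group of size 4 with gaps; it is accepted here
	 because it is a read, its step is a multiple of the type size, and
	 its group size (4) is a power of 2.  */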
2167 if (DR_IS_READ (dr)
2168 && (dr_step % type_size) == 0
ee612634 2169 && groupsize > 0
2170 && exact_log2 (groupsize) != -1)
fb85abff 2171 {
21009880 2172 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
ee612634 2173 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
fb85abff 2174 if (vect_print_dump_info (REPORT_DR_DETAILS))
2175 {
37545e54 2176 fprintf (vect_dump, "Detected single element interleaving ");
fb85abff 2177 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2178 fprintf (vect_dump, " step ");
2179 print_generic_expr (vect_dump, step, TDF_SLIM);
2180 }
a4ee7fac 2181
2182 if (loop_vinfo)
2183 {
a4ee7fac 2184 if (vect_print_dump_info (REPORT_DETAILS))
2185 fprintf (vect_dump, "Data access with gaps requires scalar "
2186 "epilogue loop");
d88ecd95 2187 if (loop->inner)
2188 {
2189 if (vect_print_dump_info (REPORT_DETAILS))
2190 fprintf (vect_dump, "Peeling for outer loop is not"
2191 " supported");
2192 return false;
2193 }
2194
2195 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
a4ee7fac 2196 }
2197
fb85abff 2198 return true;
2199 }
6ea6a380 2200
fb85abff 2201 if (vect_print_dump_info (REPORT_DETAILS))
6ea6a380 2202 {
2203 fprintf (vect_dump, "not consecutive access ");
2204 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2205 }
2206
2207 if (bb_vinfo)
2208 {
2209 /* Mark the statement as unvectorizable. */
2210 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2211 return true;
2212 }
2213
fb85abff 2214 return false;
2215 }
2216
21009880 2217 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
fb85abff 2218 {
2219 /* First stmt in the interleaving chain. Check the chain. */
21009880 2220 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
fb85abff 2221 struct data_reference *data_ref = dr;
1a0e7d51 2222 unsigned int count = 1;
fb85abff 2223 tree next_step;
2224 tree prev_init = DR_INIT (data_ref);
2225 gimple prev = stmt;
1a0e7d51 2226 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
fb85abff 2227
2228 while (next)
2229 {
 282bf14c 2230	      /* Skip same data-refs.  If two or more stmts share a
 2231		 data-ref (supported only for loads), we vectorize only the first
 2232		 stmt, and the rest get their vectorized loads from the first
 2233		 one.  */
fb85abff 2234 if (!tree_int_cst_compare (DR_INIT (data_ref),
2235 DR_INIT (STMT_VINFO_DATA_REF (
2236 vinfo_for_stmt (next)))))
2237 {
9ff25603 2238 if (DR_IS_WRITE (data_ref))
fb85abff 2239 {
2240 if (vect_print_dump_info (REPORT_DETAILS))
2241 fprintf (vect_dump, "Two store stmts share the same dr.");
2242 return false;
2243 }
2244
 2245	      /* Check that there are no load-store dependences for these loads,
 2246		 to prevent a load-store-load sequence to the same location.  */
21009880 2247 if (GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
2248 || GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
fb85abff 2249 {
2250 if (vect_print_dump_info (REPORT_DETAILS))
2251 fprintf (vect_dump,
2252 "READ_WRITE dependence in interleaving.");
2253 return false;
2254 }
2255
2256 /* For load use the same data-ref load. */
21009880 2257 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
fb85abff 2258
2259 prev = next;
21009880 2260 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
fb85abff 2261 continue;
2262 }
a4ee7fac 2263
fb85abff 2264 prev = next;
2265
2266 /* Check that all the accesses have the same STEP. */
2267 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
2268 if (tree_int_cst_compare (step, next_step))
2269 {
2270 if (vect_print_dump_info (REPORT_DETAILS))
2271 fprintf (vect_dump, "not consecutive access in interleaving");
2272 return false;
2273 }
2274
2275 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
2276 /* Check that the distance between two accesses is equal to the type
2277 size. Otherwise, we have gaps. */
2278 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2279 - TREE_INT_CST_LOW (prev_init)) / type_size;
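	  /* Illustrative numbers: with type_size == 4, prev_init == 0 and
	     DR_INIT (data_ref) == 8, diff == (8 - 0) / 4 == 2, i.e. there is
	     a one-element gap between these two members of the group.  */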
2280 if (diff != 1)
2281 {
2282 /* FORNOW: SLP of accesses with gaps is not supported. */
2283 slp_impossible = true;
9ff25603 2284 if (DR_IS_WRITE (data_ref))
fb85abff 2285 {
2286 if (vect_print_dump_info (REPORT_DETAILS))
2287 fprintf (vect_dump, "interleaved store with gaps");
2288 return false;
2289 }
b11576bf 2290
2291 gaps += diff - 1;
fb85abff 2292 }
2293
a4ee7fac 2294 last_accessed_element += diff;
2295
fb85abff 2296 /* Store the gap from the previous member of the group. If there is no
21009880 2297 gap in the access, GROUP_GAP is always 1. */
2298 GROUP_GAP (vinfo_for_stmt (next)) = diff;
fb85abff 2299
2300 prev_init = DR_INIT (data_ref);
21009880 2301 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
fb85abff 2302 /* Count the number of data-refs in the chain. */
2303 count++;
2304 }
2305
 2306      /* COUNT is the number of accesses found; we multiply it by the size of
2307 the type to get COUNT_IN_BYTES. */
2308 count_in_bytes = type_size * count;
2309
48e1416a 2310 /* Check that the size of the interleaving (including gaps) is not
37545e54 2311 greater than STEP. */
b11576bf 2312 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
fb85abff 2313 {
2314 if (vect_print_dump_info (REPORT_DETAILS))
2315 {
2316 fprintf (vect_dump, "interleaving size is greater than step for ");
2317 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
2318 }
2319 return false;
2320 }
2321
2322 /* Check that the size of the interleaving is equal to STEP for stores,
2323 i.e., that there are no gaps. */
37545e54 2324 if (dr_step && dr_step != count_in_bytes)
fb85abff 2325 {
2326 if (DR_IS_READ (dr))
2327 {
2328 slp_impossible = true;
 2329 	      /* There is a gap after the last load in the group.  This gap is the
 ee612634 2330		 difference between the groupsize and the number of elements.
2331 When there is no gap, this difference should be 0. */
2332 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
fb85abff 2333 }
2334 else
2335 {
2336 if (vect_print_dump_info (REPORT_DETAILS))
2337 fprintf (vect_dump, "interleaved store with gaps");
2338 return false;
2339 }
2340 }
2341
2342 /* Check that STEP is a multiple of type size. */
37545e54 2343 if (dr_step && (dr_step % type_size) != 0)
fb85abff 2344 {
2345 if (vect_print_dump_info (REPORT_DETAILS))
2346 {
2347 fprintf (vect_dump, "step is not a multiple of type size: step ");
2348 print_generic_expr (vect_dump, step, TDF_SLIM);
2349 fprintf (vect_dump, " size ");
2350 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
2351 TDF_SLIM);
2352 }
2353 return false;
2354 }
2355
ee612634 2356 if (groupsize == 0)
2357 groupsize = count;
48e1416a 2358
ee612634 2359 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
fb85abff 2360 if (vect_print_dump_info (REPORT_DETAILS))
ee612634 2361 fprintf (vect_dump, "Detected interleaving of size %d", (int)groupsize);
fb85abff 2362
48e1416a 2363 /* SLP: create an SLP data structure for every interleaving group of
 fb85abff 2364     stores for further analysis in vect_analyze_slp.  */
9ff25603 2365 if (DR_IS_WRITE (dr) && !slp_impossible)
37545e54 2366 {
2367 if (loop_vinfo)
ee612634 2368 VEC_safe_push (gimple, heap, LOOP_VINFO_GROUPED_STORES (loop_vinfo),
37545e54 2369 stmt);
2370 if (bb_vinfo)
ee612634 2371 VEC_safe_push (gimple, heap, BB_VINFO_GROUPED_STORES (bb_vinfo),
37545e54 2372 stmt);
2373 }
a4ee7fac 2374
 2375      /* There is a gap at the end of the group.  */
ee612634 2376 if (groupsize - last_accessed_element > 0 && loop_vinfo)
a4ee7fac 2377 {
a4ee7fac 2378 if (vect_print_dump_info (REPORT_DETAILS))
2379 fprintf (vect_dump, "Data access with gaps requires scalar "
2380 "epilogue loop");
d88ecd95 2381 if (loop->inner)
2382 {
2383 if (vect_print_dump_info (REPORT_DETAILS))
2384 fprintf (vect_dump, "Peeling for outer loop is not supported");
2385 return false;
2386 }
2387
2388 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
a4ee7fac 2389 }
fb85abff 2390 }
2391
2392 return true;
2393}
2394
2395
2396/* Analyze the access pattern of the data-reference DR.
2397 In case of non-consecutive accesses call vect_analyze_group_access() to
ee612634 2398 analyze groups of accesses. */
fb85abff 2399
2400static bool
2401vect_analyze_data_ref_access (struct data_reference *dr)
2402{
2403 tree step = DR_STEP (dr);
2404 tree scalar_type = TREE_TYPE (DR_REF (dr));
2405 gimple stmt = DR_STMT (dr);
2406 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2407 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 2408 struct loop *loop = NULL;
fb85abff 2409
37545e54 2410 if (loop_vinfo)
2411 loop = LOOP_VINFO_LOOP (loop_vinfo);
48e1416a 2412
37545e54 2413 if (loop_vinfo && !step)
fb85abff 2414 {
2415 if (vect_print_dump_info (REPORT_DETAILS))
37545e54 2416 fprintf (vect_dump, "bad data-ref access in loop");
fb85abff 2417 return false;
2418 }
2419
b5ba3316 2420 /* Allow invariant loads in loops. */
f634c3e9 2421 if (loop_vinfo && integer_zerop (step))
b04940e7 2422 {
2423 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2424 return DR_IS_READ (dr);
2425 }
fb85abff 2426
37545e54 2427 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 2428 {
2429 /* Interleaved accesses are not yet supported within outer-loop
2430 vectorization for references in the inner-loop. */
21009880 2431 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
fb85abff 2432
2433 /* For the rest of the analysis we use the outer-loop step. */
2434 step = STMT_VINFO_DR_STEP (stmt_info);
f634c3e9 2435 if (integer_zerop (step))
fb85abff 2436 {
2437 if (vect_print_dump_info (REPORT_ALIGNMENT))
2438 fprintf (vect_dump, "zero step in outer loop.");
2439 if (DR_IS_READ (dr))
48e1416a 2440 return true;
fb85abff 2441 else
2442 return false;
2443 }
2444 }
2445
2446 /* Consecutive? */
f634c3e9 2447 if (TREE_CODE (step) == INTEGER_CST)
fb85abff 2448 {
f634c3e9 2449 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2450 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2451 || (dr_step < 0
2452 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2453 {
2454 /* Mark that it is not interleaving. */
2455 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2456 return true;
2457 }
fb85abff 2458 }
2459
37545e54 2460 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 2461 {
2462 if (vect_print_dump_info (REPORT_ALIGNMENT))
ee612634 2463 fprintf (vect_dump, "grouped access in outer loop.");
fb85abff 2464 return false;
2465 }
2466
f634c3e9 2467 /* Assume this is a DR handled by non-constant strided load case. */
2468 if (TREE_CODE (step) != INTEGER_CST)
2469 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2470
 fb85abff 2471  /* Not a consecutive access - check if it's part of an interleaving group.  */
2472 return vect_analyze_group_access (dr);
2473}
2474
2475
2476/* Function vect_analyze_data_ref_accesses.
2477
2478 Analyze the access pattern of all the data references in the loop.
2479
2480 FORNOW: the only access pattern that is considered vectorizable is a
2481 simple step 1 (consecutive) access.
2482
2483 FORNOW: handle only arrays and pointer accesses. */
2484
2485bool
37545e54 2486vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 2487{
2488 unsigned int i;
37545e54 2489 VEC (data_reference_p, heap) *datarefs;
fb85abff 2490 struct data_reference *dr;
2491
2492 if (vect_print_dump_info (REPORT_DETAILS))
2493 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
2494
37545e54 2495 if (loop_vinfo)
2496 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2497 else
2498 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2499
48148244 2500 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
6ea6a380 2501 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2502 && !vect_analyze_data_ref_access (dr))
fb85abff 2503 {
f083cd24 2504 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 2505 fprintf (vect_dump, "not vectorized: complicated access pattern.");
6ea6a380 2506
2507 if (bb_vinfo)
2508 {
2509 /* Mark the statement as not vectorizable. */
2510 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2511 continue;
2512 }
2513 else
2514 return false;
fb85abff 2515 }
2516
2517 return true;
2518}
2519
2520/* Function vect_prune_runtime_alias_test_list.
2521
2522 Prune a list of ddrs to be tested at run-time by versioning for alias.
 2523    Return FALSE if the resulting list of ddrs is longer than allowed by
2524 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2525
2526bool
2527vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2528{
2529 VEC (ddr_p, heap) * ddrs =
2530 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
2531 unsigned i, j;
2532
2533 if (vect_print_dump_info (REPORT_DETAILS))
2534 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
2535
2536 for (i = 0; i < VEC_length (ddr_p, ddrs); )
2537 {
2538 bool found;
2539 ddr_p ddr_i;
2540
2541 ddr_i = VEC_index (ddr_p, ddrs, i);
2542 found = false;
2543
2544 for (j = 0; j < i; j++)
2545 {
2546 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
2547
2548 if (vect_vfa_range_equal (ddr_i, ddr_j))
2549 {
2550 if (vect_print_dump_info (REPORT_DR_DETAILS))
2551 {
2552 fprintf (vect_dump, "found equal ranges ");
2553 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
2554 fprintf (vect_dump, ", ");
2555 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
2556 fprintf (vect_dump, " and ");
2557 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
2558 fprintf (vect_dump, ", ");
2559 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
2560 }
2561 found = true;
2562 break;
2563 }
2564 }
48e1416a 2565
fb85abff 2566 if (found)
2567 {
2568 VEC_ordered_remove (ddr_p, ddrs, i);
2569 continue;
2570 }
2571 i++;
2572 }
2573
2574 if (VEC_length (ddr_p, ddrs) >
2575 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
2576 {
2577 if (vect_print_dump_info (REPORT_DR_DETAILS))
2578 {
2579 fprintf (vect_dump,
2580 "disable versioning for alias - max number of generated "
2581 "checks exceeded.");
2582 }
2583
2584 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
2585
2586 return false;
2587 }
2588
2589 return true;
2590}
2591
 16dfb112 2592/* Check whether a non-affine read in STMT is suitable for a gather load
2593 and if so, return a builtin decl for that operation. */
2594
2595tree
2596vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2597 tree *offp, int *scalep)
2598{
2599 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2600 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2601 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2602 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2603 tree offtype = NULL_TREE;
2604 tree decl, base, off;
2605 enum machine_mode pmode;
2606 int punsignedp, pvolatilep;
2607
 2608   /* The gather builtins need an address of the form
2609 loop_invariant + vector * {1, 2, 4, 8}
2610 or
2611 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2612 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2613 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2614 multiplications and additions in it. To get a vector, we need
2615 a single SSA_NAME that will be defined in the loop and will
2616 contain everything that is not loop invariant and that can be
 2617      vectorized.  The following code attempts to find such a preexisting
2618 SSA_NAME OFF and put the loop invariants into a tree BASE
2619 that can be gimplified before the loop. */
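  /* Hypothetical example (names below are for illustration only): for a
     read like  x = p[k + idx[i]]  with loop-invariant p and k and a
     loop-varying idx[i], we would like BASE to cover p + k * sizeof (*p),
     OFF to be the SSA_NAME holding idx[i], and SCALE to be sizeof (*p),
     matching the loop_invariant + vector * {1, 2, 4, 8} form above.  */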
2620 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2621 &pmode, &punsignedp, &pvolatilep, false);
2622 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2623
2624 if (TREE_CODE (base) == MEM_REF)
2625 {
2626 if (!integer_zerop (TREE_OPERAND (base, 1)))
2627 {
2628 if (off == NULL_TREE)
2629 {
2630 double_int moff = mem_ref_offset (base);
2631 off = double_int_to_tree (sizetype, moff);
2632 }
2633 else
2634 off = size_binop (PLUS_EXPR, off,
2635 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2636 }
2637 base = TREE_OPERAND (base, 0);
2638 }
2639 else
2640 base = build_fold_addr_expr (base);
2641
2642 if (off == NULL_TREE)
2643 off = size_zero_node;
2644
 2645   /* If base is not loop invariant, then if off is 0 we start with just
 2646      the constant offset in the loop invariant BASE and continue with base
 2647      as OFF, otherwise we give up.
 2648      We could handle that case by gimplifying the addition of base + off
 2649      into some SSA_NAME and using that as off, but for now we punt.  */
2650 if (!expr_invariant_in_loop_p (loop, base))
2651 {
2652 if (!integer_zerop (off))
2653 return NULL_TREE;
2654 off = base;
2655 base = size_int (pbitpos / BITS_PER_UNIT);
2656 }
2657 /* Otherwise put base + constant offset into the loop invariant BASE
2658 and continue with OFF. */
2659 else
2660 {
2661 base = fold_convert (sizetype, base);
2662 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
2663 }
2664
2665 /* OFF at this point may be either a SSA_NAME or some tree expression
2666 from get_inner_reference. Try to peel off loop invariants from it
2667 into BASE as long as possible. */
2668 STRIP_NOPS (off);
2669 while (offtype == NULL_TREE)
2670 {
2671 enum tree_code code;
2672 tree op0, op1, add = NULL_TREE;
2673
2674 if (TREE_CODE (off) == SSA_NAME)
2675 {
2676 gimple def_stmt = SSA_NAME_DEF_STMT (off);
2677
2678 if (expr_invariant_in_loop_p (loop, off))
2679 return NULL_TREE;
2680
2681 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
2682 break;
2683
2684 op0 = gimple_assign_rhs1 (def_stmt);
2685 code = gimple_assign_rhs_code (def_stmt);
2686 op1 = gimple_assign_rhs2 (def_stmt);
2687 }
2688 else
2689 {
2690 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
2691 return NULL_TREE;
2692 code = TREE_CODE (off);
2693 extract_ops_from_tree (off, &code, &op0, &op1);
2694 }
2695 switch (code)
2696 {
2697 case POINTER_PLUS_EXPR:
2698 case PLUS_EXPR:
2699 if (expr_invariant_in_loop_p (loop, op0))
2700 {
2701 add = op0;
2702 off = op1;
2703 do_add:
2704 add = fold_convert (sizetype, add);
2705 if (scale != 1)
2706 add = size_binop (MULT_EXPR, add, size_int (scale));
2707 base = size_binop (PLUS_EXPR, base, add);
2708 continue;
2709 }
2710 if (expr_invariant_in_loop_p (loop, op1))
2711 {
2712 add = op1;
2713 off = op0;
2714 goto do_add;
2715 }
2716 break;
2717 case MINUS_EXPR:
2718 if (expr_invariant_in_loop_p (loop, op1))
2719 {
2720 add = fold_convert (sizetype, op1);
2721 add = size_binop (MINUS_EXPR, size_zero_node, add);
2722 off = op0;
2723 goto do_add;
2724 }
2725 break;
2726 case MULT_EXPR:
2727 if (scale == 1 && host_integerp (op1, 0))
2728 {
2729 scale = tree_low_cst (op1, 0);
2730 off = op0;
2731 continue;
2732 }
2733 break;
2734 case SSA_NAME:
2735 off = op0;
2736 continue;
2737 CASE_CONVERT:
2738 if (!POINTER_TYPE_P (TREE_TYPE (op0))
2739 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
2740 break;
2741 if (TYPE_PRECISION (TREE_TYPE (op0))
2742 == TYPE_PRECISION (TREE_TYPE (off)))
2743 {
2744 off = op0;
2745 continue;
2746 }
2747 if (TYPE_PRECISION (TREE_TYPE (op0))
2748 < TYPE_PRECISION (TREE_TYPE (off)))
2749 {
2750 off = op0;
2751 offtype = TREE_TYPE (off);
2752 STRIP_NOPS (off);
2753 continue;
2754 }
2755 break;
2756 default:
2757 break;
2758 }
2759 break;
2760 }
2761
 2762   /* If at the end OFF still isn't an SSA_NAME or isn't
2763 defined in the loop, punt. */
2764 if (TREE_CODE (off) != SSA_NAME
2765 || expr_invariant_in_loop_p (loop, off))
2766 return NULL_TREE;
2767
2768 if (offtype == NULL_TREE)
2769 offtype = TREE_TYPE (off);
2770
2771 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
2772 offtype, scale);
2773 if (decl == NULL_TREE)
2774 return NULL_TREE;
2775
2776 if (basep)
2777 *basep = base;
2778 if (offp)
2779 *offp = off;
2780 if (scalep)
2781 *scalep = scale;
2782 return decl;
2783}
2784
 7a10f06b 2785/* Check whether a non-affine load in STMT (being in the loop referred to
 2786    in LOOP_VINFO) is suitable for handling as a strided load.  That is the case
 2787    if its address is a simple induction variable.  If so, return the base
 2788    of that induction variable in *BASEP and the (loop-invariant) step
 2789    in *STEPP, each only when that pointer is non-NULL.
2790
2791 This handles ARRAY_REFs (with variant index) and MEM_REFs (with variant
2792 base pointer) only. */
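/* Hypothetical example (for illustration only): a read  x = a[i * stride]
   with a loop-invariant STRIDE has a variant index that is a simple
   induction variable {initial index, +, stride} in the loop, so *BASEP
   would receive the index's initial value and *STEPP the loop-invariant
   stride.  */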
2793
2794bool
2795vect_check_strided_load (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2796 tree *stepp)
2797{
2798 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2799 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2800 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2801 tree base, off;
2802 affine_iv iv;
2803
f634c3e9 2804 if (!DR_IS_READ (dr))
2805 return false;
2806
7a10f06b 2807 base = DR_REF (dr);
2808
2809 if (TREE_CODE (base) == ARRAY_REF)
2810 {
2811 off = TREE_OPERAND (base, 1);
2812 base = TREE_OPERAND (base, 0);
2813 }
2814 else if (TREE_CODE (base) == MEM_REF)
2815 {
2816 off = TREE_OPERAND (base, 0);
2817 base = TREE_OPERAND (base, 1);
2818 }
2819 else
2820 return false;
2821
2822 if (TREE_CODE (off) != SSA_NAME)
2823 return false;
2824
2825 if (!expr_invariant_in_loop_p (loop, base)
2826 || !simple_iv (loop, loop_containing_stmt (stmt), off, &iv, true))
2827 return false;
2828
2829 if (basep)
2830 *basep = iv.base;
2831 if (stepp)
2832 *stepp = iv.step;
2833 return true;
2834}
fb85abff 2835
2836/* Function vect_analyze_data_refs.
2837
37545e54 2838 Find all the data references in the loop or basic block.
fb85abff 2839
2840 The general structure of the analysis of data refs in the vectorizer is as
2841 follows:
48e1416a 2842 1- vect_analyze_data_refs(loop/bb): call
37545e54 2843 compute_data_dependences_for_loop/bb to find and analyze all data-refs
2844 in the loop/bb and their dependences.
fb85abff 2845 2- vect_analyze_dependences(): apply dependence testing using ddrs.
2846 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
2847 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
2848
2849*/
2850
2851bool
91a74fc6 2852vect_analyze_data_refs (loop_vec_info loop_vinfo,
2853 bb_vec_info bb_vinfo,
2854 int *min_vf)
fb85abff 2855{
37545e54 2856 struct loop *loop = NULL;
2857 basic_block bb = NULL;
fb85abff 2858 unsigned int i;
2859 VEC (data_reference_p, heap) *datarefs;
2860 struct data_reference *dr;
2861 tree scalar_type;
3c9feaca 2862 bool res, stop_bb_analysis = false;
fb85abff 2863
2864 if (vect_print_dump_info (REPORT_DETAILS))
2865 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
48e1416a 2866
37545e54 2867 if (loop_vinfo)
2868 {
2869 loop = LOOP_VINFO_LOOP (loop_vinfo);
590a32b6 2870 res = compute_data_dependences_for_loop
a8af2e86 2871 (loop, true,
2872 &LOOP_VINFO_LOOP_NEST (loop_vinfo),
2873 &LOOP_VINFO_DATAREFS (loop_vinfo),
590a32b6 2874 &LOOP_VINFO_DDRS (loop_vinfo));
2875
2876 if (!res)
2877 {
2878 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2879 fprintf (vect_dump, "not vectorized: loop contains function calls"
2880 " or data references that cannot be analyzed");
2881 return false;
2882 }
2883
37545e54 2884 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2885 }
2886 else
2887 {
ec84182d 2888 gimple_stmt_iterator gsi;
2889
37545e54 2890 bb = BB_VINFO_BB (bb_vinfo);
ec84182d 2891 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2892 {
2893 gimple stmt = gsi_stmt (gsi);
2894 if (!find_data_references_in_stmt (NULL, stmt,
2895 &BB_VINFO_DATAREFS (bb_vinfo)))
2896 {
2897 /* Mark the rest of the basic-block as unvectorizable. */
2898 for (; !gsi_end_p (gsi); gsi_next (&gsi))
156b8feb 2899 {
2900 stmt = gsi_stmt (gsi);
2901 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
2902 }
ec84182d 2903 break;
2904 }
2905 }
2906 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
2907 &BB_VINFO_DDRS (bb_vinfo), NULL, true))
590a32b6 2908 {
2909 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2910 fprintf (vect_dump, "not vectorized: basic block contains function"
2911 " calls or data references that cannot be analyzed");
2912 return false;
2913 }
2914
37545e54 2915 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2916 }
fb85abff 2917
282bf14c 2918 /* Go through the data-refs, check that the analysis succeeded. Update
2919 pointer from stmt_vec_info struct to DR and vectype. */
fb85abff 2920
48148244 2921 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr)
fb85abff 2922 {
2923 gimple stmt;
2924 stmt_vec_info stmt_info;
48e1416a 2925 tree base, offset, init;
16dfb112 2926 bool gather = false;
91a74fc6 2927 int vf;
48e1416a 2928
fb85abff 2929 if (!dr || !DR_REF (dr))
2930 {
f083cd24 2931 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 2932 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
5ea0545e 2933
fb85abff 2934 return false;
2935 }
2936
2937 stmt = DR_STMT (dr);
2938 stmt_info = vinfo_for_stmt (stmt);
2939
3c9feaca 2940 if (stop_bb_analysis)
2941 {
2942 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2943 continue;
2944 }
2945
fb85abff 2946 /* Check that analysis of the data-ref succeeded. */
2947 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
16dfb112 2948 || !DR_STEP (dr))
fb85abff 2949 {
 16dfb112 2950	  /* If the target supports vector gather loads, see if they can
 2951	     be used.  */
2952 if (loop_vinfo
2953 && DR_IS_READ (dr)
2954 && !TREE_THIS_VOLATILE (DR_REF (dr))
2955 && targetm.vectorize.builtin_gather != NULL
2956 && !nested_in_vect_loop_p (loop, stmt))
2957 {
2958 struct data_reference *newdr
2959 = create_data_ref (NULL, loop_containing_stmt (stmt),
2960 DR_REF (dr), stmt, true);
2961 gcc_assert (newdr != NULL && DR_REF (newdr));
2962 if (DR_BASE_ADDRESS (newdr)
2963 && DR_OFFSET (newdr)
2964 && DR_INIT (newdr)
2965 && DR_STEP (newdr)
2966 && integer_zerop (DR_STEP (newdr)))
2967 {
2968 dr = newdr;
2969 gather = true;
2970 }
2971 else
2972 free_data_ref (newdr);
2973 }
6ea6a380 2974
16dfb112 2975 if (!gather)
2976 {
2977 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2978 {
2979 fprintf (vect_dump, "not vectorized: data ref analysis "
2980 "failed ");
2981 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2982 }
3c9feaca 2983
16dfb112 2984 if (bb_vinfo)
2985 {
2986 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2987 stop_bb_analysis = true;
2988 continue;
2989 }
2990
2991 return false;
2992 }
fb85abff 2993 }
2994
2995 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2996 {
f083cd24 2997 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 2998 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2999 "constant");
3c9feaca 3000
3001 if (bb_vinfo)
3002 {
3003 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3004 stop_bb_analysis = true;
3005 continue;
3006 }
3007
16dfb112 3008 if (gather)
3009 free_data_ref (dr);
3010 return false;
fb85abff 3011 }
3012
83ec4165 3013 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3014 {
3015 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3016 {
3017 fprintf (vect_dump, "not vectorized: volatile type ");
3018 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3019 }
3c9feaca 3020
3021 if (bb_vinfo)
3022 {
3023 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3024 stop_bb_analysis = true;
3025 continue;
3026 }
3027
83ec4165 3028 return false;
3029 }
3030
96b07a49 3031 if (stmt_can_throw_internal (stmt))
42a6710e 3032 {
3033 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3034 {
3035 fprintf (vect_dump, "not vectorized: statement can throw an "
3036 "exception ");
3037 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3038 }
3c9feaca 3039
3040 if (bb_vinfo)
3041 {
3042 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3043 stop_bb_analysis = true;
3044 continue;
3045 }
3046
16dfb112 3047 if (gather)
3048 free_data_ref (dr);
42a6710e 3049 return false;
3050 }
3051
87c952b8 3052 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3053 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3054 {
3055 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3056 {
3057 fprintf (vect_dump, "not vectorized: statement is bitfield "
3058 "access ");
3059 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3060 }
3061
3062 if (bb_vinfo)
3063 {
3064 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3065 stop_bb_analysis = true;
3066 continue;
3067 }
3068
3069 if (gather)
3070 free_data_ref (dr);
3071 return false;
3072 }
3073
3074 base = unshare_expr (DR_BASE_ADDRESS (dr));
3075 offset = unshare_expr (DR_OFFSET (dr));
3076 init = unshare_expr (DR_INIT (dr));
3077
635bf3aa 3078 if (is_gimple_call (stmt))
3079 {
3080 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3081 {
3082 fprintf (vect_dump, "not vectorized: dr in a call ");
3083 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3084 }
3085
3086 if (bb_vinfo)
3087 {
3088 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3089 stop_bb_analysis = true;
3090 continue;
3091 }
3092
3093 if (gather)
3094 free_data_ref (dr);
3095 return false;
3096 }
3097
fb85abff 3098 /* Update DR field in stmt_vec_info struct. */
fb85abff 3099
 3100      /* If the dataref is in an inner-loop of the loop that is considered
 3101	 for vectorization, we also want to analyze the access relative to
48e1416a 3102 the outer-loop (DR contains information only relative to the
fb85abff 3103 inner-most enclosing loop). We do that by building a reference to the
3104 first location accessed by the inner-loop, and analyze it relative to
48e1416a 3105 the outer-loop. */
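      /* Illustrative case (hypothetical nested loop):
	    for (i = 0; i < n; i++)        outer loop, being vectorized
	      for (j = 0; j < m; j++)
	        ... = a[i][j];
	 The DR describes a[i][j] relative to the j-loop; below we analyze
	 *(BASE+INIT), i.e. a[i][0], as an affine function of the outer IV,
	 giving an outer step of m * sizeof (a[0][0]).  */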
3106 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 3107 {
3108 tree outer_step, outer_base, outer_init;
3109 HOST_WIDE_INT pbitsize, pbitpos;
3110 tree poffset;
3111 enum machine_mode pmode;
3112 int punsignedp, pvolatilep;
3113 affine_iv base_iv, offset_iv;
3114 tree dinit;
3115
48e1416a 3116 /* Build a reference to the first location accessed by the
282bf14c 3117 inner-loop: *(BASE+INIT). (The first location is actually
fb85abff 3118 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3119 tree inner_base = build_fold_indirect_ref
2cc66f2a 3120 (fold_build_pointer_plus (base, init));
fb85abff 3121
3122 if (vect_print_dump_info (REPORT_DETAILS))
3123 {
3124 fprintf (vect_dump, "analyze in outer-loop: ");
3125 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
3126 }
3127
48e1416a 3128 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
fb85abff 3129 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3130 gcc_assert (outer_base != NULL_TREE);
3131
3132 if (pbitpos % BITS_PER_UNIT != 0)
3133 {
3134 if (vect_print_dump_info (REPORT_DETAILS))
3135 fprintf (vect_dump, "failed: bit offset alignment.\n");
3136 return false;
3137 }
3138
3139 outer_base = build_fold_addr_expr (outer_base);
48e1416a 3140 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
fb85abff 3141 &base_iv, false))
3142 {
3143 if (vect_print_dump_info (REPORT_DETAILS))
3144 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
3145 return false;
3146 }
3147
3148 if (offset)
3149 {
3150 if (poffset)
48e1416a 3151 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
fb85abff 3152 poffset);
3153 else
3154 poffset = offset;
3155 }
3156
3157 if (!poffset)
3158 {
3159 offset_iv.base = ssize_int (0);
3160 offset_iv.step = ssize_int (0);
3161 }
48e1416a 3162 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
fb85abff 3163 &offset_iv, false))
3164 {
3165 if (vect_print_dump_info (REPORT_DETAILS))
3166 fprintf (vect_dump, "evolution of offset is not affine.\n");
3167 return false;
3168 }
3169
3170 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3171 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3172 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3173 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3174 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3175
3176 outer_step = size_binop (PLUS_EXPR,
3177 fold_convert (ssizetype, base_iv.step),
3178 fold_convert (ssizetype, offset_iv.step));
3179
3180 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3181 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
48e1416a 3182 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
fb85abff 3183 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
48e1416a 3184 STMT_VINFO_DR_OFFSET (stmt_info) =
fb85abff 3185 fold_convert (ssizetype, offset_iv.base);
48e1416a 3186 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
fb85abff 3187 size_int (highest_pow2_factor (offset_iv.base));
3188
3189 if (vect_print_dump_info (REPORT_DETAILS))
3190 {
3191 fprintf (vect_dump, "\touter base_address: ");
3192 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
3193 fprintf (vect_dump, "\n\touter offset from base address: ");
3194 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
3195 fprintf (vect_dump, "\n\touter constant offset from base address: ");
3196 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
3197 fprintf (vect_dump, "\n\touter step: ");
3198 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
3199 fprintf (vect_dump, "\n\touter aligned to: ");
3200 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
3201 }
3202 }
3203
3204 if (STMT_VINFO_DATA_REF (stmt_info))
3205 {
f083cd24 3206 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 3207 {
3208 fprintf (vect_dump,
3209 "not vectorized: more than one data ref in stmt: ");
3210 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3211 }
3c9feaca 3212
3213 if (bb_vinfo)
3214 {
3215 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3216 stop_bb_analysis = true;
3217 continue;
3218 }
3219
16dfb112 3220 if (gather)
3221 free_data_ref (dr);
fb85abff 3222 return false;
3223 }
f083cd24 3224
fb85abff 3225 STMT_VINFO_DATA_REF (stmt_info) = dr;
48e1416a 3226
fb85abff 3227 /* Set vectype for STMT. */
3228 scalar_type = TREE_TYPE (DR_REF (dr));
3229 STMT_VINFO_VECTYPE (stmt_info) =
3230 get_vectype_for_scalar_type (scalar_type);
48e1416a 3231 if (!STMT_VINFO_VECTYPE (stmt_info))
fb85abff 3232 {
f083cd24 3233 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
fb85abff 3234 {
3235 fprintf (vect_dump,
3236 "not vectorized: no vectype for stmt: ");
3237 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3238 fprintf (vect_dump, " scalar_type: ");
3239 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
3240 }
6ea6a380 3241
3242 if (bb_vinfo)
3243 {
3244 /* Mark the statement as not vectorizable. */
3245 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
3c9feaca 3246 stop_bb_analysis = true;
6ea6a380 3247 continue;
3248 }
16dfb112 3249
3250 if (gather)
3251 {
3252 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3253 free_data_ref (dr);
3254 }
3255 return false;
fb85abff 3256 }
91a74fc6 3257
3258 /* Adjust the minimal vectorization factor according to the
3259 vector type. */
3260 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3261 if (vf > *min_vf)
3262 *min_vf = vf;
16dfb112 3263
3264 if (gather)
3265 {
3266 unsigned int j, k, n;
3267 struct data_reference *olddr
3268 = VEC_index (data_reference_p, datarefs, i);
3269 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
3270 struct data_dependence_relation *ddr, *newddr;
3271 bool bad = false;
3272 tree off;
3273 VEC (loop_p, heap) *nest = LOOP_VINFO_LOOP_NEST (loop_vinfo);
3274
7a10f06b 3275 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3276 if (gather
3277 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3278 gather = false;
f634c3e9 3279 if (!gather)
16dfb112 3280 {
3281 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3282 {
3283 fprintf (vect_dump,
f634c3e9 3284 "not vectorized: not suitable for gather load ");
16dfb112 3285 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3286 }
3287 return false;
3288 }
3289
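	  /* All dependence relations involving this access were computed
	     against the original data-ref OLDDR.  Rebuild every relation in
	     DDRS that mentions OLDDR so that it refers to the new gather
	     data-ref DR instead, and note in BAD whether any possible
	     dependence against a write remains, since that would make the
	     gather load unusable.  */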
3290 n = VEC_length (data_reference_p, datarefs) - 1;
3291 for (j = 0, k = i - 1; j < i; j++)
3292 {
3293 ddr = VEC_index (ddr_p, ddrs, k);
3294 gcc_assert (DDR_B (ddr) == olddr);
3295 newddr = initialize_data_dependence_relation (DDR_A (ddr), dr,
3296 nest);
3297 VEC_replace (ddr_p, ddrs, k, newddr);
3298 free_dependence_relation (ddr);
3299 if (!bad
3300 && DR_IS_WRITE (DDR_A (newddr))
3301 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3302 bad = true;
3303 k += --n;
3304 }
3305
3306 k++;
3307 n = k + VEC_length (data_reference_p, datarefs) - i - 1;
3308 for (; k < n; k++)
3309 {
3310 ddr = VEC_index (ddr_p, ddrs, k);
3311 gcc_assert (DDR_A (ddr) == olddr);
3312 newddr = initialize_data_dependence_relation (dr, DDR_B (ddr),
3313 nest);
3314 VEC_replace (ddr_p, ddrs, k, newddr);
3315 free_dependence_relation (ddr);
3316 if (!bad
3317 && DR_IS_WRITE (DDR_B (newddr))
3318 && DDR_ARE_DEPENDENT (newddr) != chrec_known)
3319 bad = true;
3320 }
3321
3322 k = VEC_length (ddr_p, ddrs)
3323 - VEC_length (data_reference_p, datarefs) + i;
3324 ddr = VEC_index (ddr_p, ddrs, k);
3325 gcc_assert (DDR_A (ddr) == olddr && DDR_B (ddr) == olddr);
3326 newddr = initialize_data_dependence_relation (dr, dr, nest);
16dfb112 3327 VEC_replace (ddr_p, ddrs, k, newddr);
3328 free_dependence_relation (ddr);
3329 VEC_replace (data_reference_p, datarefs, i, dr);
3330
3331 if (bad)
3332 {
3333 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3334 {
3335 fprintf (vect_dump,
3336 "not vectorized: data dependence conflict"
f634c3e9 3337 " prevents gather load");
16dfb112 3338 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3339 }
3340 return false;
3341 }
3342
f634c3e9 3343 STMT_VINFO_GATHER_P (stmt_info) = true;
3344 }
3345 else if (loop_vinfo
3346 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3347 {
3348 bool strided_load = false;
3349 if (!nested_in_vect_loop_p (loop, stmt))
3350 strided_load
3351 = vect_check_strided_load (stmt, loop_vinfo, NULL, NULL);
3352 if (!strided_load)
3353 {
3354 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
3355 {
3356 fprintf (vect_dump,
3357 "not vectorized: not suitable for strided load ");
3358 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
3359 }
3360 return false;
3361 }
3362 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
16dfb112 3363 }
fb85abff 3364 }
48e1416a 3365
fb85abff 3366 return true;
3367}
3368
3369
3370/* Function vect_get_new_vect_var.
3371
282bf14c 3372 Returns a name for a new variable. The current naming scheme appends the
48e1416a 3373 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3374 the name of vectorizer generated variables, and appends that to NAME if
fb85abff 3375 provided. */
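
/* For illustration only: with the prefixes above, a temporary derived from a
   source variable "sum" would be named roughly as follows (unique numeric
   suffixes later added by the SSA machinery are omitted):

     vect_simple_var   ->  "vect_sum"
     vect_scalar_var   ->  "stmp_sum"
     vect_pointer_var  ->  "vect_psum"

   The names are purely cosmetic and only help when reading vectorizer
   dumps.  */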
3376
3377tree
3378vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3379{
3380 const char *prefix;
3381 tree new_vect_var;
3382
3383 switch (var_kind)
3384 {
3385 case vect_simple_var:
3386 prefix = "vect_";
3387 break;
3388 case vect_scalar_var:
3389 prefix = "stmp_";
3390 break;
3391 case vect_pointer_var:
3392 prefix = "vect_p";
3393 break;
3394 default:
3395 gcc_unreachable ();
3396 }
3397
3398 if (name)
3399 {
3400 char* tmp = concat (prefix, name, NULL);
3401 new_vect_var = create_tmp_var (type, tmp);
3402 free (tmp);
3403 }
3404 else
3405 new_vect_var = create_tmp_var (type, prefix);
3406
3407 /* Mark vector typed variable as a gimple register variable. */
3408 if (TREE_CODE (type) == VECTOR_TYPE)
3409 DECL_GIMPLE_REG_P (new_vect_var) = true;
3410
3411 return new_vect_var;
3412}
3413
3414
3415/* Function vect_create_addr_base_for_vector_ref.
3416
3417 Create an expression that computes the address of the first memory location
3418 that will be accessed for a data reference.
3419
3420 Input:
3421 STMT: The statement containing the data reference.
3422 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3423    OFFSET: Optional.  If supplied, it is added to the initial address.
3424    LOOP:   Specify relative to which loop-nest the address should be computed.
3425 For example, when the dataref is in an inner-loop nested in an
3426 outer-loop that is now being vectorized, LOOP can be either the
282bf14c 3427 outer-loop, or the inner-loop. The first memory location accessed
fb85abff 3428 by the following dataref ('in' points to short):
3429
3430 for (i=0; i<N; i++)
3431 for (j=0; j<M; j++)
3432 s += in[i+j]
3433
3434 is as follows:
3435 if LOOP=i_loop: &in (relative to i_loop)
3436 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3437
3438 Output:
48e1416a 3439 1. Return an SSA_NAME whose value is the address of the memory location of
fb85abff 3440 the first vector of the data reference.
3441 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3442 these statement(s) which define the returned SSA_NAME.
3443
3444 FORNOW: We are only handling array accesses with step 1. */
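
/* Illustrative sketch only (the real statements are emitted into
   NEW_STMT_LIST as gimple): for the loop case the address computed below
   amounts to the following C-like arithmetic, where STEP is the size of the
   scalar element:

     base_off = (sizetype) DR_OFFSET (dr) + (sizetype) DR_INIT (dr);
     if (OFFSET)
       base_off += (sizetype) OFFSET * STEP;
     addr = (vectype *) (DR_BASE_ADDRESS (dr) + base_off);

   For the basic-block (SLP) case the offset and init parts are zero and the
   address of DR_REF itself is used.  */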
3445
3446tree
3447vect_create_addr_base_for_vector_ref (gimple stmt,
3448 gimple_seq *new_stmt_list,
3449 tree offset,
3450 struct loop *loop)
3451{
3452 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3453 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
fb85abff 3454 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3455 tree base_name;
3456 tree data_ref_base_var;
3457 tree vec_stmt;
3458 tree addr_base, addr_expr;
3459 tree dest;
3460 gimple_seq seq = NULL;
3461 tree base_offset = unshare_expr (DR_OFFSET (dr));
3462 tree init = unshare_expr (DR_INIT (dr));
f083cd24 3463 tree vect_ptr_type;
fb85abff 3464 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
37545e54 3465 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
41a92f78 3466 tree base;
fb85abff 3467
37545e54 3468 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
fb85abff 3469 {
37545e54 3470 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 3471
37545e54 3472 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
fb85abff 3473
3474 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3475 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3476 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3477 }
3478
37545e54 3479 if (loop_vinfo)
3480 base_name = build_fold_indirect_ref (data_ref_base);
3481 else
3482 {
3483 base_offset = ssize_int (0);
3484 init = ssize_int (0);
3485 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
48e1416a 3486 }
37545e54 3487
fb85abff 3488 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
3489 add_referenced_var (data_ref_base_var);
3490 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
3491 data_ref_base_var);
3492 gimple_seq_add_seq (new_stmt_list, seq);
3493
3494 /* Create base_offset */
3495 base_offset = size_binop (PLUS_EXPR,
3496 fold_convert (sizetype, base_offset),
3497 fold_convert (sizetype, init));
3498 dest = create_tmp_var (sizetype, "base_off");
3499 add_referenced_var (dest);
3500 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
3501 gimple_seq_add_seq (new_stmt_list, seq);
3502
3503 if (offset)
3504 {
3505 tree tmp = create_tmp_var (sizetype, "offset");
3506
3507 add_referenced_var (tmp);
3508 offset = fold_build2 (MULT_EXPR, sizetype,
3509 fold_convert (sizetype, offset), step);
3510 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3511 base_offset, offset);
3512 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
3513 gimple_seq_add_seq (new_stmt_list, seq);
3514 }
3515
3516 /* base + base_offset */
37545e54 3517 if (loop_vinfo)
2cc66f2a 3518 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
37545e54 3519 else
3520 {
182cf5a9 3521 addr_base = build1 (ADDR_EXPR,
3522 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3523 unshare_expr (DR_REF (dr)));
37545e54 3524 }
48e1416a 3525
fb85abff 3526 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
41a92f78 3527 base = get_base_address (DR_REF (dr));
3528 if (base
f544b9a4 3529 && TREE_CODE (base) == MEM_REF)
41a92f78 3530 vect_ptr_type
3531 = build_qualified_type (vect_ptr_type,
3532 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
fb85abff 3533
f083cd24 3534 vec_stmt = fold_convert (vect_ptr_type, addr_base);
fb85abff 3535 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
3536 get_name (base_name));
3537 add_referenced_var (addr_expr);
f083cd24 3538 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
fb85abff 3539 gimple_seq_add_seq (new_stmt_list, seq);
3540
f544b9a4 3541 if (DR_PTR_INFO (dr)
3542 && TREE_CODE (vec_stmt) == SSA_NAME)
1259ab70 3543 {
3544 duplicate_ssa_name_ptr_info (vec_stmt, DR_PTR_INFO (dr));
3545 if (offset)
ceea063b 3546 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (vec_stmt));
1259ab70 3547 }
f544b9a4 3548
fb85abff 3549 if (vect_print_dump_info (REPORT_DETAILS))
3550 {
3551 fprintf (vect_dump, "created ");
3552 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
3553 }
f083cd24 3554
fb85abff 3555 return vec_stmt;
3556}
3557
3558
3559/* Function vect_create_data_ref_ptr.
3560
bd5ba09f 3561   Create a new pointer-to-AGGR_TYPE variable (ap) that points to the first
3562 location accessed in the loop by STMT, along with the def-use update
3563 chain to appropriately advance the pointer through the loop iterations.
3564 Also set aliasing information for the pointer. This pointer is used by
3565 the callers to this function to create a memory reference expression for
3566 vector load/store access.
fb85abff 3567
3568 Input:
3569 1. STMT: a stmt that references memory. Expected to be of the form
3570 GIMPLE_ASSIGN <name, data-ref> or
3571 GIMPLE_ASSIGN <data-ref, name>.
bd5ba09f 3572 2. AGGR_TYPE: the type of the reference, which should be either a vector
3573 or an array.
3574 3. AT_LOOP: the loop where the vector memref is to be created.
3575 4. OFFSET (optional): an offset to be added to the initial address accessed
fb85abff 3576 by the data-ref in STMT.
bd5ba09f 3577   5. BSI: location where the new stmts are to be placed if there is no loop.
 3578   6. ONLY_INIT: indicate whether ap is to be updated in the loop, or remain
fb85abff 3579 pointing to the initial address.
fb85abff 3580
3581 Output:
3582 1. Declare a new ptr to vector_type, and have it point to the base of the
3583      data reference (initial address accessed by the data reference).
3584 For example, for vector of type V8HI, the following code is generated:
3585
bd5ba09f 3586 v8hi *ap;
3587 ap = (v8hi *)initial_address;
fb85abff 3588
3589 if OFFSET is not supplied:
3590 initial_address = &a[init];
3591 if OFFSET is supplied:
3592 initial_address = &a[init + OFFSET];
3593
3594 Return the initial_address in INITIAL_ADDRESS.
3595
3596 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
48e1416a 3597 update the pointer in each iteration of the loop.
fb85abff 3598
3599 Return the increment stmt that updates the pointer in PTR_INCR.
3600
48e1416a 3601 3. Set INV_P to true if the access pattern of the data reference in the
282bf14c 3602 vectorized loop is invariant. Set it to false otherwise.
fb85abff 3603
3604 4. Return the pointer. */
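
/* Illustrative sketch only, assuming a unit-stride access vectorized with
   V8HI as in the example above; the pointer created below behaves roughly
   like:

     v8hi *ap = (v8hi *) initial_address;    // step (2), in the preheader
     loop:
       ... *ap ...;                           // vector load/store
       ap = ap + 1;                           // step (3): advance by sizeof (v8hi)

   For an invariant access the step degenerates to zero, and for a negative
   step the pointer is decremented instead.  */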
3605
3606tree
bd5ba09f 3607vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3608 tree offset, tree *initial_address,
3609 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3610 bool only_init, bool *inv_p)
fb85abff 3611{
3612 tree base_name;
3613 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3614 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 3615 struct loop *loop = NULL;
3616 bool nested_in_vect_loop = false;
3617 struct loop *containing_loop = NULL;
bd5ba09f 3618 tree aggr_ptr_type;
3619 tree aggr_ptr;
fb85abff 3620 tree new_temp;
3621 gimple vec_stmt;
3622 gimple_seq new_stmt_list = NULL;
37545e54 3623 edge pe = NULL;
fb85abff 3624 basic_block new_bb;
bd5ba09f 3625 tree aggr_ptr_init;
fb85abff 3626 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
bd5ba09f 3627 tree aptr;
fb85abff 3628 gimple_stmt_iterator incr_gsi;
3629 bool insert_after;
1aa22f27 3630 bool negative;
fb85abff 3631 tree indx_before_incr, indx_after_incr;
3632 gimple incr;
3633 tree step;
37545e54 3634 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
41a92f78 3635 tree base;
48e1416a 3636
bd5ba09f 3637 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3638 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3639
37545e54 3640 if (loop_vinfo)
3641 {
3642 loop = LOOP_VINFO_LOOP (loop_vinfo);
3643 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3644 containing_loop = (gimple_bb (stmt))->loop_father;
3645 pe = loop_preheader_edge (loop);
3646 }
3647 else
3648 {
3649 gcc_assert (bb_vinfo);
3650 only_init = true;
3651 *ptr_incr = NULL;
3652 }
48e1416a 3653
fb85abff 3654 /* Check the step (evolution) of the load in LOOP, and record
3655 whether it's invariant. */
3656 if (nested_in_vect_loop)
3657 step = STMT_VINFO_DR_STEP (stmt_info);
3658 else
3659 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
48e1416a 3660
fb85abff 3661 if (tree_int_cst_compare (step, size_zero_node) == 0)
3662 *inv_p = true;
3663 else
3664 *inv_p = false;
1aa22f27 3665 negative = tree_int_cst_compare (step, size_zero_node) < 0;
fb85abff 3666
3667 /* Create an expression for the first address accessed by this load
48e1416a 3668 in LOOP. */
fb85abff 3669 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
3670
3671 if (vect_print_dump_info (REPORT_DETAILS))
3672 {
3673 tree data_ref_base = base_name;
bd5ba09f 3674 fprintf (vect_dump, "create %s-pointer variable to type: ",
3675 tree_code_name[(int) TREE_CODE (aggr_type)]);
3676 print_generic_expr (vect_dump, aggr_type, TDF_SLIM);
48e1416a 3677 if (TREE_CODE (data_ref_base) == VAR_DECL
10095225 3678 || TREE_CODE (data_ref_base) == ARRAY_REF)
3679 fprintf (vect_dump, " vectorizing an array ref: ");
fb85abff 3680 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
3681 fprintf (vect_dump, " vectorizing a record based array ref: ");
3682 else if (TREE_CODE (data_ref_base) == SSA_NAME)
3683 fprintf (vect_dump, " vectorizing a pointer ref: ");
3684 print_generic_expr (vect_dump, base_name, TDF_SLIM);
3685 }
3686
bd5ba09f 3687 /* (1) Create the new aggregate-pointer variable. */
3688 aggr_ptr_type = build_pointer_type (aggr_type);
41a92f78 3689 base = get_base_address (DR_REF (dr));
3690 if (base
f544b9a4 3691 && TREE_CODE (base) == MEM_REF)
bd5ba09f 3692 aggr_ptr_type
3693 = build_qualified_type (aggr_ptr_type,
41a92f78 3694 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
bd5ba09f 3695 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
fb85abff 3696 get_name (base_name));
a34701c9 3697
bd5ba09f 3698   /* Vector and array types inherit the alias set of their component
 3699      type by default, so we need to use a ref-all pointer if the data
 3700      reference does not conflict with the created aggregate data
 3701      reference, because the latter is not addressable.
3702 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
a34701c9 3703 get_alias_set (DR_REF (dr))))
3704 {
bd5ba09f 3705 aggr_ptr_type
3706 = build_pointer_type_for_mode (aggr_type,
3707 TYPE_MODE (aggr_ptr_type), true);
3708 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
a34701c9 3709 get_name (base_name));
3710 }
3711
3712 /* Likewise for any of the data references in the stmt group. */
21009880 3713 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
fb85abff 3714 {
21009880 3715 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
dd277d48 3716 do
3717 {
3718 tree lhs = gimple_assign_lhs (orig_stmt);
bd5ba09f 3719 if (!alias_sets_conflict_p (get_deref_alias_set (aggr_ptr),
dd277d48 3720 get_alias_set (lhs)))
3721 {
bd5ba09f 3722 aggr_ptr_type
3723 = build_pointer_type_for_mode (aggr_type,
3724 TYPE_MODE (aggr_ptr_type), true);
3725 aggr_ptr
3726 = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var,
a34701c9 3727 get_name (base_name));
dd277d48 3728 break;
3729 }
3730
21009880 3731 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo_for_stmt (orig_stmt));
dd277d48 3732 }
3733 while (orig_stmt);
fb85abff 3734 }
3735
bd5ba09f 3736 add_referenced_var (aggr_ptr);
fb85abff 3737
282bf14c 3738 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
3739 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
3740 def-use update cycles for the pointer: one relative to the outer-loop
3741 (LOOP), which is what steps (3) and (4) below do. The other is relative
3742 to the inner-loop (which is the inner-most loop containing the dataref),
3743 and this is done be step (5) below.
fb85abff 3744
282bf14c 3745 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
3746 inner-most loop, and so steps (3),(4) work the same, and step (5) is
3747 redundant. Steps (3),(4) create the following:
fb85abff 3748
3749 vp0 = &base_addr;
3750 LOOP: vp1 = phi(vp0,vp2)
48e1416a 3751 ...
fb85abff 3752 ...
3753 vp2 = vp1 + step
3754 goto LOOP
48e1416a 3755
282bf14c 3756      If there is an inner-loop nested in LOOP, then step (5) will also be
3757 applied, and an additional update in the inner-loop will be created:
fb85abff 3758
3759 vp0 = &base_addr;
3760 LOOP: vp1 = phi(vp0,vp2)
3761 ...
3762 inner: vp3 = phi(vp1,vp4)
3763 vp4 = vp3 + inner_step
3764 if () goto inner
3765 ...
3766 vp2 = vp1 + step
3767 if () goto LOOP */
3768
bd5ba09f 3769 /* (2) Calculate the initial address of the aggregate-pointer, and set
3770 the aggregate-pointer to point to it before the loop. */
fb85abff 3771
3772 /* Create: (&(base[init_val+offset]) in the loop preheader. */
3773
3774 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
3775 offset, loop);
fb85abff 3776 if (new_stmt_list)
3777 {
37545e54 3778 if (pe)
3779 {
3780 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
3781 gcc_assert (!new_bb);
3782 }
3783 else
bee862b6 3784 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
fb85abff 3785 }
3786
3787 *initial_address = new_temp;
3788
bd5ba09f 3789 /* Create: p = (aggr_type *) initial_base */
f544b9a4 3790 if (TREE_CODE (new_temp) != SSA_NAME
bd5ba09f 3791 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
37545e54 3792 {
bd5ba09f 3793 vec_stmt = gimple_build_assign (aggr_ptr,
3794 fold_convert (aggr_ptr_type, new_temp));
3795 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
f544b9a4 3796 /* Copy the points-to information if it exists. */
3797 if (DR_PTR_INFO (dr))
bd5ba09f 3798 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
3799 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
f544b9a4 3800 if (pe)
3801 {
3802 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
3803 gcc_assert (!new_bb);
3804 }
3805 else
bee862b6 3806 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
37545e54 3807 }
3808 else
bd5ba09f 3809 aggr_ptr_init = new_temp;
fb85abff 3810
bd5ba09f 3811 /* (3) Handle the updating of the aggregate-pointer inside the loop.
282bf14c 3812 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
3813 inner-loop nested in LOOP (during outer-loop vectorization). */
fb85abff 3814
37545e54 3815 /* No update in loop is required. */
48e1416a 3816 if (only_init && (!loop_vinfo || at_loop == loop))
bd5ba09f 3817 aptr = aggr_ptr_init;
fb85abff 3818 else
3819 {
bd5ba09f 3820 /* The step of the aggregate pointer is the type size. */
3821 tree step = TYPE_SIZE_UNIT (aggr_type);
48e1416a 3822 /* One exception to the above is when the scalar step of the load in
fb85abff 3823 LOOP is zero. In this case the step here is also zero. */
3824 if (*inv_p)
3825 step = size_zero_node;
1aa22f27 3826 else if (negative)
3827 step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
fb85abff 3828
3829 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
3830
bd5ba09f 3831 create_iv (aggr_ptr_init,
3832 fold_convert (aggr_ptr_type, step),
3833 aggr_ptr, loop, &incr_gsi, insert_after,
fb85abff 3834 &indx_before_incr, &indx_after_incr);
3835 incr = gsi_stmt (incr_gsi);
37545e54 3836 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
fb85abff 3837
3838 /* Copy the points-to information if it exists. */
3839 if (DR_PTR_INFO (dr))
3840 {
3841 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3842 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3843 }
fb85abff 3844 if (ptr_incr)
3845 *ptr_incr = incr;
3846
bd5ba09f 3847 aptr = indx_before_incr;
fb85abff 3848 }
3849
3850 if (!nested_in_vect_loop || only_init)
bd5ba09f 3851 return aptr;
fb85abff 3852
3853
bd5ba09f 3854 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
282bf14c 3855      nested in LOOP, if it exists. */
fb85abff 3856
3857 gcc_assert (nested_in_vect_loop);
3858 if (!only_init)
3859 {
3860 standard_iv_increment_position (containing_loop, &incr_gsi,
3861 &insert_after);
bd5ba09f 3862 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
fb85abff 3863 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
3864 &indx_after_incr);
3865 incr = gsi_stmt (incr_gsi);
37545e54 3866 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
fb85abff 3867
3868 /* Copy the points-to information if it exists. */
3869 if (DR_PTR_INFO (dr))
3870 {
3871 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
3872 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
3873 }
fb85abff 3874 if (ptr_incr)
3875 *ptr_incr = incr;
3876
48e1416a 3877 return indx_before_incr;
fb85abff 3878 }
3879 else
3880 gcc_unreachable ();
3881}
3882
3883
3884/* Function bump_vector_ptr
3885
3886 Increment a pointer (to a vector type) by vector-size. If requested,
48e1416a 3887    i.e., if PTR_INCR is given, then also connect the new increment stmt
fb85abff 3888 to the existing def-use update-chain of the pointer, by modifying
3889 the PTR_INCR as illustrated below:
3890
3891 The pointer def-use update-chain before this function:
3892 DATAREF_PTR = phi (p_0, p_2)
3893 ....
48e1416a 3894 PTR_INCR: p_2 = DATAREF_PTR + step
fb85abff 3895
3896 The pointer def-use update-chain after this function:
3897 DATAREF_PTR = phi (p_0, p_2)
3898 ....
3899 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
3900 ....
3901 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
3902
3903 Input:
48e1416a 3904 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
fb85abff 3905 in the loop.
48e1416a 3906 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
fb85abff 3907 the loop. The increment amount across iterations is expected
48e1416a 3908 to be vector_size.
fb85abff 3909 BSI - location where the new update stmt is to be placed.
3910 STMT - the original scalar memory-access stmt that is being vectorized.
3911 BUMP - optional. The offset by which to bump the pointer. If not given,
3912 the offset is assumed to be vector_size.
3913
3914 Output: Return NEW_DATAREF_PTR as illustrated above.
48e1416a 3915
fb85abff 3916*/
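
/* Illustrative sketch only: with no BUMP argument the pointer is advanced by
   the size of one vector, e.g. for a V4SF data-ref

     NEW_DATAREF_PTR = DATAREF_PTR + 16;    (16 == TYPE_SIZE_UNIT of V4SF)

   and, when PTR_INCR is given, its use of DATAREF_PTR is rewired to
   NEW_DATAREF_PTR as shown in the diagram above.  */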
3917
3918tree
3919bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
3920 gimple stmt, tree bump)
3921{
3922 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3923 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3924 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3925 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
3926 tree update = TYPE_SIZE_UNIT (vectype);
3927 gimple incr_stmt;
3928 ssa_op_iter iter;
3929 use_operand_p use_p;
3930 tree new_dataref_ptr;
3931
3932 if (bump)
3933 update = bump;
48e1416a 3934
fb85abff 3935 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
3936 dataref_ptr, update);
3937 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
3938 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
3939 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
3940
3941 /* Copy the points-to information if it exists. */
3942 if (DR_PTR_INFO (dr))
1259ab70 3943 {
3944 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
ceea063b 3945 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
1259ab70 3946 }
fb85abff 3947
3948 if (!ptr_incr)
3949 return new_dataref_ptr;
3950
3951 /* Update the vector-pointer's cross-iteration increment. */
3952 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
3953 {
3954 tree use = USE_FROM_PTR (use_p);
3955
3956 if (use == dataref_ptr)
3957 SET_USE (use_p, new_dataref_ptr);
3958 else
3959 gcc_assert (tree_int_cst_compare (use, update) == 0);
3960 }
3961
3962 return new_dataref_ptr;
3963}
3964
3965
3966/* Function vect_create_destination_var.
3967
3968 Create a new temporary of type VECTYPE. */
3969
3970tree
3971vect_create_destination_var (tree scalar_dest, tree vectype)
3972{
3973 tree vec_dest;
3974 const char *new_name;
3975 tree type;
3976 enum vect_var_kind kind;
3977
3978 kind = vectype ? vect_simple_var : vect_scalar_var;
3979 type = vectype ? vectype : TREE_TYPE (scalar_dest);
3980
3981 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
3982
3983 new_name = get_name (scalar_dest);
3984 if (!new_name)
3985 new_name = "var_";
3986 vec_dest = vect_get_new_vect_var (type, kind, new_name);
3987 add_referenced_var (vec_dest);
3988
3989 return vec_dest;
3990}
3991
ee612634 3992/* Function vect_grouped_store_supported.
fb85abff 3993
42f6a6e8 3994 Returns TRUE if interleave high and interleave low permutations
3995 are supported, and FALSE otherwise. */
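
/* For example, for a vector mode with eight elements (nelt == 8) the two
   permutation vectors checked below are

     interleave high:  { 0, 8, 1, 9, 2, 10, 3, 11 }
     interleave low:   { 4, 12, 5, 13, 6, 14, 7, 15 }

   i.e. the elements of the result come alternately from the first and the
   second input vector.  */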
fb85abff 3996
3997bool
ee612634 3998vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
fb85abff 3999{
42f6a6e8 4000 enum machine_mode mode = TYPE_MODE (vectype);
48e1416a 4001
481fc474 4002 /* vect_permute_store_chain requires the group size to be a power of two. */
4003 if (exact_log2 (count) == -1)
4004 {
4005 if (vect_print_dump_info (REPORT_DETAILS))
ee612634 4006 fprintf (vect_dump, "the size of the group of accesses"
481fc474 4007 " is not a power of 2");
4008 return false;
4009 }
4010
42f6a6e8 4011 /* Check that the permutation is supported. */
8bec2124 4012 if (VECTOR_MODE_P (mode))
4013 {
4014 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4015 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4016 for (i = 0; i < nelt / 2; i++)
4017 {
4018 sel[i * 2] = i;
4019 sel[i * 2 + 1] = i + nelt;
4020 }
4021 if (can_vec_perm_p (mode, false, sel))
4022 {
4023 for (i = 0; i < nelt; i++)
4024 sel[i] += nelt / 2;
4025 if (can_vec_perm_p (mode, false, sel))
4026 return true;
4027 }
4028 }
fb85abff 4029
6620d7d7 4030 if (vect_print_dump_info (REPORT_DETAILS))
4031 fprintf (vect_dump, "interleave op not supported by target.");
4032 return false;
fb85abff 4033}
4034
4035
94b7b4dd 4036/* Return TRUE if vec_store_lanes is available for COUNT vectors of
4037 type VECTYPE. */
4038
4039bool
4040vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4041{
4042 return vect_lanes_optab_supported_p ("vec_store_lanes",
4043 vec_store_lanes_optab,
4044 vectype, count);
4045}
4046
4047
fb85abff 4048/* Function vect_permute_store_chain.
4049
4050 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
48e1416a 4051 a power of 2, generate interleave_high/low stmts to reorder the data
282bf14c 4052 correctly for the stores. Return the final references for stores in
fb85abff 4053 RESULT_CHAIN.
4054
4055 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
282bf14c 4056 The input is 4 vectors each containing 8 elements. We assign a number to
4057 each element, the input sequence is:
fb85abff 4058
4059 1st vec: 0 1 2 3 4 5 6 7
4060 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 4061 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 4062 4th vec: 24 25 26 27 28 29 30 31
4063
4064 The output sequence should be:
4065
4066 1st vec: 0 8 16 24 1 9 17 25
4067 2nd vec: 2 10 18 26 3 11 19 27
4068    3rd vec:  4 12 20 28  5 13 21 29
4069 4th vec: 6 14 22 30 7 15 23 31
4070
4071 i.e., we interleave the contents of the four vectors in their order.
4072
282bf14c 4073 We use interleave_high/low instructions to create such output. The input of
fb85abff 4074 each interleave_high/low operation is two vectors:
48e1416a 4075 1st vec 2nd vec
4076 0 1 2 3 4 5 6 7
4077 the even elements of the result vector are obtained left-to-right from the
282bf14c 4078 high/low elements of the first vector. The odd elements of the result are
fb85abff 4079 obtained left-to-right from the high/low elements of the second vector.
4080 The output of interleave_high will be: 0 4 1 5
4081 and of interleave_low: 2 6 3 7
4082
48e1416a 4083
282bf14c 4084 The permutation is done in log LENGTH stages. In each stage interleave_high
48e1416a 4085 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4086 where the first argument is taken from the first half of DR_CHAIN and the
4087    second argument from its second half.
4088 In our example,
fb85abff 4089
4090 I1: interleave_high (1st vec, 3rd vec)
4091 I2: interleave_low (1st vec, 3rd vec)
4092 I3: interleave_high (2nd vec, 4th vec)
4093 I4: interleave_low (2nd vec, 4th vec)
4094
4095 The output for the first stage is:
4096
4097 I1: 0 16 1 17 2 18 3 19
4098 I2: 4 20 5 21 6 22 7 23
4099 I3: 8 24 9 25 10 26 11 27
4100 I4: 12 28 13 29 14 30 15 31
4101
4102 The output of the second stage, i.e. the final result is:
4103
4104 I1: 0 8 16 24 1 9 17 25
4105 I2: 2 10 18 26 3 11 19 27
4106    I3:  4 12 20 28  5 13 21 29
4107 I4: 6 14 22 30 7 15 23 31. */
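
/* The following self-contained sketch (illustration only, using plain int
   arrays instead of vectors) models one stage of the permutation performed
   by vect_permute_store_chain below, for the example above with LENGTH == 4
   and eight elements per vector:

     enum { LEN = 4, NELT = 8 };

     static void
     interleave_stage (int chain[LEN][NELT], int result[LEN][NELT])
     {
       for (int j = 0; j < LEN / 2; j++)
         for (int i = 0; i < NELT / 2; i++)
           {
             // interleave_high (chain[j], chain[j + LEN/2])
             result[2 * j][2 * i] = chain[j][i];
             result[2 * j][2 * i + 1] = chain[j + LEN / 2][i];
             // interleave_low (chain[j], chain[j + LEN/2])
             result[2 * j + 1][2 * i] = chain[j][i + NELT / 2];
             result[2 * j + 1][2 * i + 1] = chain[j + LEN / 2][i + NELT / 2];
           }
     }

   Applying this stage log2 (LENGTH) times, copying RESULT back into CHAIN in
   between, yields exactly the output sequence shown above.  */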
48e1416a 4108
481fc474 4109void
48e1416a 4110vect_permute_store_chain (VEC(tree,heap) *dr_chain,
4111 unsigned int length,
fb85abff 4112 gimple stmt,
4113 gimple_stmt_iterator *gsi,
4114 VEC(tree,heap) **result_chain)
4115{
4116 tree perm_dest, vect1, vect2, high, low;
4117 gimple perm_stmt;
4118 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
8bec2124 4119 tree perm_mask_low, perm_mask_high;
4120 unsigned int i, n;
42f6a6e8 4121 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
8bec2124 4122 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
48e1416a 4123
fb85abff 4124 *result_chain = VEC_copy (tree, heap, dr_chain);
4125
8bec2124 4126 for (i = 0, n = nelt / 2; i < n; i++)
4127 {
4128 sel[i * 2] = i;
4129 sel[i * 2 + 1] = i + nelt;
4130 }
4131 perm_mask_high = vect_gen_perm_mask (vectype, sel);
42f6a6e8 4132 gcc_assert (perm_mask_high != NULL);
4133
8bec2124 4134 for (i = 0; i < nelt; i++)
4135 sel[i] += nelt / 2;
4136 perm_mask_low = vect_gen_perm_mask (vectype, sel);
42f6a6e8 4137 gcc_assert (perm_mask_low != NULL);
8bec2124 4138
4139 for (i = 0, n = exact_log2 (length); i < n; i++)
fb85abff 4140 {
4141 for (j = 0; j < length/2; j++)
4142 {
4143 vect1 = VEC_index (tree, dr_chain, j);
4144 vect2 = VEC_index (tree, dr_chain, j+length/2);
4145
4146 /* Create interleaving stmt:
8bec2124 4147 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
fb85abff 4148 perm_dest = create_tmp_var (vectype, "vect_inter_high");
4149 DECL_GIMPLE_REG_P (perm_dest) = 1;
4150 add_referenced_var (perm_dest);
8bec2124 4151 high = make_ssa_name (perm_dest, NULL);
4152 perm_stmt
4153 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, high,
4154 vect1, vect2, perm_mask_high);
fb85abff 4155 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4156 VEC_replace (tree, *result_chain, 2*j, high);
4157
4158 /* Create interleaving stmt:
8bec2124 4159 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4160 nelt*3/2+1, ...}> */
fb85abff 4161 perm_dest = create_tmp_var (vectype, "vect_inter_low");
4162 DECL_GIMPLE_REG_P (perm_dest) = 1;
4163 add_referenced_var (perm_dest);
8bec2124 4164 low = make_ssa_name (perm_dest, NULL);
4165 perm_stmt
4166 = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, low,
4167 vect1, vect2, perm_mask_low);
fb85abff 4168 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
4169 VEC_replace (tree, *result_chain, 2*j+1, low);
4170 }
4171 dr_chain = VEC_copy (tree, heap, *result_chain);
4172 }
fb85abff 4173}
4174
4175/* Function vect_setup_realignment
48e1416a 4176
fb85abff 4177 This function is called when vectorizing an unaligned load using
4178 the dr_explicit_realign[_optimized] scheme.
4179 This function generates the following code at the loop prolog:
4180
4181 p = initial_addr;
4182 x msq_init = *(floor(p)); # prolog load
48e1416a 4183 realignment_token = call target_builtin;
fb85abff 4184 loop:
4185 x msq = phi (msq_init, ---)
4186
48e1416a 4187 The stmts marked with x are generated only for the case of
fb85abff 4188 dr_explicit_realign_optimized.
4189
48e1416a 4190 The code above sets up a new (vector) pointer, pointing to the first
fb85abff 4191 location accessed by STMT, and a "floor-aligned" load using that pointer.
4192 It also generates code to compute the "realignment-token" (if the relevant
4193 target hook was defined), and creates a phi-node at the loop-header bb
4194 whose arguments are the result of the prolog-load (created by this
4195 function) and the result of a load that takes place in the loop (to be
4196 created by the caller to this function).
4197
4198 For the case of dr_explicit_realign_optimized:
48e1416a 4199 The caller to this function uses the phi-result (msq) to create the
fb85abff 4200 realignment code inside the loop, and sets up the missing phi argument,
4201 as follows:
48e1416a 4202 loop:
fb85abff 4203 msq = phi (msq_init, lsq)
4204 lsq = *(floor(p')); # load in loop
4205 result = realign_load (msq, lsq, realignment_token);
4206
4207 For the case of dr_explicit_realign:
4208 loop:
4209 msq = *(floor(p)); # load in loop
4210 p' = p + (VS-1);
4211 lsq = *(floor(p')); # load in loop
4212 result = realign_load (msq, lsq, realignment_token);
4213
4214 Input:
4215 STMT - (scalar) load stmt to be vectorized. This load accesses
4216 a memory location that may be unaligned.
4217 BSI - place where new code is to be inserted.
4218 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
48e1416a 4219 is used.
4220
fb85abff 4221 Output:
4222 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4223 target hook, if defined.
4224 Return value - the result of the loop-header phi node. */
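
/* A rough scalar model (illustration only, ignoring endianness and the
   target-specific realignment token, and assuming <stdint.h> and <string.h>)
   of what the realignment scheme computes for an unaligned pointer P and a
   vector of VS bytes:

     enum { VS = 16 };                        // assumed vector size
     unsigned char msq[VS], lsq[VS], result[VS];
     unsigned char *floor_p
       = (unsigned char *) ((uintptr_t) p & -(uintptr_t) VS);
     unsigned off = (uintptr_t) p & (VS - 1); // the misalignment
     memcpy (msq, floor_p, VS);               // aligned load at floor(p)
     memcpy (lsq, floor_p + VS, VS);          // aligned load at floor(p) + VS
     for (unsigned i = 0; i < VS; i++)
       result[i] = i + off < VS ? msq[i + off] : lsq[i + off - VS];

   In the optimized variant the first MSQ is loaded in the preheader and each
   subsequent MSQ reuses the previous iteration's LSQ, as described above.  */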
4225
4226tree
4227vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4228 tree *realignment_token,
4229 enum dr_alignment_support alignment_support_scheme,
4230 tree init_addr,
4231 struct loop **at_loop)
4232{
4233 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4234 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4235 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2cb9ef39 4236 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
ad074595 4237 struct loop *loop = NULL;
4238 edge pe = NULL;
fb85abff 4239 tree scalar_dest = gimple_assign_lhs (stmt);
4240 tree vec_dest;
4241 gimple inc;
4242 tree ptr;
4243 tree data_ref;
4244 gimple new_stmt;
4245 basic_block new_bb;
4246 tree msq_init = NULL_TREE;
4247 tree new_temp;
4248 gimple phi_stmt;
4249 tree msq = NULL_TREE;
4250 gimple_seq stmts = NULL;
4251 bool inv_p;
4252 bool compute_in_loop = false;
ad074595 4253 bool nested_in_vect_loop = false;
fb85abff 4254 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
ad074595 4255 struct loop *loop_for_initial_load = NULL;
4256
4257 if (loop_vinfo)
4258 {
4259 loop = LOOP_VINFO_LOOP (loop_vinfo);
4260 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4261 }
fb85abff 4262
4263 gcc_assert (alignment_support_scheme == dr_explicit_realign
4264 || alignment_support_scheme == dr_explicit_realign_optimized);
4265
4266 /* We need to generate three things:
4267 1. the misalignment computation
4268 2. the extra vector load (for the optimized realignment scheme).
4269 3. the phi node for the two vectors from which the realignment is
282bf14c 4270 done (for the optimized realignment scheme). */
fb85abff 4271
4272 /* 1. Determine where to generate the misalignment computation.
4273
4274 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4275 calculation will be generated by this function, outside the loop (in the
4276 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4277 caller, inside the loop.
4278
4279 Background: If the misalignment remains fixed throughout the iterations of
4280 the loop, then both realignment schemes are applicable, and also the
4281 misalignment computation can be done outside LOOP. This is because we are
4282 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4283 are a multiple of VS (the Vector Size), and therefore the misalignment in
4284 different vectorized LOOP iterations is always the same.
4285 The problem arises only if the memory access is in an inner-loop nested
4286 inside LOOP, which is now being vectorized using outer-loop vectorization.
4287 This is the only case when the misalignment of the memory access may not
4288 remain fixed throughout the iterations of the inner-loop (as explained in
4289 detail in vect_supportable_dr_alignment). In this case, not only is the
4290 optimized realignment scheme not applicable, but also the misalignment
4291 computation (and generation of the realignment token that is passed to
4292 REALIGN_LOAD) have to be done inside the loop.
4293
4294 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4295 or not, which in turn determines if the misalignment is computed inside
4296 the inner-loop, or outside LOOP. */
4297
ad074595 4298 if (init_addr != NULL_TREE || !loop_vinfo)
fb85abff 4299 {
4300 compute_in_loop = true;
4301 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4302 }
4303
4304
4305 /* 2. Determine where to generate the extra vector load.
4306
4307 For the optimized realignment scheme, instead of generating two vector
4308 loads in each iteration, we generate a single extra vector load in the
4309 preheader of the loop, and in each iteration reuse the result of the
4310 vector load from the previous iteration. In case the memory access is in
4311 an inner-loop nested inside LOOP, which is now being vectorized using
4312 outer-loop vectorization, we need to determine whether this initial vector
4313 load should be generated at the preheader of the inner-loop, or can be
4314 generated at the preheader of LOOP. If the memory access has no evolution
4315 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4316 to be generated inside LOOP (in the preheader of the inner-loop). */
4317
4318 if (nested_in_vect_loop)
4319 {
4320 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4321 bool invariant_in_outerloop =
4322 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4323 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4324 }
4325 else
4326 loop_for_initial_load = loop;
4327 if (at_loop)
4328 *at_loop = loop_for_initial_load;
4329
ad074595 4330 if (loop_for_initial_load)
4331 pe = loop_preheader_edge (loop_for_initial_load);
4332
fb85abff 4333 /* 3. For the case of the optimized realignment, create the first vector
4334 load at the loop preheader. */
4335
4336 if (alignment_support_scheme == dr_explicit_realign_optimized)
4337 {
4338 /* Create msq_init = *(floor(p1)) in the loop preheader */
4339
4340 gcc_assert (!compute_in_loop);
fb85abff 4341 vec_dest = vect_create_destination_var (scalar_dest, vectype);
bd5ba09f 4342 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4343 NULL_TREE, &init_addr, NULL, &inc,
4344 true, &inv_p);
86638c2e 4345 new_stmt = gimple_build_assign_with_ops
4346 (BIT_AND_EXPR, NULL_TREE, ptr,
4347 build_int_cst (TREE_TYPE (ptr),
4348 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
4349 new_temp = make_ssa_name (SSA_NAME_VAR (ptr), new_stmt);
4350 gimple_assign_set_lhs (new_stmt, new_temp);
4351 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4352 gcc_assert (!new_bb);
2cb9ef39 4353 data_ref
4354 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4355 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
fb85abff 4356 new_stmt = gimple_build_assign (vec_dest, data_ref);
4357 new_temp = make_ssa_name (vec_dest, new_stmt);
4358 gimple_assign_set_lhs (new_stmt, new_temp);
ad074595 4359 if (pe)
4360 {
4361 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4362 gcc_assert (!new_bb);
4363 }
4364 else
4365 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4366
fb85abff 4367 msq_init = gimple_assign_lhs (new_stmt);
4368 }
4369
4370 /* 4. Create realignment token using a target builtin, if available.
4371 It is done either inside the containing loop, or before LOOP (as
4372 determined above). */
4373
4374 if (targetm.vectorize.builtin_mask_for_load)
4375 {
4376 tree builtin_decl;
4377
4378 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
ad074595 4379 if (!init_addr)
fb85abff 4380 {
4381 /* Generate the INIT_ADDR computation outside LOOP. */
4382 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4383 NULL_TREE, loop);
ad074595 4384 if (loop)
4385 {
4386 pe = loop_preheader_edge (loop);
4387 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4388 gcc_assert (!new_bb);
4389 }
4390 else
4391 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
fb85abff 4392 }
4393
4394 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4395 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4396 vec_dest =
4397 vect_create_destination_var (scalar_dest,
4398 gimple_call_return_type (new_stmt));
4399 new_temp = make_ssa_name (vec_dest, new_stmt);
4400 gimple_call_set_lhs (new_stmt, new_temp);
4401
4402 if (compute_in_loop)
4403 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4404 else
4405 {
4406 /* Generate the misalignment computation outside LOOP. */
4407 pe = loop_preheader_edge (loop);
4408 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4409 gcc_assert (!new_bb);
4410 }
4411
4412 *realignment_token = gimple_call_lhs (new_stmt);
4413
4414 /* The result of the CALL_EXPR to this builtin is determined from
4415 the value of the parameter and no global variables are touched
4416 which makes the builtin a "const" function. Requiring the
4417 builtin to have the "const" attribute makes it unnecessary
4418 to call mark_call_clobbered. */
4419 gcc_assert (TREE_READONLY (builtin_decl));
4420 }
4421
4422 if (alignment_support_scheme == dr_explicit_realign)
4423 return msq;
4424
4425 gcc_assert (!compute_in_loop);
4426 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4427
4428
4429 /* 5. Create msq = phi <msq_init, lsq> in loop */
4430
4431 pe = loop_preheader_edge (containing_loop);
4432 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4433 msq = make_ssa_name (vec_dest, NULL);
4434 phi_stmt = create_phi_node (msq, containing_loop->header);
4435 SSA_NAME_DEF_STMT (msq) = phi_stmt;
60d535d2 4436 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
fb85abff 4437
4438 return msq;
4439}
4440
4441
ee612634 4442/* Function vect_grouped_load_supported.
fb85abff 4443
42f6a6e8 4444 Returns TRUE if even and odd permutations are supported,
fb85abff 4445 and FALSE otherwise. */
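
/* For example, for a vector mode with eight elements (nelt == 8) the two
   permutation vectors checked below are

     extract even:  { 0, 2, 4, 6, 8, 10, 12, 14 }
     extract odd:   { 1, 3, 5, 7, 9, 11, 13, 15 }

   taken from the concatenation of the two input vectors.  */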
4446
4447bool
ee612634 4448vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
fb85abff 4449{
42f6a6e8 4450 enum machine_mode mode = TYPE_MODE (vectype);
fb85abff 4451
481fc474 4452 /* vect_permute_load_chain requires the group size to be a power of two. */
4453 if (exact_log2 (count) == -1)
4454 {
4455 if (vect_print_dump_info (REPORT_DETAILS))
ee612634 4456 fprintf (vect_dump, "the size of the group of accesses"
481fc474 4457 " is not a power of 2");
4458 return false;
4459 }
4460
42f6a6e8 4461 /* Check that the permutation is supported. */
4462 if (VECTOR_MODE_P (mode))
4463 {
4464 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4465 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
fb85abff 4466
42f6a6e8 4467 for (i = 0; i < nelt; i++)
4468 sel[i] = i * 2;
4469 if (can_vec_perm_p (mode, false, sel))
4470 {
4471 for (i = 0; i < nelt; i++)
4472 sel[i] = i * 2 + 1;
4473 if (can_vec_perm_p (mode, false, sel))
4474 return true;
4475 }
4476 }
fb85abff 4477
6620d7d7 4478 if (vect_print_dump_info (REPORT_DETAILS))
4479 fprintf (vect_dump, "extract even/odd not supported by target");
4480 return false;
fb85abff 4481}
4482
94b7b4dd 4483/* Return TRUE if vec_load_lanes is available for COUNT vectors of
4484 type VECTYPE. */
4485
4486bool
4487vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4488{
4489 return vect_lanes_optab_supported_p ("vec_load_lanes",
4490 vec_load_lanes_optab,
4491 vectype, count);
4492}
fb85abff 4493
4494/* Function vect_permute_load_chain.
4495
4496 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
48e1416a 4497 a power of 2, generate extract_even/odd stmts to reorder the input data
282bf14c 4498 correctly. Return the final references for loads in RESULT_CHAIN.
fb85abff 4499
4500 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4501 The input is 4 vectors each containing 8 elements. We assign a number to each
4502 element, the input sequence is:
4503
4504 1st vec: 0 1 2 3 4 5 6 7
4505 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 4506 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 4507 4th vec: 24 25 26 27 28 29 30 31
4508
4509 The output sequence should be:
4510
4511 1st vec: 0 4 8 12 16 20 24 28
4512 2nd vec: 1 5 9 13 17 21 25 29
48e1416a 4513 3rd vec: 2 6 10 14 18 22 26 30
fb85abff 4514 4th vec: 3 7 11 15 19 23 27 31
4515
4516 i.e., the first output vector should contain the first elements of each
4517 interleaving group, etc.
4518
282bf14c 4519 We use extract_even/odd instructions to create such output. The input of
4520 each extract_even/odd operation is two vectors
48e1416a 4521 1st vec 2nd vec
4522 0 1 2 3 4 5 6 7
fb85abff 4523
282bf14c 4524 and the output is the vector of extracted even/odd elements. The output of
fb85abff 4525 extract_even will be: 0 2 4 6
4526 and of extract_odd: 1 3 5 7
4527
48e1416a 4528
282bf14c 4529 The permutation is done in log LENGTH stages. In each stage extract_even
4530 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4531 their order. In our example,
fb85abff 4532
4533 E1: extract_even (1st vec, 2nd vec)
4534 E2: extract_odd (1st vec, 2nd vec)
4535 E3: extract_even (3rd vec, 4th vec)
4536 E4: extract_odd (3rd vec, 4th vec)
4537
4538 The output for the first stage will be:
4539
4540 E1: 0 2 4 6 8 10 12 14
4541 E2: 1 3 5 7 9 11 13 15
48e1416a 4542 E3: 16 18 20 22 24 26 28 30
fb85abff 4543 E4: 17 19 21 23 25 27 29 31
4544
4545 In order to proceed and create the correct sequence for the next stage (or
48e1416a 4546 for the correct output, if the second stage is the last one, as in our
4547    example), we first put the output of the extract_even operation and then the
fb85abff 4548 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4549 The input for the second stage is:
4550
4551 1st vec (E1): 0 2 4 6 8 10 12 14
48e1416a 4552 2nd vec (E3): 16 18 20 22 24 26 28 30
4553 3rd vec (E2): 1 3 5 7 9 11 13 15
fb85abff 4554 4th vec (E4): 17 19 21 23 25 27 29 31
4555
4556 The output of the second stage:
4557
4558 E1: 0 4 8 12 16 20 24 28
4559 E2: 2 6 10 14 18 22 26 30
4560 E3: 1 5 9 13 17 21 25 29
4561 E4: 3 7 11 15 19 23 27 31
4562
4563 And RESULT_CHAIN after reordering:
4564
4565 1st vec (E1): 0 4 8 12 16 20 24 28
4566 2nd vec (E3): 1 5 9 13 17 21 25 29
48e1416a 4567 3rd vec (E2): 2 6 10 14 18 22 26 30
fb85abff 4568 4th vec (E4): 3 7 11 15 19 23 27 31. */
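
/* The following self-contained sketch (illustration only, using plain int
   arrays instead of vectors) models one stage of the permutation performed
   by vect_permute_load_chain below, for the example above with LENGTH == 4
   and eight elements per vector:

     enum { LEN = 4, NELT = 8 };

     static void
     extract_stage (int chain[LEN][NELT], int result[LEN][NELT])
     {
       for (int j = 0; j < LEN; j += 2)
         for (int i = 0; i < NELT / 2; i++)
           {
             // extract_even (chain[j], chain[j+1])
             result[j / 2][i] = chain[j][2 * i];
             result[j / 2][i + NELT / 2] = chain[j + 1][2 * i];
             // extract_odd (chain[j], chain[j+1])
             result[j / 2 + LEN / 2][i] = chain[j][2 * i + 1];
             result[j / 2 + LEN / 2][i + NELT / 2] = chain[j + 1][2 * i + 1];
           }
     }

   Applying this stage log2 (LENGTH) times, copying RESULT back into CHAIN in
   between, yields exactly the reordering shown above.  */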
4569
481fc474 4570static void
48e1416a 4571vect_permute_load_chain (VEC(tree,heap) *dr_chain,
4572 unsigned int length,
fb85abff 4573 gimple stmt,
4574 gimple_stmt_iterator *gsi,
4575 VEC(tree,heap) **result_chain)
4576{
4577 tree perm_dest, data_ref, first_vect, second_vect;
42f6a6e8 4578 tree perm_mask_even, perm_mask_odd;
fb85abff 4579 gimple perm_stmt;
4580 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
42f6a6e8 4581 unsigned int i, j, log_length = exact_log2 (length);
4582 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4583 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
fb85abff 4584
4585 *result_chain = VEC_copy (tree, heap, dr_chain);
42f6a6e8 4586
4587 for (i = 0; i < nelt; ++i)
4588 sel[i] = i * 2;
4589 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4590 gcc_assert (perm_mask_even != NULL);
4591
4592 for (i = 0; i < nelt; ++i)
4593 sel[i] = i * 2 + 1;
4594 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4595 gcc_assert (perm_mask_odd != NULL);
4596
4597 for (i = 0; i < log_length; i++)
fb85abff 4598 {
42f6a6e8 4599 for (j = 0; j < length; j += 2)
fb85abff 4600 {
4601 first_vect = VEC_index (tree, dr_chain, j);
4602 second_vect = VEC_index (tree, dr_chain, j+1);
4603
4604 /* data_ref = permute_even (first_data_ref, second_data_ref); */
4605 perm_dest = create_tmp_var (vectype, "vect_perm_even");
4606 DECL_GIMPLE_REG_P (perm_dest) = 1;
4607 add_referenced_var (perm_dest);
4608
42f6a6e8 4609 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4610 first_vect, second_vect,
4611 perm_mask_even);
fb85abff 4612
4613 data_ref = make_ssa_name (perm_dest, perm_stmt);
4614 gimple_assign_set_lhs (perm_stmt, data_ref);
4615 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
fb85abff 4616
48e1416a 4617 VEC_replace (tree, *result_chain, j/2, data_ref);
4618
fb85abff 4619 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
4620 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
4621 DECL_GIMPLE_REG_P (perm_dest) = 1;
4622 add_referenced_var (perm_dest);
4623
42f6a6e8 4624 perm_stmt = gimple_build_assign_with_ops3 (VEC_PERM_EXPR, perm_dest,
4625 first_vect, second_vect,
4626 perm_mask_odd);
4627
fb85abff 4628 data_ref = make_ssa_name (perm_dest, perm_stmt);
4629 gimple_assign_set_lhs (perm_stmt, data_ref);
4630 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
fb85abff 4631
4632 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
4633 }
4634 dr_chain = VEC_copy (tree, heap, *result_chain);
4635 }
fb85abff 4636}
4637
4638
ee612634 4639/* Function vect_transform_grouped_load.
fb85abff 4640
4641 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4642    to perform their permutation and ascribe the resulting vectorized statements to
4643 the scalar statements.
4644*/
4645
481fc474 4646void
ee612634 4647vect_transform_grouped_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
fb85abff 4648 gimple_stmt_iterator *gsi)
4649{
fb85abff 4650 VEC(tree,heap) *result_chain = NULL;
fb85abff 4651
48e1416a 4652 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4653 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
fb85abff 4654 vectors, that are ready for vector computation. */
4655 result_chain = VEC_alloc (tree, heap, size);
481fc474 4656 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
ee612634 4657 vect_record_grouped_load_vectors (stmt, result_chain);
94b7b4dd 4658 VEC_free (tree, heap, result_chain);
4659}
4660
ee612634 4661/* RESULT_CHAIN contains the output of a group of grouped loads that were
94b7b4dd 4662 generated as part of the vectorization of STMT. Assign the statement
4663 for each vector to the associated scalar statement. */
4664
4665void
ee612634 4666vect_record_grouped_load_vectors (gimple stmt, VEC(tree,heap) *result_chain)
94b7b4dd 4667{
21009880 4668 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
94b7b4dd 4669 gimple next_stmt, new_stmt;
4670 unsigned int i, gap_count;
4671 tree tmp_data_ref;
fb85abff 4672
48e1416a 4673 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4674 Since we scan the chain starting from it's first node, their order
fb85abff 4675 corresponds the order of data-refs in RESULT_CHAIN. */
4676 next_stmt = first_stmt;
4677 gap_count = 1;
48148244 4678 FOR_EACH_VEC_ELT (tree, result_chain, i, tmp_data_ref)
fb85abff 4679 {
4680 if (!next_stmt)
4681 break;
4682
282bf14c 4683 /* Skip the gaps. Loads created for the gaps will be removed by dead
4684 code elimination pass later. No need to check for the first stmt in
fb85abff 4685 the group, since it always exists.
21009880 4686 GROUP_GAP is the number of steps in elements from the previous
4687 access (if there is no gap GROUP_GAP is 1). We skip loads that
282bf14c 4688 correspond to the gaps. */
48e1416a 4689 if (next_stmt != first_stmt
21009880 4690 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
fb85abff 4691 {
4692 gap_count++;
4693 continue;
4694 }
4695
4696 while (next_stmt)
4697 {
4698 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4699 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4700 copies, and we put the new vector statement in the first available
4701 RELATED_STMT. */
4702 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4703 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4704 else
4705 {
21009880 4706 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
fb85abff 4707 {
4708 gimple prev_stmt =
4709 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4710 gimple rel_stmt =
4711 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4712 while (rel_stmt)
4713 {
4714 prev_stmt = rel_stmt;
48e1416a 4715 rel_stmt =
fb85abff 4716 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4717 }
4718
48e1416a 4719 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
fb85abff 4720 new_stmt;
4721 }
4722 }
4723
21009880 4724 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
fb85abff 4725 gap_count = 1;
4726 /* If NEXT_STMT accesses the same DR as the previous statement,
4727 put the same TMP_DATA_REF as its vectorized statement; otherwise
4728 get the next data-ref from RESULT_CHAIN. */
21009880 4729 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
fb85abff 4730 break;
4731 }
4732 }
fb85abff 4733}
4734
4735/* Function vect_force_dr_alignment_p.
4736
4737    Returns whether the alignment of a DECL can be forced so that it is
4738    aligned on an ALIGNMENT-bit boundary. */
4739
48e1416a 4740bool
fb85abff 4741vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
4742{
4743 if (TREE_CODE (decl) != VAR_DECL)
4744 return false;
4745
1f0f7e1e 4746 /* We cannot change alignment of common or external symbols as another
4747 translation unit may contain a definition with lower alignment.
4748 The rules of common symbol linking mean that the definition
4749 will override the common symbol. */
4750 if (DECL_EXTERNAL (decl)
4751 || DECL_COMMON (decl))
fb85abff 4752 return false;
4753
4754 if (TREE_ASM_WRITTEN (decl))
4755 return false;
4756
0bbf0fd8 4757 /* Do not override the alignment as specified by the ABI when the used
4758 attribute is set. */
4759 if (DECL_PRESERVE_P (decl))
5f8265c3 4760 return false;
4761
fb85abff 4762 if (TREE_STATIC (decl))
4763 return (alignment <= MAX_OFILE_ALIGNMENT);
4764 else
4765 return (alignment <= MAX_STACK_ALIGNMENT);
4766}
4767
fb85abff 4768
0822b158 4769/* Return whether the data reference DR is supported with respect to its
4770 alignment.
4771 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
4772    if it is aligned, i.e., check if it is possible to vectorize it with different
fb85abff 4773 alignment. */
4774
4775enum dr_alignment_support
0822b158 4776vect_supportable_dr_alignment (struct data_reference *dr,
4777 bool check_aligned_accesses)
fb85abff 4778{
4779 gimple stmt = DR_STMT (dr);
4780 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4781 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
bc620c5c 4782 enum machine_mode mode = TYPE_MODE (vectype);
37545e54 4783 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4784 struct loop *vect_loop = NULL;
4785 bool nested_in_vect_loop = false;
fb85abff 4786
0822b158 4787 if (aligned_access_p (dr) && !check_aligned_accesses)
fb85abff 4788 return dr_aligned;
4789
ad074595 4790 if (loop_vinfo)
4791 {
4792 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
4793 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
4794 }
37545e54 4795
fb85abff 4796 /* Possibly unaligned access. */
4797
4798 /* We can choose between using the implicit realignment scheme (generating
4799 a misaligned_move stmt) and the explicit realignment scheme (generating
282bf14c 4800 aligned loads with a REALIGN_LOAD). There are two variants to the
4801 explicit realignment scheme: optimized, and unoptimized.
fb85abff 4802 We can optimize the realignment only if the step between consecutive
4803 vector loads is equal to the vector size. Since the vector memory
4804 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
4805 is guaranteed that the misalignment amount remains the same throughout the
4806 execution of the vectorized loop. Therefore, we can create the
4807 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
4808 at the loop preheader.
4809
4810 However, in the case of outer-loop vectorization, when vectorizing a
4811 memory access in the inner-loop nested within the LOOP that is now being
4812 vectorized, while it is guaranteed that the misalignment of the
4813 vectorized memory access will remain the same in different outer-loop
4814 iterations, it is *not* guaranteed that it will remain the same throughout
4815 the execution of the inner-loop. This is because the inner-loop advances
4816 with the original scalar step (and not in steps of VS). If the inner-loop
4817 step happens to be a multiple of VS, then the misalignment remains fixed
4818 and we can use the optimized realignment scheme. For example:
4819
4820 for (i=0; i<N; i++)
4821 for (j=0; j<M; j++)
4822 s += a[i+j];
4823
4824 When vectorizing the i-loop in the above example, the step between
4825 consecutive vector loads is 1, and so the misalignment does not remain
4826 fixed across the execution of the inner-loop, and the realignment cannot
4827 be optimized (as illustrated in the following pseudo vectorized loop):
4828
4829 for (i=0; i<N; i+=4)
4830 for (j=0; j<M; j++){
4831 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
4832 // when j is {0,1,2,3,4,5,6,7,...} respectively.
4833 // (assuming that we start from an aligned address).
4834 }
4835
4836 We therefore have to use the unoptimized realignment scheme:
4837
4838 for (i=0; i<N; i+=4)
4839 for (j=k; j<M; j+=4)
4840 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
4841 // that the misalignment of the initial address is
4842 // 0).
4843
4844 The loop can then be vectorized as follows:
4845
4846 for (k=0; k<4; k++){
4847 rt = get_realignment_token (&vp[k]);
4848 for (i=0; i<N; i+=4){
4849 v1 = vp[i+k];
4850 for (j=k; j<M; j+=4){
4851 v2 = vp[i+j+VS-1];
4852 va = REALIGN_LOAD <v1,v2,rt>;
4853 vs += va;
4854 v1 = v2;
4855 }
4856 }
4857 } */
4858
4859 if (DR_IS_READ (dr))
4860 {
c6b19c5f 4861 bool is_packed = false;
4862 tree type = (TREE_TYPE (DR_REF (dr)));
4863
d6bf3b14 4864 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
fb85abff 4865 && (!targetm.vectorize.builtin_mask_for_load
4866 || targetm.vectorize.builtin_mask_for_load ()))
4867 {
4868 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ad074595 4869 if ((nested_in_vect_loop
4870 && (TREE_INT_CST_LOW (DR_STEP (dr))
4871 != GET_MODE_SIZE (TYPE_MODE (vectype))))
4872 || !loop_vinfo)
fb85abff 4873 return dr_explicit_realign;
4874 else
4875 return dr_explicit_realign_optimized;
4876 }
c6b19c5f 4877 if (!known_alignment_for_access_p (dr))
95539e1d 4878 is_packed = contains_packed_reference (DR_REF (dr));
48e1416a 4879
c6b19c5f 4880 if (targetm.vectorize.
202d6e5f 4881 support_vector_misalignment (mode, type,
4882 DR_MISALIGNMENT (dr), is_packed))
fb85abff 4883 /* Can't software pipeline the loads, but can at least do them. */
4884 return dr_unaligned_supported;
4885 }
c6b19c5f 4886 else
4887 {
4888 bool is_packed = false;
4889 tree type = (TREE_TYPE (DR_REF (dr)));
fb85abff 4890
c6b19c5f 4891 if (!known_alignment_for_access_p (dr))
95539e1d 4892 is_packed = contains_packed_reference (DR_REF (dr));
48e1416a 4893
c6b19c5f 4894 if (targetm.vectorize.
202d6e5f 4895 support_vector_misalignment (mode, type,
4896 DR_MISALIGNMENT (dr), is_packed))
c6b19c5f 4897 return dr_unaligned_supported;
4898 }
48e1416a 4899
fb85abff 4900 /* Unsupported. */
4901 return dr_unaligned_unsupported;
4902}
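
/* Illustrative only (not part of the original file): a hypothetical helper
   showing how a caller might interpret the result of
   vect_supportable_dr_alignment for a load.  Any scheme other than
   dr_unaligned_unsupported means the access can be vectorized, possibly
   with realignment code; this sketch is not called anywhere.  */

static bool
example_load_alignment_supported_p (struct data_reference *dr)
{
  switch (vect_supportable_dr_alignment (dr, false))
    {
    case dr_aligned:
    case dr_unaligned_supported:
    case dr_explicit_realign:
    case dr_explicit_realign_optimized:
      return true;

    case dr_unaligned_unsupported:
    default:
      return false;
    }
}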