gcc/tree-vect-data-refs.c
48e1416a 1/* Data References Analysis and Manipulation Utilities for Vectorization.
3aea1f79 2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
7bd765d4 25#include "dumpfile.h"
fb85abff 26#include "tm.h"
fb85abff 27#include "tree.h"
9ed99284 28#include "stor-layout.h"
218a4f88 29#include "tm_p.h"
fb85abff 30#include "target.h"
31#include "basic-block.h"
ce084dfc 32#include "gimple-pretty-print.h"
bc61cadb 33#include "tree-ssa-alias.h"
34#include "internal-fn.h"
35#include "tree-eh.h"
36#include "gimple-expr.h"
37#include "is-a.h"
073c1fd5 38#include "gimple.h"
a8783bee 39#include "gimplify.h"
dcf1a1ec 40#include "gimple-iterator.h"
e795d6e1 41#include "gimplify-me.h"
073c1fd5 42#include "gimple-ssa.h"
43#include "tree-phinodes.h"
44#include "ssa-iterators.h"
9ed99284 45#include "stringpool.h"
073c1fd5 46#include "tree-ssanames.h"
05d9c18a 47#include "tree-ssa-loop-ivopts.h"
48#include "tree-ssa-loop-manip.h"
073c1fd5 49#include "tree-ssa-loop.h"
b9ed1410 50#include "dumpfile.h"
fb85abff 51#include "cfgloop.h"
fb85abff 52#include "tree-chrec.h"
53#include "tree-scalar-evolution.h"
54#include "tree-vectorizer.h"
0b205f4c 55#include "diagnostic-core.h"
d09768a4 56#include "cgraph.h"
8e3cb73b 57/* Need to include rtl.h, expr.h, etc. for optabs. */
58#include "expr.h"
59#include "optabs.h"
f7715905 60#include "builtins.h"
fb85abff 61
94b7b4dd 62/* Return true if load- or store-lanes optab OPTAB is implemented for
63 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
64
65static bool
66vect_lanes_optab_supported_p (const char *name, convert_optab optab,
67 tree vectype, unsigned HOST_WIDE_INT count)
68{
69 enum machine_mode mode, array_mode;
70 bool limit_p;
71
72 mode = TYPE_MODE (vectype);
73 limit_p = !targetm.array_mode_supported_p (mode, count);
74 array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
75 MODE_INT, limit_p);
76
77 if (array_mode == BLKmode)
78 {
6d8fb6cf 79 if (dump_enabled_p ())
78bb46f5 80 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
81 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]\n",
7bd765d4 82 GET_MODE_NAME (mode), count);
94b7b4dd 83 return false;
84 }
85
86 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
87 {
6d8fb6cf 88 if (dump_enabled_p ())
7bd765d4 89 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 90 "cannot use %s<%s><%s>\n", name,
7bd765d4 91 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
94b7b4dd 92 return false;
93 }
94
6d8fb6cf 95 if (dump_enabled_p ())
7bd765d4 96 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 97 "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
7bd765d4 98 GET_MODE_NAME (mode));
94b7b4dd 99
100 return true;
101}
102
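/* Editor's note (illustrative, not part of the original source): for a
   128-bit vector mode and COUNT == 3, the code above asks mode_for_size
   for an integer mode of exactly 384 bits.  Most targets have no such
   mode, so array_mode is BLKmode and vect_lanes_optab_supported_p
   reports that the load/store-lanes optab cannot be used.  */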
103
fb85abff 104/* Return the smallest scalar part of STMT.
282bf14c 105 This is used to determine the vectype of the stmt. We generally set the
106 vectype according to the type of the result (lhs). For stmts whose
fb85abff 107 result-type is different from the type of the arguments (e.g., demotion,
48e1416a 108 promotion), vectype will be reset appropriately (later). Note that we have
fb85abff 109 to visit the smallest datatype in this function, because that determines the
282bf14c 110 VF. If the smallest datatype in the loop is present only as the rhs of a
fb85abff 111 promotion operation, we would otherwise miss it.
112 Such a case, where a variable of this datatype does not appear in the lhs
113 anywhere in the loop, can only occur if it's an invariant: e.g.:
48e1416a 114 'int_x = (int) short_inv', which we'd expect to have been optimized away by
282bf14c 115 invariant motion. However, we cannot rely on invariant motion to always
116 take invariants out of the loop, and so in the case of promotion we also
117 have to check the rhs.
fb85abff 118 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
119 types. */
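/* Editor's illustration (assumed example, not from the original source):
   in a loop such as

     short *s; int *d;
     for (i = 0; i < n; i++)
       d[i] = (int) s[i];

   the smallest scalar type (short) occurs only on the rhs of the widening
   conversion, so looking at the lhs alone would miss it; the returned type
   must be 'short' so that the VF is computed from the narrow elements.  */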
120
121tree
122vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
123 HOST_WIDE_INT *rhs_size_unit)
124{
125 tree scalar_type = gimple_expr_type (stmt);
126 HOST_WIDE_INT lhs, rhs;
127
f9ae6f95 128 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
fb85abff 129
130 if (is_gimple_assign (stmt)
131 && (gimple_assign_cast_p (stmt)
132 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
1c928a0c 133 || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
fb85abff 134 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
135 {
136 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
137
f9ae6f95 138 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
fb85abff 139 if (rhs < lhs)
140 scalar_type = rhs_type;
141 }
48e1416a 142
143 *lhs_size_unit = lhs;
fb85abff 144 *rhs_size_unit = rhs;
145 return scalar_type;
146}
147
148
fb85abff 149/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
150 tested at run-time. Return TRUE if DDR was successfully inserted.
151 Return false if versioning is not supported. */
152
153static bool
154vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
155{
156 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
157
158 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
159 return false;
160
6d8fb6cf 161 if (dump_enabled_p ())
fb85abff 162 {
7bd765d4 163 dump_printf_loc (MSG_NOTE, vect_location,
164 "mark for run-time aliasing test between ");
165 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
166 dump_printf (MSG_NOTE, " and ");
167 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
78bb46f5 168 dump_printf (MSG_NOTE, "\n");
fb85abff 169 }
170
171 if (optimize_loop_nest_for_size_p (loop))
172 {
6d8fb6cf 173 if (dump_enabled_p ())
78bb46f5 174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
175 "versioning not supported when optimizing"
176 " for size.\n");
fb85abff 177 return false;
178 }
179
180 /* FORNOW: We don't support versioning with outer-loop vectorization. */
181 if (loop->inner)
182 {
6d8fb6cf 183 if (dump_enabled_p ())
78bb46f5 184 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
185 "versioning not yet supported for outer-loops.\n");
fb85abff 186 return false;
187 }
188
f634c3e9 189 /* FORNOW: We don't support creating runtime alias tests for non-constant
190 step. */
191 if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
192 || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
193 {
6d8fb6cf 194 if (dump_enabled_p ())
78bb46f5 195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 196 "versioning not yet supported for non-constant "
78bb46f5 197 "step\n");
f634c3e9 198 return false;
199 }
200
f1f41a6c 201 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
fb85abff 202 return true;
203}
204
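/* Editor's sketch (assumption about the check generated elsewhere, not
   taken from this section): for two data-refs A and B recorded here, the
   versioned loop is guarded by a test of the form

     if (A_addr + A_segment <= B_addr || B_addr + B_segment <= A_addr)
       ... vectorized loop ...
     else
       ... original scalar loop ...

   i.e. the vector code runs only when the accessed address ranges do not
   overlap.  */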
37545e54 205
fb85abff 206/* Function vect_analyze_data_ref_dependence.
207
208 Return TRUE if there (might) exist a dependence between a memory-reference
209 DRA and a memory-reference DRB. When the dependence can instead be checked
91a74fc6 210 at run time by versioning for alias, return FALSE. Adjust *MAX_VF according to
211 the data dependence. */
48e1416a 212
fb85abff 213static bool
214vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
80508571 215 loop_vec_info loop_vinfo, int *max_vf)
fb85abff 216{
217 unsigned int i;
68f15e9d 218 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 219 struct data_reference *dra = DDR_A (ddr);
220 struct data_reference *drb = DDR_B (ddr);
48e1416a 221 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
fb85abff 222 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
fb85abff 223 lambda_vector dist_v;
224 unsigned int loop_depth;
48e1416a 225
68f15e9d 226 /* In loop analysis all data references should be vectorizable. */
6ea6a380 227 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
228 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
68f15e9d 229 gcc_unreachable ();
6ea6a380 230
68f15e9d 231 /* Independent data accesses. */
fb85abff 232 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
68f15e9d 233 return false;
37545e54 234
68f15e9d 235 if (dra == drb
236 || (DR_IS_READ (dra) && DR_IS_READ (drb)))
fb85abff 237 return false;
48e1416a 238
0f52e33a 239 /* Even if we have an anti-dependence then, as the vectorized loop covers at
240 least two scalar iterations, there is always also a true dependence.
241 As the vectorizer does not re-order loads and stores we can ignore
242 the anti-dependence if TBAA can disambiguate both DRs similar to the
243 case with known negative distance anti-dependences (positive
244 distance anti-dependences would violate TBAA constraints). */
245 if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
246 || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
247 && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
248 get_alias_set (DR_REF (drb))))
249 return false;
48e1416a 250
68f15e9d 251 /* Unknown data dependence. */
fb85abff 252 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
253 {
3d483a94 254 /* If user asserted safelen consecutive iterations can be
255 executed concurrently, assume independence. */
256 if (loop->safelen >= 2)
257 {
258 if (loop->safelen < *max_vf)
259 *max_vf = loop->safelen;
c7a8722c 260 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
3d483a94 261 return false;
262 }
263
95e19962 264 if (STMT_VINFO_GATHER_P (stmtinfo_a)
265 || STMT_VINFO_GATHER_P (stmtinfo_b))
266 {
267 if (dump_enabled_p ())
268 {
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
270 "versioning for alias not supported for: "
271 "can't determine dependence between ");
272 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
273 DR_REF (dra));
274 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
275 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
276 DR_REF (drb));
78bb46f5 277 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
95e19962 278 }
4c2623ad 279 return true;
95e19962 280 }
281
6d8fb6cf 282 if (dump_enabled_p ())
68f15e9d 283 {
284 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
285 "versioning for alias required: "
286 "can't determine dependence between ");
287 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
288 DR_REF (dra));
289 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
290 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
291 DR_REF (drb));
78bb46f5 292 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
68f15e9d 293 }
d4b21757 294
68f15e9d 295 /* Add to list of ddrs that need to be tested at run-time. */
296 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
37545e54 297 }
298
68f15e9d 299 /* Known data dependence. */
fb85abff 300 if (DDR_NUM_DIST_VECTS (ddr) == 0)
301 {
3d483a94 302 /* If user asserted safelen consecutive iterations can be
303 executed concurrently, assume independence. */
304 if (loop->safelen >= 2)
305 {
306 if (loop->safelen < *max_vf)
307 *max_vf = loop->safelen;
c7a8722c 308 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
3d483a94 309 return false;
310 }
311
95e19962 312 if (STMT_VINFO_GATHER_P (stmtinfo_a)
313 || STMT_VINFO_GATHER_P (stmtinfo_b))
314 {
315 if (dump_enabled_p ())
316 {
317 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
318 "versioning for alias not supported for: "
319 "bad dist vector for ");
320 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
321 DR_REF (dra));
322 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
323 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
324 DR_REF (drb));
78bb46f5 325 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
95e19962 326 }
4c2623ad 327 return true;
95e19962 328 }
329
6d8fb6cf 330 if (dump_enabled_p ())
fb85abff 331 {
78bb46f5 332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 333 "versioning for alias required: "
334 "bad dist vector for ");
335 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
336 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
337 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
78bb46f5 338 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 339 }
340 /* Add to list of ddrs that need to be tested at run-time. */
341 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
48e1416a 342 }
fb85abff 343
344 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
f1f41a6c 345 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
fb85abff 346 {
347 int dist = dist_v[loop_depth];
348
6d8fb6cf 349 if (dump_enabled_p ())
7bd765d4 350 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 351 "dependence distance = %d.\n", dist);
fb85abff 352
91a74fc6 353 if (dist == 0)
fb85abff 354 {
6d8fb6cf 355 if (dump_enabled_p ())
fb85abff 356 {
78bb46f5 357 dump_printf_loc (MSG_NOTE, vect_location,
358 "dependence distance == 0 between ");
7bd765d4 359 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
360 dump_printf (MSG_NOTE, " and ");
361 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
78bb46f5 362 dump_printf (MSG_NOTE, "\n");
fb85abff 363 }
364
4d525783 365 /* When we perform grouped accesses and perform implicit CSE
366 by detecting equal accesses and doing disambiguation with
367 runtime alias tests like for
368 .. = a[i];
369 .. = a[i+1];
370 a[i] = ..;
371 a[i+1] = ..;
372 *p = ..;
373 .. = a[i];
374 .. = a[i+1];
375 where we will end up loading { a[i], a[i+1] } once, make
376 sure that inserting group loads before the first load and
377 stores after the last store will do the right thing. */
378 if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
379 && GROUP_SAME_DR_STMT (stmtinfo_a))
380 || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)
381 && GROUP_SAME_DR_STMT (stmtinfo_b)))
382 {
383 gimple earlier_stmt;
384 earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
385 if (DR_IS_WRITE
386 (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
387 {
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 390 "READ_WRITE dependence in interleaving."
391 "\n");
4d525783 392 return true;
393 }
fb85abff 394 }
48e1416a 395
91a74fc6 396 continue;
397 }
398
399 if (dist > 0 && DDR_REVERSED_P (ddr))
400 {
401 /* If DDR_REVERSED_P the order of the data-refs in DDR was
402 reversed (to make distance vector positive), and the actual
403 distance is negative. */
6d8fb6cf 404 if (dump_enabled_p ())
7bd765d4 405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 406 "dependence distance negative.\n");
a8cf7702 407 /* Record a negative dependence distance to later limit the
408 amount of stmt copying / unrolling we can perform.
409 Only need to handle read-after-write dependence. */
410 if (DR_IS_READ (drb)
411 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
412 || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
413 STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
91a74fc6 414 continue;
415 }
416
417 if (abs (dist) >= 2
418 && abs (dist) < *max_vf)
419 {
420 /* The dependence distance requires reduction of the maximal
421 vectorization factor. */
422 *max_vf = abs (dist);
6d8fb6cf 423 if (dump_enabled_p ())
7bd765d4 424 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 425 "adjusting maximal vectorization factor to %i\n",
426 *max_vf);
fb85abff 427 }
428
91a74fc6 429 if (abs (dist) >= *max_vf)
fb85abff 430 {
48e1416a 431 /* In this case the dependence distance does not create a dependence,
91a74fc6 432 as far as vectorization is concerned. */
6d8fb6cf 433 if (dump_enabled_p ())
7bd765d4 434 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 435 "dependence distance >= VF.\n");
fb85abff 436 continue;
437 }
438
6d8fb6cf 439 if (dump_enabled_p ())
fb85abff 440 {
7bd765d4 441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 442 "not vectorized, possible dependence "
443 "between data-refs ");
7bd765d4 444 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
445 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
446 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
78bb46f5 447 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 448 }
449
450 return true;
451 }
452
453 return false;
454}
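/* Editor's worked example (assumption, not part of the original source):
   for the loop

     for (i = 0; i < n; i++)
       a[i + 3] = a[i] + 1;

   the dependence distance is 3, so the function above lowers *max_vf to 3;
   a vectorization factor of 2 is still allowed (distance >= VF), while
   VF == 4 would be rejected.  */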
455
456/* Function vect_analyze_data_ref_dependences.
48e1416a 457
fb85abff 458 Examine all the data references in the loop, and make sure there do not
91a74fc6 459 exist any data dependences between them. Set *MAX_VF according to
460 the maximum vectorization factor the data dependences allow. */
48e1416a 461
fb85abff 462bool
68f15e9d 463vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
fb85abff 464{
465 unsigned int i;
fb85abff 466 struct data_dependence_relation *ddr;
467
6d8fb6cf 468 if (dump_enabled_p ())
7bd765d4 469 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 470 "=== vect_analyze_data_ref_dependences ===\n");
68f15e9d 471
c7a8722c 472 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
68f15e9d 473 if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
474 &LOOP_VINFO_DDRS (loop_vinfo),
475 LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
476 return false;
477
478 FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
479 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
480 return false;
481
482 return true;
483}
484
485
486/* Function vect_slp_analyze_data_ref_dependence.
487
488 Return TRUE if there (might) exist a dependence between a memory-reference
489 DRA and a memory-reference DRB. Unlike the loop variant above, this
490 function takes no MAX_VF argument and sets up no run-time alias checks;
491 a possible dependence makes the basic block unvectorizable. */
492
493static bool
494vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
495{
496 struct data_reference *dra = DDR_A (ddr);
497 struct data_reference *drb = DDR_B (ddr);
498
499 /* We need to check dependences of statements marked as unvectorizable
500 as well; they can still prohibit vectorization. */
501
502 /* Independent data accesses. */
503 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
504 return false;
505
506 if (dra == drb)
507 return false;
508
509 /* Read-read is OK. */
510 if (DR_IS_READ (dra) && DR_IS_READ (drb))
511 return false;
512
1fa434e3 513 /* If dra and drb are part of the same interleaving chain consider
514 them independent. */
515 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
516 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
517 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
518 return false;
519
68f15e9d 520 /* Unknown data dependence. */
521 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
07e3bcbf 522 {
50e6c257 523 if (dump_enabled_p ())
524 {
525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
526 "can't determine dependence between ");
527 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
528 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
529 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
78bb46f5 530 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
50e6c257 531 }
07e3bcbf 532 }
50e6c257 533 else if (dump_enabled_p ())
07e3bcbf 534 {
68f15e9d 535 dump_printf_loc (MSG_NOTE, vect_location,
536 "determined dependence between ");
537 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
538 dump_printf (MSG_NOTE, " and ");
539 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
78bb46f5 540 dump_printf (MSG_NOTE, "\n");
07e3bcbf 541 }
48e1416a 542
50e6c257 543 /* We do not vectorize basic blocks with write-write dependencies. */
68f15e9d 544 if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
545 return true;
546
50e6c257 547 /* If we have a read-write dependence, check that the load is before the store.
68f15e9d 548 When we vectorize basic blocks, a vector load can only be placed before the
549 corresponding scalar load, and a vector store can only be placed after its
550 corresponding scalar store. So the order of the accesses is preserved in
551 case the load is before the store. */
50e6c257 552 gimple earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
68f15e9d 553 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
50e6c257 554 {
555 /* That only holds for load-store pairs taking part in vectorization. */
556 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dra)))
557 && STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (drb))))
558 return false;
559 }
68f15e9d 560
561 return true;
562}
563
564
565/* Function vect_slp_analyze_data_ref_dependences.
566
567 Examine all the data references in the basic-block, and make sure there
568 do not exist any data dependences between them. Unlike the loop analysis,
569 no *MAX_VF adjustment is performed here. */
570
571bool
572vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
573{
574 struct data_dependence_relation *ddr;
575 unsigned int i;
576
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 579 "=== vect_slp_analyze_data_ref_dependences ===\n");
68f15e9d 580
581 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
582 &BB_VINFO_DDRS (bb_vinfo),
583 vNULL, true))
584 return false;
585
586 FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
587 if (vect_slp_analyze_data_ref_dependence (ddr))
fb85abff 588 return false;
589
590 return true;
591}
592
593
594/* Function vect_compute_data_ref_alignment
595
596 Compute the misalignment of the data reference DR.
597
598 Output:
599 1. If during the misalignment computation it is found that the data reference
600 cannot be vectorized then false is returned.
601 2. DR_MISALIGNMENT (DR) is defined.
602
603 FOR NOW: No analysis is actually performed. Misalignment is calculated
604 only for trivial cases. TODO. */
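/* Editor's illustration (assumed layout, not from the original source):
   for a V4SI access 'a[i + 3]' where 'int a[N]' is itself 16-byte
   aligned, DR_BASE_ADDRESS is the address of 'a', DR_INIT is 12, and the
   computed DR_MISALIGNMENT below becomes 12 % 16 == 12 bytes.  */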
605
606static bool
607vect_compute_data_ref_alignment (struct data_reference *dr)
608{
609 gimple stmt = DR_STMT (dr);
48e1416a 610 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
fb85abff 611 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 612 struct loop *loop = NULL;
fb85abff 613 tree ref = DR_REF (dr);
614 tree vectype;
615 tree base, base_addr;
616 bool base_aligned;
617 tree misalign;
618 tree aligned_to, alignment;
48e1416a 619
6d8fb6cf 620 if (dump_enabled_p ())
7bd765d4 621 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 622 "vect_compute_data_ref_alignment:\n");
fb85abff 623
37545e54 624 if (loop_vinfo)
625 loop = LOOP_VINFO_LOOP (loop_vinfo);
48e1416a 626
fb85abff 627 /* Initialize misalignment to unknown. */
628 SET_DR_MISALIGNMENT (dr, -1);
629
a72a38f4 630 /* Strided loads perform only component accesses, misalignment information
631 is irrelevant for them. */
632 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
633 return true;
634
fb85abff 635 misalign = DR_INIT (dr);
636 aligned_to = DR_ALIGNED_TO (dr);
637 base_addr = DR_BASE_ADDRESS (dr);
638 vectype = STMT_VINFO_VECTYPE (stmt_info);
639
640 /* In case the dataref is in an inner-loop of the loop that is being
641 vectorized (LOOP), we use the base and misalignment information
282bf14c 642 relative to the outer-loop (LOOP). This is ok only if the misalignment
fb85abff 643 stays the same throughout the execution of the inner-loop, which is why
644 we have to check that the stride of the dataref in the inner-loop is a
645 multiple of the vector size. */
37545e54 646 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 647 {
648 tree step = DR_STEP (dr);
f9ae6f95 649 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
48e1416a 650
fb85abff 651 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
652 {
6d8fb6cf 653 if (dump_enabled_p ())
7bd765d4 654 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 655 "inner step divides the vector-size.\n");
fb85abff 656 misalign = STMT_VINFO_DR_INIT (stmt_info);
657 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
658 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
659 }
660 else
661 {
6d8fb6cf 662 if (dump_enabled_p ())
7bd765d4 663 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 664 "inner step doesn't divide the vector-size.\n");
fb85abff 665 misalign = NULL_TREE;
666 }
667 }
668
38682b67 669 /* Similarly, if we're doing basic-block vectorization, we can only use
670 base and misalignment information relative to an innermost loop if the
671 misalignment stays the same throughout the execution of the loop.
672 As above, this is the case if the stride of the dataref is a multiple
673 of the vector size. */
674 if (!loop)
675 {
676 tree step = DR_STEP (dr);
f9ae6f95 677 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
38682b67 678
679 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
680 {
6d8fb6cf 681 if (dump_enabled_p ())
78bb46f5 682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
683 "SLP: step doesn't divide the vector-size.\n");
38682b67 684 misalign = NULL_TREE;
685 }
686 }
687
fb85abff 688 base = build_fold_indirect_ref (base_addr);
689 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
690
691 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
692 || !misalign)
693 {
6d8fb6cf 694 if (dump_enabled_p ())
fb85abff 695 {
7bd765d4 696 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 697 "Unknown alignment for access: ");
7bd765d4 698 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
78bb46f5 699 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 700 }
701 return true;
702 }
703
48e1416a 704 if ((DECL_P (base)
fb85abff 705 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
706 alignment) >= 0)
707 || (TREE_CODE (base_addr) == SSA_NAME
708 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
709 TREE_TYPE (base_addr)))),
607ce623 710 alignment) >= 0)
957d0361 711 || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
fb85abff 712 base_aligned = true;
713 else
48e1416a 714 base_aligned = false;
fb85abff 715
48e1416a 716 if (!base_aligned)
fb85abff 717 {
08f83b65 718 /* Do not change the alignment of global variables here if
719 flag_section_anchors is enabled as we already generated
720 RTL for other functions. Most global variables should
721 have been aligned during the IPA increase_alignment pass. */
722 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
723 || (TREE_STATIC (base) && flag_section_anchors))
fb85abff 724 {
6d8fb6cf 725 if (dump_enabled_p ())
fb85abff 726 {
7bd765d4 727 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 728 "can't force alignment of ref: ");
7bd765d4 729 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
78bb46f5 730 dump_printf (MSG_NOTE, "\n");
fb85abff 731 }
732 return true;
733 }
48e1416a 734
fb85abff 735 /* Force the alignment of the decl.
736 NOTE: This is the only change to the code we make during
737 the analysis phase, before deciding to vectorize the loop. */
6d8fb6cf 738 if (dump_enabled_p ())
0822b158 739 {
7bd765d4 740 dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
741 dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
78bb46f5 742 dump_printf (MSG_NOTE, "\n");
0822b158 743 }
744
23e1875f 745 ((dataref_aux *)dr->aux)->base_decl = base;
746 ((dataref_aux *)dr->aux)->base_misaligned = true;
fb85abff 747 }
748
85a846a2 749 /* If this is a backward running DR then first access in the larger
750 vectype actually is N-1 elements before the address in the DR.
751 Adjust misalign accordingly. */
752 if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
753 {
754 tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
755 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
756 otherwise we wouldn't be here. */
757 offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
758 /* PLUS because DR_STEP was negative. */
759 misalign = size_binop (PLUS_EXPR, misalign, offset);
760 }
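/* Editor's worked example (assumption): for a V4SI load with
   DR_STEP == -4, the offset computed above is 3 * -4 == -12, so a
   running misalign of 0 becomes -12 before the FLOOR_MOD below folds it
   back into the range [0, 16), giving a misalignment of 4 bytes.  */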
761
fb85abff 762 /* Modulo alignment. */
763 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
764
e913b5cd 765 if (!tree_fits_uhwi_p (misalign))
fb85abff 766 {
767 /* Negative or overflowed misalignment value. */
6d8fb6cf 768 if (dump_enabled_p ())
7bd765d4 769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 770 "unexpected misalign value\n");
fb85abff 771 return false;
772 }
773
8c53c46c 774 SET_DR_MISALIGNMENT (dr, tree_to_uhwi (misalign));
fb85abff 775
6d8fb6cf 776 if (dump_enabled_p ())
fb85abff 777 {
7bd765d4 778 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
779 "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
780 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
78bb46f5 781 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 782 }
783
784 return true;
785}
786
787
788/* Function vect_compute_data_refs_alignment
789
790 Compute the misalignment of data references in the loop.
791 Return FALSE if a data reference is found that cannot be vectorized. */
792
793static bool
48e1416a 794vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
37545e54 795 bb_vec_info bb_vinfo)
fb85abff 796{
f1f41a6c 797 vec<data_reference_p> datarefs;
fb85abff 798 struct data_reference *dr;
799 unsigned int i;
800
37545e54 801 if (loop_vinfo)
802 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
803 else
804 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
48e1416a 805
f1f41a6c 806 FOR_EACH_VEC_ELT (datarefs, i, dr)
6ea6a380 807 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
808 && !vect_compute_data_ref_alignment (dr))
809 {
810 if (bb_vinfo)
811 {
812 /* Mark unsupported statement as unvectorizable. */
813 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
814 continue;
815 }
816 else
817 return false;
818 }
fb85abff 819
820 return true;
821}
822
823
824/* Function vect_update_misalignment_for_peel
825
826 DR - the data reference whose misalignment is to be adjusted.
827 DR_PEEL - the data reference whose misalignment is being made
828 zero in the vector loop by the peel.
829 NPEEL - the number of iterations in the peel loop if the misalignment
830 of DR_PEEL is known at compile time. */
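/* Editor's worked example (assumption): with 4-byte elements, a 16-byte
   vector alignment, NPEEL == 2 and a forward step, a data-ref whose old
   misalignment was 4 bytes gets (4 + 2*4) & 15 == 12 as its new
   misalignment below.  */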
831
832static void
833vect_update_misalignment_for_peel (struct data_reference *dr,
834 struct data_reference *dr_peel, int npeel)
835{
836 unsigned int i;
f1f41a6c 837 vec<dr_p> same_align_drs;
fb85abff 838 struct data_reference *current_dr;
839 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
840 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
841 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
842 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
843
844 /* For interleaved data accesses the step in the loop must be multiplied by
845 the size of the interleaving group. */
ee612634 846 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
21009880 847 dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
ee612634 848 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
21009880 849 dr_peel_size *= GROUP_SIZE (peel_stmt_info);
fb85abff 850
851 /* It can be assumed that the data refs with the same alignment as dr_peel
852 are aligned in the vector loop. */
853 same_align_drs
854 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
f1f41a6c 855 FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
fb85abff 856 {
857 if (current_dr != dr)
858 continue;
859 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
860 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
861 SET_DR_MISALIGNMENT (dr, 0);
862 return;
863 }
864
865 if (known_alignment_for_access_p (dr)
866 && known_alignment_for_access_p (dr_peel))
867 {
f1b8c740 868 bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
fb85abff 869 int misal = DR_MISALIGNMENT (dr);
870 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
f1b8c740 871 misal += negative ? -npeel * dr_size : npeel * dr_size;
482a44fa 872 misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
fb85abff 873 SET_DR_MISALIGNMENT (dr, misal);
874 return;
875 }
876
6d8fb6cf 877 if (dump_enabled_p ())
78bb46f5 878 dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.\n");
fb85abff 879 SET_DR_MISALIGNMENT (dr, -1);
880}
881
882
883/* Function vect_verify_datarefs_alignment
884
885 Return TRUE if all data references in the loop can be
886 handled with respect to alignment. */
887
37545e54 888bool
889vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 890{
f1f41a6c 891 vec<data_reference_p> datarefs;
fb85abff 892 struct data_reference *dr;
893 enum dr_alignment_support supportable_dr_alignment;
894 unsigned int i;
895
37545e54 896 if (loop_vinfo)
897 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
898 else
899 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
900
f1f41a6c 901 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 902 {
903 gimple stmt = DR_STMT (dr);
904 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
905
1ad41595 906 if (!STMT_VINFO_RELEVANT_P (stmt_info))
907 continue;
908
6ea6a380 909 /* For interleaving, only the alignment of the first access matters.
910 Skip statements marked as not vectorizable. */
ee612634 911 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 912 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
6ea6a380 913 || !STMT_VINFO_VECTORIZABLE (stmt_info))
fb85abff 914 continue;
915
506aa8fc 916 /* Strided loads perform only component accesses, alignment is
917 irrelevant for them. */
918 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
919 continue;
920
0822b158 921 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
fb85abff 922 if (!supportable_dr_alignment)
923 {
6d8fb6cf 924 if (dump_enabled_p ())
fb85abff 925 {
926 if (DR_IS_READ (dr))
7bd765d4 927 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
928 "not vectorized: unsupported unaligned load.");
fb85abff 929 else
7bd765d4 930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
931 "not vectorized: unsupported unaligned "
932 "store.");
6ea6a380 933
7bd765d4 934 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
935 DR_REF (dr));
78bb46f5 936 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 937 }
938 return false;
939 }
6d8fb6cf 940 if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
7bd765d4 941 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 942 "Vectorizing an unaligned access.\n");
fb85abff 943 }
944 return true;
945}
946
cfa724cf 947/* Given a memory reference EXP, return whether its alignment is less
948 than its size. */
949
950static bool
951not_size_aligned (tree exp)
952{
e913b5cd 953 if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
cfa724cf 954 return true;
955
e913b5cd 956 return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
cfa724cf 957 > get_object_alignment (exp));
958}
fb85abff 959
960/* Function vector_alignment_reachable_p
961
962 Return true if vector alignment for DR is reachable by peeling
963 a few loop iterations. Return false otherwise. */
964
965static bool
966vector_alignment_reachable_p (struct data_reference *dr)
967{
968 gimple stmt = DR_STMT (dr);
969 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
970 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
971
ee612634 972 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
fb85abff 973 {
974 /* For interleaved access we peel only if number of iterations in
975 the prolog loop ({VF - misalignment}), is a multiple of the
976 number of the interleaved accesses. */
977 int elem_size, mis_in_elements;
978 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
979
980 /* FORNOW: handle only known alignment. */
981 if (!known_alignment_for_access_p (dr))
982 return false;
983
984 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
985 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
986
21009880 987 if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
fb85abff 988 return false;
989 }
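/* Editor's worked example (assumption): for a V4SI group with
   GROUP_SIZE == 2 and a misalignment of 8 bytes (2 elements), the check
   above computes (4 - 2) % 2 == 0, so alignment is reachable by peeling;
   with GROUP_SIZE == 3 the remainder would be 2 and peeling could not
   align the group.  */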
990
991 /* If misalignment is known at the compile time then allow peeling
992 only if natural alignment is reachable through peeling. */
993 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
994 {
48e1416a 995 HOST_WIDE_INT elmsize =
fb85abff 996 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6d8fb6cf 997 if (dump_enabled_p ())
fb85abff 998 {
78bb46f5 999 dump_printf_loc (MSG_NOTE, vect_location,
1000 "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1001 dump_printf (MSG_NOTE,
1002 ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
fb85abff 1003 }
1004 if (DR_MISALIGNMENT (dr) % elmsize)
1005 {
6d8fb6cf 1006 if (dump_enabled_p ())
78bb46f5 1007 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1008 "data size does not divide the misalignment.\n");
fb85abff 1009 return false;
1010 }
1011 }
1012
1013 if (!known_alignment_for_access_p (dr))
1014 {
cfa724cf 1015 tree type = TREE_TYPE (DR_REF (dr));
1016 bool is_packed = not_size_aligned (DR_REF (dr));
6d8fb6cf 1017 if (dump_enabled_p ())
78bb46f5 1018 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1019 "Unknown misalignment, is_packed = %d\n",is_packed);
2cd0995e 1020 if ((TYPE_USER_ALIGN (type) && !is_packed)
1021 || targetm.vectorize.vector_alignment_reachable (type, is_packed))
fb85abff 1022 return true;
1023 else
1024 return false;
1025 }
1026
1027 return true;
1028}
1029
0822b158 1030
1031/* Calculate the cost of the memory access represented by DR. */
1032
f97dec81 1033static void
0822b158 1034vect_get_data_access_cost (struct data_reference *dr,
1035 unsigned int *inside_cost,
f97dec81 1036 unsigned int *outside_cost,
1037 stmt_vector_for_cost *body_cost_vec)
0822b158 1038{
1039 gimple stmt = DR_STMT (dr);
1040 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1041 int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1042 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1043 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1044 int ncopies = vf / nunits;
0822b158 1045
1ad41595 1046 if (DR_IS_READ (dr))
f97dec81 1047 vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
1048 NULL, body_cost_vec, false);
0822b158 1049 else
f97dec81 1050 vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);
0822b158 1051
6d8fb6cf 1052 if (dump_enabled_p ())
7bd765d4 1053 dump_printf_loc (MSG_NOTE, vect_location,
1054 "vect_get_data_access_cost: inside_cost = %d, "
78bb46f5 1055 "outside_cost = %d.\n", *inside_cost, *outside_cost);
0822b158 1056}
1057
1058
0822b158 1059/* Insert DR into peeling hash table with NPEEL as key. */
1060
1061static void
1062vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
1063 int npeel)
1064{
1065 struct _vect_peel_info elem, *slot;
3e871d4d 1066 _vect_peel_info **new_slot;
0822b158 1067 bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1068
1069 elem.npeel = npeel;
3e871d4d 1070 slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find (&elem);
0822b158 1071 if (slot)
1072 slot->count++;
1073 else
1074 {
1075 slot = XNEW (struct _vect_peel_info);
1076 slot->npeel = npeel;
1077 slot->dr = dr;
1078 slot->count = 1;
3e871d4d 1079 new_slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find_slot (slot, INSERT);
0822b158 1080 *new_slot = slot;
1081 }
1082
3e398f5b 1083 if (!supportable_dr_alignment
1084 && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
0822b158 1085 slot->count += VECT_MAX_COST;
1086}
1087
1088
1089/* Traverse peeling hash table to find peeling option that aligns maximum
1090 number of data accesses. */
1091
3e871d4d 1092int
1093vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1094 _vect_peel_extended_info *max)
0822b158 1095{
3e871d4d 1096 vect_peel_info elem = *slot;
0822b158 1097
593fa4d1 1098 if (elem->count > max->peel_info.count
1099 || (elem->count == max->peel_info.count
1100 && max->peel_info.npeel > elem->npeel))
0822b158 1101 {
1102 max->peel_info.npeel = elem->npeel;
1103 max->peel_info.count = elem->count;
1104 max->peel_info.dr = elem->dr;
1105 }
1106
1107 return 1;
1108}
1109
1110
282bf14c 1111/* Traverse peeling hash table and calculate cost for each peeling option.
1112 Find the one with the lowest cost. */
0822b158 1113
3e871d4d 1114int
1115vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
1116 _vect_peel_extended_info *min)
0822b158 1117{
3e871d4d 1118 vect_peel_info elem = *slot;
0822b158 1119 int save_misalignment, dummy;
1120 unsigned int inside_cost = 0, outside_cost = 0, i;
1121 gimple stmt = DR_STMT (elem->dr);
1122 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1123 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
f1f41a6c 1124 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
0822b158 1125 struct data_reference *dr;
f97dec81 1126 stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
1127 int single_iter_cost;
1128
f1f41a6c 1129 prologue_cost_vec.create (2);
1130 body_cost_vec.create (2);
1131 epilogue_cost_vec.create (2);
0822b158 1132
f1f41a6c 1133 FOR_EACH_VEC_ELT (datarefs, i, dr)
0822b158 1134 {
1135 stmt = DR_STMT (dr);
1136 stmt_info = vinfo_for_stmt (stmt);
1137 /* For interleaving, only the alignment of the first access
1138 matters. */
ee612634 1139 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1140 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
0822b158 1141 continue;
1142
1143 save_misalignment = DR_MISALIGNMENT (dr);
1144 vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
f97dec81 1145 vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
1146 &body_cost_vec);
0822b158 1147 SET_DR_MISALIGNMENT (dr, save_misalignment);
1148 }
1149
f97dec81 1150 single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
1151 outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
1152 &dummy, single_iter_cost,
1153 &prologue_cost_vec,
1154 &epilogue_cost_vec);
1155
1156 /* Prologue and epilogue costs are added to the target model later.
1157 These costs depend only on the scalar iteration cost, the
1158 number of peeling iterations finally chosen, and the number of
1159 misaligned statements. So discard the information found here. */
f1f41a6c 1160 prologue_cost_vec.release ();
1161 epilogue_cost_vec.release ();
0822b158 1162
1163 if (inside_cost < min->inside_cost
1164 || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
1165 {
1166 min->inside_cost = inside_cost;
1167 min->outside_cost = outside_cost;
f1f41a6c 1168 min->body_cost_vec.release ();
f97dec81 1169 min->body_cost_vec = body_cost_vec;
0822b158 1170 min->peel_info.dr = elem->dr;
1171 min->peel_info.npeel = elem->npeel;
1172 }
f97dec81 1173 else
f1f41a6c 1174 body_cost_vec.release ();
0822b158 1175
1176 return 1;
1177}
1178
1179
1180/* Choose best peeling option by traversing peeling hash table and either
1181 choosing an option with the lowest cost (if cost model is enabled) or the
1182 option that aligns as many accesses as possible. */
1183
1184static struct data_reference *
1185vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
4db2b577 1186 unsigned int *npeel,
f97dec81 1187 stmt_vector_for_cost *body_cost_vec)
0822b158 1188{
1189 struct _vect_peel_extended_info res;
1190
1191 res.peel_info.dr = NULL;
9af5ce0c 1192 res.body_cost_vec = stmt_vector_for_cost ();
0822b158 1193
3e398f5b 1194 if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
0822b158 1195 {
1196 res.inside_cost = INT_MAX;
1197 res.outside_cost = INT_MAX;
3e871d4d 1198 LOOP_VINFO_PEELING_HTAB (loop_vinfo)
1199 .traverse <_vect_peel_extended_info *,
1200 vect_peeling_hash_get_lowest_cost> (&res);
0822b158 1201 }
1202 else
1203 {
1204 res.peel_info.count = 0;
3e871d4d 1205 LOOP_VINFO_PEELING_HTAB (loop_vinfo)
1206 .traverse <_vect_peel_extended_info *,
1207 vect_peeling_hash_get_most_frequent> (&res);
0822b158 1208 }
1209
1210 *npeel = res.peel_info.npeel;
f97dec81 1211 *body_cost_vec = res.body_cost_vec;
0822b158 1212 return res.peel_info.dr;
1213}
1214
1215
fb85abff 1216/* Function vect_enhance_data_refs_alignment
1217
1218 This pass will use loop versioning and loop peeling in order to enhance
1219 the alignment of data references in the loop.
1220
1221 FOR NOW: we assume that whatever versioning/peeling takes place, only the
282bf14c 1222 original loop is to be vectorized. Any other loops that are created by
fb85abff 1223 the transformations performed in this pass - are not supposed to be
282bf14c 1224 vectorized. This restriction will be relaxed.
fb85abff 1225
1226 This pass will require a cost model to guide it whether to apply peeling
282bf14c 1227 or versioning or a combination of the two. For example, the scheme that
fb85abff 1228 intel uses when given a loop with several memory accesses, is as follows:
1229 choose one memory access ('p') which alignment you want to force by doing
282bf14c 1230 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
fb85abff 1231 other accesses are not necessarily aligned, or (2) use loop versioning to
1232 generate one loop in which all accesses are aligned, and another loop in
1233 which only 'p' is necessarily aligned.
1234
1235 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1236 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1237 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1238
282bf14c 1239 Devising a cost model is the most critical aspect of this work. It will
fb85abff 1240 guide us on which access to peel for, whether to use loop versioning, how
282bf14c 1241 many versions to create, etc. The cost model will probably consist of
fb85abff 1242 generic considerations as well as target specific considerations (on
1243 powerpc for example, misaligned stores are more painful than misaligned
1244 loads).
1245
1246 Here are the general steps involved in alignment enhancements:
1247
1248 -- original loop, before alignment analysis:
1249 for (i=0; i<N; i++){
1250 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1251 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1252 }
1253
1254 -- After vect_compute_data_refs_alignment:
1255 for (i=0; i<N; i++){
1256 x = q[i]; # DR_MISALIGNMENT(q) = 3
1257 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1258 }
1259
1260 -- Possibility 1: we do loop versioning:
1261 if (p is aligned) {
1262 for (i=0; i<N; i++){ # loop 1A
1263 x = q[i]; # DR_MISALIGNMENT(q) = 3
1264 p[i] = y; # DR_MISALIGNMENT(p) = 0
1265 }
1266 }
1267 else {
1268 for (i=0; i<N; i++){ # loop 1B
1269 x = q[i]; # DR_MISALIGNMENT(q) = 3
1270 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1271 }
1272 }
1273
1274 -- Possibility 2: we do loop peeling:
1275 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1276 x = q[i];
1277 p[i] = y;
1278 }
1279 for (i = 3; i < N; i++){ # loop 2A
1280 x = q[i]; # DR_MISALIGNMENT(q) = 0
1281 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1282 }
1283
1284 -- Possibility 3: combination of loop peeling and versioning:
1285 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1286 x = q[i];
1287 p[i] = y;
1288 }
1289 if (p is aligned) {
1290 for (i = 3; i<N; i++){ # loop 3A
1291 x = q[i]; # DR_MISALIGNMENT(q) = 0
1292 p[i] = y; # DR_MISALIGNMENT(p) = 0
1293 }
1294 }
1295 else {
1296 for (i = 3; i<N; i++){ # loop 3B
1297 x = q[i]; # DR_MISALIGNMENT(q) = 0
1298 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1299 }
1300 }
1301
282bf14c 1302 These loops are later passed to loop_transform to be vectorized. The
fb85abff 1303 vectorizer will use the alignment information to guide the transformation
1304 (whether to generate regular loads/stores, or with special handling for
1305 misalignment). */
1306
1307bool
1308vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1309{
f1f41a6c 1310 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
fb85abff 1311 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1312 enum dr_alignment_support supportable_dr_alignment;
0822b158 1313 struct data_reference *dr0 = NULL, *first_store = NULL;
fb85abff 1314 struct data_reference *dr;
0822b158 1315 unsigned int i, j;
fb85abff 1316 bool do_peeling = false;
1317 bool do_versioning = false;
1318 bool stat;
1319 gimple stmt;
1320 stmt_vec_info stmt_info;
0822b158 1321 unsigned int npeel = 0;
1322 bool all_misalignments_unknown = true;
1323 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1324 unsigned possible_npeel_number = 1;
1325 tree vectype;
1326 unsigned int nelements, mis, same_align_drs_max = 0;
9af5ce0c 1327 stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
fb85abff 1328
6d8fb6cf 1329 if (dump_enabled_p ())
7bd765d4 1330 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1331 "=== vect_enhance_data_refs_alignment ===\n");
fb85abff 1332
1333 /* While cost model enhancements are expected in the future, the high level
1334 view of the code at this time is as follows:
1335
ec2886ed 1336 A) If there is a misaligned access then see if peeling to align
1337 this access can make all data references satisfy
454f25be 1338 vect_supportable_dr_alignment. If so, update data structures
1339 as needed and return true.
fb85abff 1340
1341 B) If peeling wasn't possible and there is a data reference with an
1342 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1343 then see if loop versioning checks can be used to make all data
1344 references satisfy vect_supportable_dr_alignment. If so, update
1345 data structures as needed and return true.
1346
1347 C) If neither peeling nor versioning were successful then return false if
1348 any data reference does not satisfy vect_supportable_dr_alignment.
1349
1350 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1351
1352 Note, Possibility 3 above (which is peeling and versioning together) is not
1353 being done at this time. */
1354
1355 /* (1) Peeling to force alignment. */
1356
1357 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1358 Considerations:
1359 + How many accesses will become aligned due to the peeling
1360 - How many accesses will become unaligned due to the peeling,
1361 and the cost of misaligned accesses.
48e1416a 1362 - The cost of peeling (the extra runtime checks, the increase
0822b158 1363 in code size). */
fb85abff 1364
f1f41a6c 1365 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 1366 {
1367 stmt = DR_STMT (dr);
1368 stmt_info = vinfo_for_stmt (stmt);
1369
1ad41595 1370 if (!STMT_VINFO_RELEVANT_P (stmt_info))
b04940e7 1371 continue;
1372
fb85abff 1373 /* For interleaving, only the alignment of the first access
1374 matters. */
ee612634 1375 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1376 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
fb85abff 1377 continue;
1378
b04940e7 1379 /* For invariant accesses there is nothing to enhance. */
1380 if (integer_zerop (DR_STEP (dr)))
1381 continue;
1382
f634c3e9 1383 /* Strided loads perform only component accesses, alignment is
1384 irrelevant for them. */
1385 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1386 continue;
1387
0822b158 1388 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1389 do_peeling = vector_alignment_reachable_p (dr);
1390 if (do_peeling)
fb85abff 1391 {
0822b158 1392 if (known_alignment_for_access_p (dr))
1393 {
1394 unsigned int npeel_tmp;
f1b8c740 1395 bool negative = tree_int_cst_compare (DR_STEP (dr),
1396 size_zero_node) < 0;
0822b158 1397
1398 /* Save info about DR in the hash table. */
3e871d4d 1399 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
1400 LOOP_VINFO_PEELING_HTAB (loop_vinfo).create (1);
0822b158 1401
1402 vectype = STMT_VINFO_VECTYPE (stmt_info);
1403 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1404 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1405 TREE_TYPE (DR_REF (dr))));
f1b8c740 1406 npeel_tmp = (negative
184a9a0f 1407 ? (mis - nelements) : (nelements - mis))
1408 & (nelements - 1);
0822b158 1409
1410 /* For multiple types, it is possible that the bigger type access
282bf14c 1411 will have more than one peeling option. E.g., a loop with two
0822b158 1412 types: one of size (vector size / 4), and the other one of
282bf14c 1413 size (vector size / 8). Vectorization factor will be 8. If both
0822b158 1414 accesses are misaligned by 3, the first one needs one scalar
282bf14c 1415 iteration to be aligned, and the second one needs 5. But
0822b158 1416 the first one will also be aligned by peeling 5 scalar
1417 iterations, and in that case both accesses will be aligned.
1418 Hence, except for the immediate peeling amount, we also want
1419 to try adding a full vector size, as long as we don't exceed the
1420 vectorization factor.
1421 We do this automatically for the cost model, since we calculate the cost
1422 for every peeling option. */
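/* Editor's worked arithmetic for the comment above (assumption): with
   VF == 8 and mis == 3, the 4-element access gets npeel_tmp ==
   (4 - 3) & 3 == 1 and, with an unlimited cost model,
   possible_npeel_number == 8/4 == 2, so 1 + 4 == 5 is recorded as well;
   the 8-element access gets (8 - 3) & 7 == 5, so peeling 5 iterations
   aligns both.  */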
3e398f5b 1423 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
0822b158 1424 possible_npeel_number = vf /nelements;
1425
1426 /* Handle the aligned case. We may decide to align some other
1427 access, making DR unaligned. */
1428 if (DR_MISALIGNMENT (dr) == 0)
1429 {
1430 npeel_tmp = 0;
3e398f5b 1431 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
0822b158 1432 possible_npeel_number++;
1433 }
1434
1435 for (j = 0; j < possible_npeel_number; j++)
1436 {
1437 gcc_assert (npeel_tmp <= vf);
1438 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1439 npeel_tmp += nelements;
1440 }
1441
1442 all_misalignments_unknown = false;
1443 /* Data-ref that was chosen for the case that all the
1444 misalignments are unknown is not relevant anymore, since we
1445 have a data-ref with known alignment. */
1446 dr0 = NULL;
1447 }
1448 else
1449 {
6046367e 1450 /* If we don't know any misalignment values, we prefer
1451 peeling for the data-ref that has the maximum number of data-refs
0822b158 1452 with the same alignment, unless the target prefers to align
1453 stores over loads. */
1454 if (all_misalignments_unknown)
1455 {
6046367e 1456 unsigned same_align_drs
1457 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1458 if (!dr0
1459 || same_align_drs_max < same_align_drs)
0822b158 1460 {
6046367e 1461 same_align_drs_max = same_align_drs;
0822b158 1462 dr0 = dr;
1463 }
6046367e 1464 /* For data-refs with the same number of related
1465 accesses prefer the one where the misalign
1466 computation will be invariant in the outermost loop. */
1467 else if (same_align_drs_max == same_align_drs)
1468 {
1469 struct loop *ivloop0, *ivloop;
1470 ivloop0 = outermost_invariant_loop_for_expr
1471 (loop, DR_BASE_ADDRESS (dr0));
1472 ivloop = outermost_invariant_loop_for_expr
1473 (loop, DR_BASE_ADDRESS (dr));
1474 if ((ivloop && !ivloop0)
1475 || (ivloop && ivloop0
1476 && flow_loop_nested_p (ivloop, ivloop0)))
1477 dr0 = dr;
1478 }
0822b158 1479
9ff25603 1480 if (!first_store && DR_IS_WRITE (dr))
0822b158 1481 first_store = dr;
1482 }
1483
1484 /* If there are both known and unknown misaligned accesses in the
1485 loop, we choose peeling amount according to the known
1486 accesses. */
0822b158 1487 if (!supportable_dr_alignment)
1488 {
1489 dr0 = dr;
9ff25603 1490 if (!first_store && DR_IS_WRITE (dr))
0822b158 1491 first_store = dr;
1492 }
1493 }
1494 }
1495 else
1496 {
1497 if (!aligned_access_p (dr))
1498 {
6d8fb6cf 1499 if (dump_enabled_p ())
78bb46f5 1500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1501 "vector alignment may not be reachable\n");
0822b158 1502 break;
1503 }
1504 }
fb85abff 1505 }
1506
2cd0995e 1507 /* Check if we can possibly peel the loop. */
1508 if (!vect_can_advance_ivs_p (loop_vinfo)
fb85abff 1509 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1510 do_peeling = false;
1511
0822b158 1512 if (do_peeling && all_misalignments_unknown
1513 && vect_supportable_dr_alignment (dr0, false))
1514 {
1515
1516 /* Check if the target prefers to align stores over loads, i.e., if
1517 misaligned stores are more expensive than misaligned loads (taking
1518 drs with same alignment into account). */
1519 if (first_store && DR_IS_READ (dr0))
1520 {
1521 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1522 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1523 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1524 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
f1f41a6c 1525 stmt_vector_for_cost dummy;
1526 dummy.create (2);
f97dec81 1527
1528 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1529 &dummy);
1530 vect_get_data_access_cost (first_store, &store_inside_cost,
1531 &store_outside_cost, &dummy);
0822b158 1532
f1f41a6c 1533 dummy.release ();
0822b158 1534
1535 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1536 aligning the load DR0). */
1537 load_inside_penalty = store_inside_cost;
1538 load_outside_penalty = store_outside_cost;
f1f41a6c 1539 for (i = 0;
1540 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1541 DR_STMT (first_store))).iterate (i, &dr);
0822b158 1542 i++)
1543 if (DR_IS_READ (dr))
1544 {
1545 load_inside_penalty += load_inside_cost;
1546 load_outside_penalty += load_outside_cost;
1547 }
1548 else
1549 {
1550 load_inside_penalty += store_inside_cost;
1551 load_outside_penalty += store_outside_cost;
1552 }
1553
1554 /* Calculate the penalty for leaving DR0 unaligned (by
1555 aligning the FIRST_STORE). */
1556 store_inside_penalty = load_inside_cost;
1557 store_outside_penalty = load_outside_cost;
f1f41a6c 1558 for (i = 0;
1559 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1560 DR_STMT (dr0))).iterate (i, &dr);
0822b158 1561 i++)
1562 if (DR_IS_READ (dr))
1563 {
1564 store_inside_penalty += load_inside_cost;
1565 store_outside_penalty += load_outside_cost;
1566 }
1567 else
1568 {
1569 store_inside_penalty += store_inside_cost;
1570 store_outside_penalty += store_outside_cost;
1571 }
1572
1573 if (load_inside_penalty > store_inside_penalty
1574 || (load_inside_penalty == store_inside_penalty
1575 && load_outside_penalty > store_outside_penalty))
1576 dr0 = first_store;
1577 }
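      /* Illustrative example (hypothetical costs, not taken from the target
	 cost model): if a misaligned load costs 1 and a misaligned store
	 costs 2, and FIRST_STORE has one same-aligned load while DR0 has no
	 same-aligned refs, then LOAD_INSIDE_PENALTY = 2 + 1 = 3 and
	 STORE_INSIDE_PENALTY = 1, so the comparison above switches DR0 to
	 FIRST_STORE and we peel to align the store instead.  */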
1578
1579 /* In case there are only loads with different unknown misalignments, use
1580 peeling only if it may help to align other accesses in the loop. */
f1f41a6c 1581 if (!first_store
1582 && !STMT_VINFO_SAME_ALIGN_REFS (
1583 vinfo_for_stmt (DR_STMT (dr0))).length ()
0822b158 1584 && vect_supportable_dr_alignment (dr0, false)
1585 != dr_unaligned_supported)
1586 do_peeling = false;
1587 }
1588
1589 if (do_peeling && !dr0)
1590 {
 1591	      /* Peeling is possible, but every data access is supported even when
 1592	         misaligned.  So we try to choose the best possible peeling.  */
1593
1594 /* We should get here only if there are drs with known misalignment. */
1595 gcc_assert (!all_misalignments_unknown);
1596
1597 /* Choose the best peeling from the hash table. */
4db2b577 1598 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
f97dec81 1599 &body_cost_vec);
0822b158 1600 if (!dr0 || !npeel)
1601 do_peeling = false;
1602 }
1603
fb85abff 1604 if (do_peeling)
1605 {
0822b158 1606 stmt = DR_STMT (dr0);
1607 stmt_info = vinfo_for_stmt (stmt);
1608 vectype = STMT_VINFO_VECTYPE (stmt_info);
1609 nelements = TYPE_VECTOR_SUBPARTS (vectype);
fb85abff 1610
1611 if (known_alignment_for_access_p (dr0))
1612 {
f1b8c740 1613 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1614 size_zero_node) < 0;
0822b158 1615 if (!npeel)
1616 {
1617 /* Since it's known at compile time, compute the number of
1618 iterations in the peeled loop (the peeling factor) for use in
1619 updating DR_MISALIGNMENT values. The peeling factor is the
1620 vectorization factor minus the misalignment as an element
1621 count. */
1622 mis = DR_MISALIGNMENT (dr0);
1623 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
184a9a0f 1624 npeel = ((negative ? mis - nelements : nelements - mis)
1625 & (nelements - 1));
0822b158 1626 }
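	  /* For example (illustration only): with 4-element vectors
	     (NELEMENTS == 4), a positive DR_STEP and a misalignment of one
	     element, NPEEL = (4 - 1) & 3 = 3, i.e. three scalar iterations
	     are peeled so that DR0 becomes aligned.  */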
fb85abff 1627
48e1416a 1628 /* For interleaved data access every iteration accesses all the
fb85abff 1629 members of the group, therefore we divide the number of iterations
1630 by the group size. */
48e1416a 1631 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
ee612634 1632 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
21009880 1633 npeel /= GROUP_SIZE (stmt_info);
fb85abff 1634
6d8fb6cf 1635 if (dump_enabled_p ())
7bd765d4 1636 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1637 "Try peeling by %d\n", npeel);
fb85abff 1638 }
1639
1640 /* Ensure that all data refs can be vectorized after the peel. */
f1f41a6c 1641 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 1642 {
1643 int save_misalignment;
1644
1645 if (dr == dr0)
1646 continue;
1647
1648 stmt = DR_STMT (dr);
1649 stmt_info = vinfo_for_stmt (stmt);
1650 /* For interleaving, only the alignment of the first access
1651 matters. */
ee612634 1652 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1653 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
fb85abff 1654 continue;
1655
f634c3e9 1656	  /* Strided loads perform only component accesses; alignment is
 1657	     irrelevant for them.  */
1658 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1659 continue;
1660
fb85abff 1661 save_misalignment = DR_MISALIGNMENT (dr);
1662 vect_update_misalignment_for_peel (dr, dr0, npeel);
0822b158 1663 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
fb85abff 1664 SET_DR_MISALIGNMENT (dr, save_misalignment);
48e1416a 1665
fb85abff 1666 if (!supportable_dr_alignment)
1667 {
1668 do_peeling = false;
1669 break;
1670 }
1671 }
1672
0822b158 1673 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1674 {
1675 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1676 if (!stat)
1677 do_peeling = false;
1678 else
9f793cdf 1679 {
f1f41a6c 1680 body_cost_vec.release ();
9f793cdf 1681 return stat;
1682 }
0822b158 1683 }
1684
d7d7032a 1685 if (do_peeling)
1686 {
1687 unsigned max_allowed_peel
1688 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
1689 if (max_allowed_peel != (unsigned)-1)
1690 {
1691 unsigned max_peel = npeel;
1692 if (max_peel == 0)
1693 {
1694 gimple dr_stmt = DR_STMT (dr0);
1695 stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
1696 tree vtype = STMT_VINFO_VECTYPE (vinfo);
1697 max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
1698 }
1699 if (max_peel > max_allowed_peel)
1700 {
1701 do_peeling = false;
1702 if (dump_enabled_p ())
1703 dump_printf_loc (MSG_NOTE, vect_location,
1704 "Disable peeling, max peels reached: %d\n", max_peel);
1705 }
1706 }
1707 }
1708
fb85abff 1709 if (do_peeling)
1710 {
4db2b577 1711 stmt_info_for_cost *si;
f97dec81 1712 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
4db2b577 1713
fb85abff 1714 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1715 If the misalignment of DR_i is identical to that of dr0 then set
1716 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1717 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1718 by the peeling factor times the element size of DR_i (MOD the
1719 vectorization factor times the size). Otherwise, the
1720 misalignment of DR_i must be set to unknown. */
f1f41a6c 1721 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 1722 if (dr != dr0)
1723 vect_update_misalignment_for_peel (dr, dr0, npeel);
1724
1725 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
0822b158 1726 if (npeel)
313a5120 1727 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
0822b158 1728 else
313a5120 1729 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
1730 = DR_MISALIGNMENT (dr0);
fb85abff 1731 SET_DR_MISALIGNMENT (dr0, 0);
6d8fb6cf 1732 if (dump_enabled_p ())
7bd765d4 1733 {
1734 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1735 "Alignment of access forced using peeling.\n");
7bd765d4 1736 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1737 "Peeling for alignment will be applied.\n");
7bd765d4 1738 }
4db2b577 1739 /* We've delayed passing the inside-loop peeling costs to the
1740 target cost model until we were sure peeling would happen.
1741 Do so now. */
f1f41a6c 1742 if (body_cost_vec.exists ())
4db2b577 1743 {
f1f41a6c 1744 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
f97dec81 1745 {
1746 struct _stmt_vec_info *stmt_info
1747 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1748 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1749 si->misalign, vect_body);
1750 }
f1f41a6c 1751 body_cost_vec.release ();
4db2b577 1752 }
1753
37545e54 1754 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1755 gcc_assert (stat);
1756 return stat;
1757 }
1758 }
1759
f1f41a6c 1760 body_cost_vec.release ();
fb85abff 1761
1762 /* (2) Versioning to force alignment. */
1763
1764 /* Try versioning if:
1dbf9bd1 1765	   1) the loop is optimized for speed, and
1766 2) there is at least one unsupported misaligned data ref with an unknown
fb85abff 1767 misalignment, and
1dbf9bd1 1768 3) all misaligned data refs with a known misalignment are supported, and
1769 4) the number of runtime alignment checks is within reason. */
fb85abff 1770
48e1416a 1771 do_versioning =
1dbf9bd1 1772 optimize_loop_nest_for_speed_p (loop)
fb85abff 1773 && (!loop->inner); /* FORNOW */
1774
1775 if (do_versioning)
1776 {
f1f41a6c 1777 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 1778 {
1779 stmt = DR_STMT (dr);
1780 stmt_info = vinfo_for_stmt (stmt);
1781
1782 /* For interleaving, only the alignment of the first access
1783 matters. */
1784 if (aligned_access_p (dr)
ee612634 1785 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
21009880 1786 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
fb85abff 1787 continue;
1788
f634c3e9 1789	 /* Strided loads perform only component accesses; alignment is
 1790	    irrelevant for them.  */
1791 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1792 continue;
1793
0822b158 1794 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
fb85abff 1795
1796 if (!supportable_dr_alignment)
1797 {
1798 gimple stmt;
1799 int mask;
1800 tree vectype;
1801
1802 if (known_alignment_for_access_p (dr)
f1f41a6c 1803 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
fb85abff 1804 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1805 {
1806 do_versioning = false;
1807 break;
1808 }
1809
1810 stmt = DR_STMT (dr);
1811 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1812 gcc_assert (vectype);
48e1416a 1813
fb85abff 1814 /* The rightmost bits of an aligned address must be zeros.
1815 Construct the mask needed for this test. For example,
1816 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1817 mask must be 15 = 0xf. */
1818 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
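	      /* The versioning condition generated later (see
		 vect_create_cond_for_align_checks) in essence tests that
		 (addr_1 | ... | addr_n) & MASK == 0 for all the recorded
		 potentially misaligned addresses.  */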
1819
1820 /* FORNOW: use the same mask to test all potentially unaligned
1821 references in the loop. The vectorizer currently supports
1822 a single vector size, see the reference to
1823 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1824 vectorization factor is computed. */
1825 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1826 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1827 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
f1f41a6c 1828 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
1829 DR_STMT (dr));
fb85abff 1830 }
1831 }
48e1416a 1832
fb85abff 1833 /* Versioning requires at least one misaligned data reference. */
10095225 1834 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
fb85abff 1835 do_versioning = false;
1836 else if (!do_versioning)
f1f41a6c 1837 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
fb85abff 1838 }
1839
1840 if (do_versioning)
1841 {
f1f41a6c 1842 vec<gimple> may_misalign_stmts
fb85abff 1843 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1844 gimple stmt;
1845
1846 /* It can now be assumed that the data references in the statements
1847 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1848 of the loop being vectorized. */
f1f41a6c 1849 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
fb85abff 1850 {
1851 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1852 dr = STMT_VINFO_DATA_REF (stmt_info);
1853 SET_DR_MISALIGNMENT (dr, 0);
6d8fb6cf 1854 if (dump_enabled_p ())
78bb46f5 1855 dump_printf_loc (MSG_NOTE, vect_location,
1856 "Alignment of access forced using versioning.\n");
fb85abff 1857 }
1858
6d8fb6cf 1859 if (dump_enabled_p ())
78bb46f5 1860 dump_printf_loc (MSG_NOTE, vect_location,
1861 "Versioning for alignment will be applied.\n");
fb85abff 1862
1863 /* Peeling and versioning can't be done together at this time. */
1864 gcc_assert (! (do_peeling && do_versioning));
1865
37545e54 1866 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1867 gcc_assert (stat);
1868 return stat;
1869 }
1870
1871 /* This point is reached if neither peeling nor versioning is being done. */
1872 gcc_assert (! (do_peeling || do_versioning));
1873
37545e54 1874 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
fb85abff 1875 return stat;
1876}
1877
1878
91a74fc6 1879/* Function vect_find_same_alignment_drs.
1880
1881 Update group and alignment relations according to the chosen
1882 vectorization factor. */
1883
1884static void
1885vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1886 loop_vec_info loop_vinfo)
1887{
1888 unsigned int i;
1889 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1890 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1891 struct data_reference *dra = DDR_A (ddr);
1892 struct data_reference *drb = DDR_B (ddr);
1893 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1894 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1895 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1896 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1897 lambda_vector dist_v;
1898 unsigned int loop_depth;
1899
1900 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1901 return;
1902
0822b158 1903 if (dra == drb)
91a74fc6 1904 return;
1905
1906 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1907 return;
1908
1909 /* Loop-based vectorization and known data dependence. */
1910 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1911 return;
1912
85a846a2 1913 /* Data-dependence analysis reports a distance vector of zero
1914 for data-references that overlap only in the first iteration
 1915	     but have steps of different sign (see PR45764).
1916 So as a sanity check require equal DR_STEP. */
1917 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1918 return;
1919
91a74fc6 1920 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
f1f41a6c 1921 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
91a74fc6 1922 {
1923 int dist = dist_v[loop_depth];
1924
6d8fb6cf 1925 if (dump_enabled_p ())
7bd765d4 1926 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1927 "dependence distance = %d.\n", dist);
91a74fc6 1928
1929 /* Same loop iteration. */
1930 if (dist == 0
1931 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1932 {
1933 /* Two references with distance zero have the same alignment. */
f1f41a6c 1934 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
1935 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
6d8fb6cf 1936 if (dump_enabled_p ())
91a74fc6 1937 {
78bb46f5 1938 dump_printf_loc (MSG_NOTE, vect_location,
1939 "accesses have the same alignment.\n");
7bd765d4 1940 dump_printf (MSG_NOTE,
78bb46f5 1941 "dependence distance modulo vf == 0 between ");
7bd765d4 1942 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
1943 dump_printf (MSG_NOTE, " and ");
1944 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
78bb46f5 1945 dump_printf (MSG_NOTE, "\n");
91a74fc6 1946 }
1947 }
1948 }
1949}
1950
1951
fb85abff 1952/* Function vect_analyze_data_refs_alignment
1953
1954 Analyze the alignment of the data-references in the loop.
1955 Return FALSE if a data reference is found that cannot be vectorized. */
1956
1957bool
48e1416a 1958vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
37545e54 1959 bb_vec_info bb_vinfo)
fb85abff 1960{
6d8fb6cf 1961 if (dump_enabled_p ())
7bd765d4 1962 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1963 "=== vect_analyze_data_refs_alignment ===\n");
fb85abff 1964
91a74fc6 1965 /* Mark groups of data references with same alignment using
1966 data dependence information. */
1967 if (loop_vinfo)
1968 {
f1f41a6c 1969 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
91a74fc6 1970 struct data_dependence_relation *ddr;
1971 unsigned int i;
1972
f1f41a6c 1973 FOR_EACH_VEC_ELT (ddrs, i, ddr)
91a74fc6 1974 vect_find_same_alignment_drs (ddr, loop_vinfo);
1975 }
1976
37545e54 1977 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
fb85abff 1978 {
6d8fb6cf 1979 if (dump_enabled_p ())
78bb46f5 1980 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1981 "not vectorized: can't calculate alignment "
1982 "for data ref.\n");
fb85abff 1983 return false;
1984 }
1985
1986 return true;
1987}
1988
1989
ee612634 1990/* Analyze groups of accesses: check that DR belongs to a group of
1991 accesses of legal size, step, etc. Detect gaps, single element
1992 interleaving, and other special cases. Set grouped access info.
fb85abff 1993 Collect groups of strided stores for further use in SLP analysis. */
1994
1995static bool
1996vect_analyze_group_access (struct data_reference *dr)
1997{
1998 tree step = DR_STEP (dr);
1999 tree scalar_type = TREE_TYPE (DR_REF (dr));
f9ae6f95 2000 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
fb85abff 2001 gimple stmt = DR_STMT (dr);
2002 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2003 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 2004 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
f9ae6f95 2005 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
ee612634 2006 HOST_WIDE_INT groupsize, last_accessed_element = 1;
fb85abff 2007 bool slp_impossible = false;
d88ecd95 2008 struct loop *loop = NULL;
2009
2010 if (loop_vinfo)
2011 loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 2012
ee612634 2013 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2014 size of the interleaving group (including gaps). */
8bbe6b75 2015 groupsize = absu_hwi (dr_step) / type_size;
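  /* E.g. a step of 32 bytes over 4-byte elements gives GROUPSIZE 8.  */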
fb85abff 2016
 2017	  /* A non-consecutive access is possible only as part of an interleaving group.  */
21009880 2018 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
fb85abff 2019 {
 2020	      /* Check if this DR is a part of interleaving, and is a single
 2021	         element of the group that is accessed in the loop.  */
48e1416a 2022
fb85abff 2023 /* Gaps are supported only for loads. STEP must be a multiple of the type
2024 size. The size of the group must be a power of 2. */
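      /* For example (illustration only), a read a[4*i] of 4-byte elements
	 has a 16-byte step, giving GROUPSIZE 4 with only this single
	 element of each group accessed in the loop.  */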
2025 if (DR_IS_READ (dr)
2026 && (dr_step % type_size) == 0
ee612634 2027 && groupsize > 0
2028 && exact_log2 (groupsize) != -1)
fb85abff 2029 {
21009880 2030 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
ee612634 2031 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
6d8fb6cf 2032 if (dump_enabled_p ())
fb85abff 2033 {
78bb46f5 2034 dump_printf_loc (MSG_NOTE, vect_location,
2035 "Detected single element interleaving ");
7bd765d4 2036 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2037 dump_printf (MSG_NOTE, " step ");
2038 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
78bb46f5 2039 dump_printf (MSG_NOTE, "\n");
fb85abff 2040 }
a4ee7fac 2041
2042 if (loop_vinfo)
2043 {
6d8fb6cf 2044 if (dump_enabled_p ())
7bd765d4 2045 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2046 "Data access with gaps requires scalar "
2047 "epilogue loop\n");
d88ecd95 2048 if (loop->inner)
2049 {
6d8fb6cf 2050 if (dump_enabled_p ())
7bd765d4 2051 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2052 "Peeling for outer loop is not"
78bb46f5 2053 " supported\n");
d88ecd95 2054 return false;
2055 }
2056
2057 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
a4ee7fac 2058 }
2059
fb85abff 2060 return true;
2061 }
6ea6a380 2062
6d8fb6cf 2063 if (dump_enabled_p ())
6ea6a380 2064 {
7bd765d4 2065 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 2066 "not consecutive access ");
2067 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2068 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
6ea6a380 2069 }
2070
2071 if (bb_vinfo)
2072 {
2073 /* Mark the statement as unvectorizable. */
2074 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2075 return true;
2076 }
7bd765d4 2077
fb85abff 2078 return false;
2079 }
2080
21009880 2081 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
fb85abff 2082 {
2083 /* First stmt in the interleaving chain. Check the chain. */
21009880 2084 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
fb85abff 2085 struct data_reference *data_ref = dr;
1a0e7d51 2086 unsigned int count = 1;
fb85abff 2087 tree prev_init = DR_INIT (data_ref);
2088 gimple prev = stmt;
8bbe6b75 2089 HOST_WIDE_INT diff, gaps = 0;
2090 unsigned HOST_WIDE_INT count_in_bytes;
fb85abff 2091
2092 while (next)
2093 {
282bf14c 2094	  /* Skip same data-refs.  In case two or more stmts share a
 2095	     data-ref (supported only for loads), we vectorize only the first
2096 stmt, and the rest get their vectorized loads from the first
2097 one. */
fb85abff 2098 if (!tree_int_cst_compare (DR_INIT (data_ref),
2099 DR_INIT (STMT_VINFO_DATA_REF (
2100 vinfo_for_stmt (next)))))
2101 {
9ff25603 2102 if (DR_IS_WRITE (data_ref))
fb85abff 2103 {
6d8fb6cf 2104 if (dump_enabled_p ())
78bb46f5 2105 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2106 "Two store stmts share the same dr.\n");
fb85abff 2107 return false;
2108 }
2109
fb85abff 2110 /* For load use the same data-ref load. */
21009880 2111 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
fb85abff 2112
2113 prev = next;
21009880 2114 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
fb85abff 2115 continue;
2116 }
a4ee7fac 2117
fb85abff 2118 prev = next;
8bbe6b75 2119 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
fb85abff 2120
8bbe6b75 2121 /* All group members have the same STEP by construction. */
2122 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
fb85abff 2123
fb85abff 2124 /* Check that the distance between two accesses is equal to the type
2125 size. Otherwise, we have gaps. */
f9ae6f95 2126 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2127 - TREE_INT_CST_LOW (prev_init)) / type_size;
fb85abff 2128 if (diff != 1)
2129 {
2130 /* FORNOW: SLP of accesses with gaps is not supported. */
2131 slp_impossible = true;
9ff25603 2132 if (DR_IS_WRITE (data_ref))
fb85abff 2133 {
6d8fb6cf 2134 if (dump_enabled_p ())
78bb46f5 2135 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2136 "interleaved store with gaps\n");
fb85abff 2137 return false;
2138 }
b11576bf 2139
2140 gaps += diff - 1;
fb85abff 2141 }
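	  /* E.g. for 4-byte elements at DR_INITs 0 and 8, DIFF is 2: one
	     element is skipped and GAPS increases by 1.  */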
2142
a4ee7fac 2143 last_accessed_element += diff;
2144
fb85abff 2145 /* Store the gap from the previous member of the group. If there is no
21009880 2146 gap in the access, GROUP_GAP is always 1. */
2147 GROUP_GAP (vinfo_for_stmt (next)) = diff;
fb85abff 2148
2149 prev_init = DR_INIT (data_ref);
21009880 2150 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
fb85abff 2151 /* Count the number of data-refs in the chain. */
2152 count++;
2153 }
2154
 2155	      /* COUNT is the number of accesses found; we multiply it by the size of
 2156	         the type to get COUNT_IN_BYTES.  */
2157 count_in_bytes = type_size * count;
2158
48e1416a 2159 /* Check that the size of the interleaving (including gaps) is not
37545e54 2160 greater than STEP. */
8bbe6b75 2161 if (dr_step != 0
2162 && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
fb85abff 2163 {
6d8fb6cf 2164 if (dump_enabled_p ())
fb85abff 2165 {
78bb46f5 2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 2167 "interleaving size is greater than step for ");
78bb46f5 2168 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2169 DR_REF (dr));
2170 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2171 }
2172 return false;
2173 }
2174
2175 /* Check that the size of the interleaving is equal to STEP for stores,
2176 i.e., that there are no gaps. */
8bbe6b75 2177 if (dr_step != 0
2178 && absu_hwi (dr_step) != count_in_bytes)
fb85abff 2179 {
2180 if (DR_IS_READ (dr))
2181 {
2182 slp_impossible = true;
2183 /* There is a gap after the last load in the group. This gap is a
ee612634 2184 difference between the groupsize and the number of elements.
2185 When there is no gap, this difference should be 0. */
2186 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
fb85abff 2187 }
2188 else
2189 {
6d8fb6cf 2190 if (dump_enabled_p ())
78bb46f5 2191 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2192 "interleaved store with gaps\n");
fb85abff 2193 return false;
2194 }
2195 }
2196
2197 /* Check that STEP is a multiple of type size. */
8bbe6b75 2198 if (dr_step != 0
2199 && (dr_step % type_size) != 0)
fb85abff 2200 {
6d8fb6cf 2201 if (dump_enabled_p ())
fb85abff 2202 {
7bd765d4 2203 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2204 "step is not a multiple of type size: step ");
2205 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2206 dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2207 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2208 TYPE_SIZE_UNIT (scalar_type));
78bb46f5 2209 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 2210 }
2211 return false;
2212 }
2213
ee612634 2214 if (groupsize == 0)
2215 groupsize = count;
48e1416a 2216
ee612634 2217 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
6d8fb6cf 2218 if (dump_enabled_p ())
78bb46f5 2219 dump_printf_loc (MSG_NOTE, vect_location,
2220 "Detected interleaving of size %d\n", (int)groupsize);
fb85abff 2221
48e1416a 2222 /* SLP: create an SLP data structure for every interleaving group of
fb85abff 2223	 stores for further analysis in vect_analyze_slp.  */
9ff25603 2224 if (DR_IS_WRITE (dr) && !slp_impossible)
37545e54 2225 {
2226 if (loop_vinfo)
f1f41a6c 2227 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
37545e54 2228 if (bb_vinfo)
f1f41a6c 2229 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
37545e54 2230 }
a4ee7fac 2231
 2232	  /* There is a gap at the end of the group.  */
ee612634 2233 if (groupsize - last_accessed_element > 0 && loop_vinfo)
a4ee7fac 2234 {
6d8fb6cf 2235 if (dump_enabled_p ())
7bd765d4 2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 2237 "Data access with gaps requires scalar "
2238 "epilogue loop\n");
d88ecd95 2239 if (loop->inner)
2240 {
6d8fb6cf 2241 if (dump_enabled_p ())
78bb46f5 2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2243 "Peeling for outer loop is not supported\n");
d88ecd95 2244 return false;
2245 }
2246
2247 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
a4ee7fac 2248 }
fb85abff 2249 }
2250
2251 return true;
2252}
2253
2254
2255/* Analyze the access pattern of the data-reference DR.
2256 In case of non-consecutive accesses call vect_analyze_group_access() to
ee612634 2257 analyze groups of accesses. */
fb85abff 2258
2259static bool
2260vect_analyze_data_ref_access (struct data_reference *dr)
2261{
2262 tree step = DR_STEP (dr);
2263 tree scalar_type = TREE_TYPE (DR_REF (dr));
2264 gimple stmt = DR_STMT (dr);
2265 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2266 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 2267 struct loop *loop = NULL;
fb85abff 2268
37545e54 2269 if (loop_vinfo)
2270 loop = LOOP_VINFO_LOOP (loop_vinfo);
48e1416a 2271
37545e54 2272 if (loop_vinfo && !step)
fb85abff 2273 {
6d8fb6cf 2274 if (dump_enabled_p ())
78bb46f5 2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2276 "bad data-ref access in loop\n");
fb85abff 2277 return false;
2278 }
2279
afa60cb4 2280	  /* Allow invariant loads in loops that are not nested.  */
f634c3e9 2281 if (loop_vinfo && integer_zerop (step))
b04940e7 2282 {
2283 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
afa60cb4 2284 if (nested_in_vect_loop_p (loop, stmt))
2285 {
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2288 "zero step in inner loop of nest\n");
afa60cb4 2289 return false;
2290 }
b04940e7 2291 return DR_IS_READ (dr);
2292 }
fb85abff 2293
37545e54 2294 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 2295 {
2296 /* Interleaved accesses are not yet supported within outer-loop
2297 vectorization for references in the inner-loop. */
21009880 2298 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
fb85abff 2299
2300 /* For the rest of the analysis we use the outer-loop step. */
2301 step = STMT_VINFO_DR_STEP (stmt_info);
f634c3e9 2302 if (integer_zerop (step))
fb85abff 2303 {
6d8fb6cf 2304 if (dump_enabled_p ())
7bd765d4 2305 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2306 "zero step in outer loop.\n");
fb85abff 2307 if (DR_IS_READ (dr))
48e1416a 2308 return true;
fb85abff 2309 else
2310 return false;
2311 }
2312 }
2313
2314 /* Consecutive? */
f634c3e9 2315 if (TREE_CODE (step) == INTEGER_CST)
fb85abff 2316 {
f9ae6f95 2317 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
f634c3e9 2318 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2319 || (dr_step < 0
2320 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2321 {
2322 /* Mark that it is not interleaving. */
2323 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2324 return true;
2325 }
fb85abff 2326 }
2327
37545e54 2328 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 2329 {
6d8fb6cf 2330 if (dump_enabled_p ())
7bd765d4 2331 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2332 "grouped access in outer loop.\n");
fb85abff 2333 return false;
2334 }
2335
f634c3e9 2336	  /* Assume this is a DR handled by the non-constant strided load case.  */
2337 if (TREE_CODE (step) != INTEGER_CST)
2338 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2339
fb85abff 2340	  /* Not a consecutive access - check if it's part of an interleaving group.  */
2341 return vect_analyze_group_access (dr);
2342}
2343
9b44c85a 2344
2345
 2346	/* A helper function used in the comparator function to sort data
 2347	   references.  T1 and T2 are the two trees to be compared.
 2348	   The function returns -1, 0, or 1.  */
2349
2350static int
2351compare_tree (tree t1, tree t2)
2352{
2353 int i, cmp;
2354 enum tree_code code;
2355 char tclass;
2356
2357 if (t1 == t2)
2358 return 0;
2359 if (t1 == NULL)
2360 return -1;
2361 if (t2 == NULL)
2362 return 1;
2363
2364
2365 if (TREE_CODE (t1) != TREE_CODE (t2))
2366 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
2367
2368 code = TREE_CODE (t1);
2369 switch (code)
2370 {
2371 /* For const values, we can just use hash values for comparisons. */
2372 case INTEGER_CST:
2373 case REAL_CST:
2374 case FIXED_CST:
2375 case STRING_CST:
2376 case COMPLEX_CST:
2377 case VECTOR_CST:
2378 {
2379 hashval_t h1 = iterative_hash_expr (t1, 0);
2380 hashval_t h2 = iterative_hash_expr (t2, 0);
2381 if (h1 != h2)
2382 return h1 < h2 ? -1 : 1;
2383 break;
2384 }
2385
2386 case SSA_NAME:
2387 cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
2388 if (cmp != 0)
2389 return cmp;
2390
2391 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
2392 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
2393 break;
2394
2395 default:
2396 tclass = TREE_CODE_CLASS (code);
2397
 2398	      /* For declarations, we can just compare their UIDs.  */
2399 if (tclass == tcc_declaration)
2400 {
2401 if (DECL_UID (t1) != DECL_UID (t2))
2402 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
2403 break;
2404 }
2405
2406 /* For expressions with operands, compare their operands recursively. */
2407 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
2408 {
2409 cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
2410 if (cmp != 0)
2411 return cmp;
2412 }
2413 }
2414
2415 return 0;
2416}
2417
2418
68f15e9d 2419	/* Compare two data-references DRA and DRB so that they are sorted into
 2420	   chunks suitable for building interleaving groups.  */
2421
2422static int
2423dr_group_sort_cmp (const void *dra_, const void *drb_)
2424{
2425 data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2426 data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
68f15e9d 2427 int cmp;
2428
2429 /* Stabilize sort. */
2430 if (dra == drb)
2431 return 0;
2432
2433 /* Ordering of DRs according to base. */
2434 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
2435 {
9b44c85a 2436 cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb));
2437 if (cmp != 0)
2438 return cmp;
68f15e9d 2439 }
2440
2441 /* And according to DR_OFFSET. */
2442 if (!dr_equal_offsets_p (dra, drb))
2443 {
9b44c85a 2444 cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2445 if (cmp != 0)
2446 return cmp;
68f15e9d 2447 }
2448
2449 /* Put reads before writes. */
2450 if (DR_IS_READ (dra) != DR_IS_READ (drb))
2451 return DR_IS_READ (dra) ? -1 : 1;
2452
2453 /* Then sort after access size. */
2454 if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2455 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
2456 {
9b44c85a 2457 cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2458 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2459 if (cmp != 0)
2460 return cmp;
68f15e9d 2461 }
2462
2463 /* And after step. */
2464 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2465 {
9b44c85a 2466 cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
2467 if (cmp != 0)
2468 return cmp;
68f15e9d 2469 }
2470
2471 /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2472 cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
2473 if (cmp == 0)
2474 return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2475 return cmp;
2476}
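/* Note that after sorting with the comparator above, data references with
   the same base address and offset end up adjacent (reads before writes),
   ordered by access size, step and DR_INIT, which is what lets
   vect_analyze_data_ref_accesses build the interleaving chains in a single
   linear scan below.  */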
fb85abff 2477
2478/* Function vect_analyze_data_ref_accesses.
2479
2480 Analyze the access pattern of all the data references in the loop.
2481
2482 FORNOW: the only access pattern that is considered vectorizable is a
2483 simple step 1 (consecutive) access.
2484
2485 FORNOW: handle only arrays and pointer accesses. */
2486
2487bool
37545e54 2488vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
fb85abff 2489{
2490 unsigned int i;
f1f41a6c 2491 vec<data_reference_p> datarefs;
fb85abff 2492 struct data_reference *dr;
2493
6d8fb6cf 2494 if (dump_enabled_p ())
7bd765d4 2495 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2496 "=== vect_analyze_data_ref_accesses ===\n");
fb85abff 2497
37545e54 2498 if (loop_vinfo)
2499 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2500 else
2501 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2502
68f15e9d 2503 if (datarefs.is_empty ())
2504 return true;
2505
2506 /* Sort the array of datarefs to make building the interleaving chains
863a3781 2507	 linear.  Don't modify the original vector's order; it is needed for
2508 determining what dependencies are reversed. */
2509 vec<data_reference_p> datarefs_copy = datarefs.copy ();
2510 qsort (datarefs_copy.address (), datarefs_copy.length (),
68f15e9d 2511 sizeof (data_reference_p), dr_group_sort_cmp);
2512
2513 /* Build the interleaving chains. */
863a3781 2514 for (i = 0; i < datarefs_copy.length () - 1;)
68f15e9d 2515 {
863a3781 2516 data_reference_p dra = datarefs_copy[i];
68f15e9d 2517 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2518 stmt_vec_info lastinfo = NULL;
863a3781 2519 for (i = i + 1; i < datarefs_copy.length (); ++i)
68f15e9d 2520 {
863a3781 2521 data_reference_p drb = datarefs_copy[i];
68f15e9d 2522 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2523
2524 /* ??? Imperfect sorting (non-compatible types, non-modulo
2525 accesses, same accesses) can lead to a group to be artificially
2526 split here as we don't just skip over those. If it really
2527 matters we can push those to a worklist and re-iterate
 2528	     over them.  Then we can just skip ahead to the next DR here.  */
2529
 2530	  /* Check that the data-refs have the same first location (except init)
 2531	     and that they are both either stores or loads (not a load and a store).  */
2532 if (DR_IS_READ (dra) != DR_IS_READ (drb)
2533 || !operand_equal_p (DR_BASE_ADDRESS (dra),
2534 DR_BASE_ADDRESS (drb), 0)
2535 || !dr_equal_offsets_p (dra, drb))
2536 break;
2537
2538 /* Check that the data-refs have the same constant size and step. */
2539 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2540 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
e913b5cd 2541 if (!tree_fits_uhwi_p (sza)
2542 || !tree_fits_uhwi_p (szb)
68f15e9d 2543 || !tree_int_cst_equal (sza, szb)
e913b5cd 2544 || !tree_fits_shwi_p (DR_STEP (dra))
2545 || !tree_fits_shwi_p (DR_STEP (drb))
68f15e9d 2546 || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
2547 break;
2548
2549 /* Do not place the same access in the interleaving chain twice. */
2550 if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
2551 break;
2552
2553 /* Check the types are compatible.
2554 ??? We don't distinguish this during sorting. */
2555 if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2556 TREE_TYPE (DR_REF (drb))))
2557 break;
2558
2559 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
f9ae6f95 2560 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2561 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
68f15e9d 2562 gcc_assert (init_a < init_b);
2563
2564 /* If init_b == init_a + the size of the type * k, we have an
2565 interleaving, and DRA is accessed before DRB. */
8c53c46c 2566 HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
68f15e9d 2567 if ((init_b - init_a) % type_size_a != 0)
2568 break;
2569
2570 /* The step (if not zero) is greater than the difference between
2571 data-refs' inits. This splits groups into suitable sizes. */
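	  /* E.g. (illustration only) for 4-byte elements with DR_INITs 0 and
	     8 and a step of 8 bytes, 8 <= 8 - 0, so the second access starts
	     a new group rather than joining the first one.  */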
8c53c46c 2572 HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
68f15e9d 2573 if (step != 0 && step <= (init_b - init_a))
2574 break;
2575
2576 if (dump_enabled_p ())
2577 {
2578 dump_printf_loc (MSG_NOTE, vect_location,
2579 "Detected interleaving ");
2580 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2581 dump_printf (MSG_NOTE, " and ");
2582 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
78bb46f5 2583 dump_printf (MSG_NOTE, "\n");
68f15e9d 2584 }
2585
2586 /* Link the found element into the group list. */
2587 if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
2588 {
2589 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
2590 lastinfo = stmtinfo_a;
2591 }
2592 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
2593 GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
2594 lastinfo = stmtinfo_b;
2595 }
2596 }
2597
863a3781 2598 FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
6ea6a380 2599 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2600 && !vect_analyze_data_ref_access (dr))
fb85abff 2601 {
6d8fb6cf 2602 if (dump_enabled_p ())
78bb46f5 2603 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2604 "not vectorized: complicated access pattern.\n");
6ea6a380 2605
2606 if (bb_vinfo)
2607 {
2608 /* Mark the statement as not vectorizable. */
2609 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2610 continue;
2611 }
2612 else
863a3781 2613 {
2614 datarefs_copy.release ();
2615 return false;
2616 }
fb85abff 2617 }
2618
863a3781 2619 datarefs_copy.release ();
fb85abff 2620 return true;
2621}
2622
8a7b0f48 2623
43d14b66 2624/* Operator == between two dr_with_seg_len objects.
8a7b0f48 2625
2626 This equality operator is used to make sure two data refs
 2627	   are the same so that we will consider combining the
2628 aliasing checks of those two pairs of data dependent data
2629 refs. */
2630
2631static bool
43d14b66 2632operator == (const dr_with_seg_len& d1,
2633 const dr_with_seg_len& d2)
8a7b0f48 2634{
43d14b66 2635 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
2636 DR_BASE_ADDRESS (d2.dr), 0)
2637 && compare_tree (d1.offset, d2.offset) == 0
2638 && compare_tree (d1.seg_len, d2.seg_len) == 0;
8a7b0f48 2639}
2640
43d14b66 2641/* Function comp_dr_with_seg_len_pair.
8a7b0f48 2642
43d14b66 2643 Comparison function for sorting objects of dr_with_seg_len_pair_t
8a7b0f48 2644 so that we can combine aliasing checks in one scan. */
2645
2646static int
43d14b66 2647comp_dr_with_seg_len_pair (const void *p1_, const void *p2_)
8a7b0f48 2648{
43d14b66 2649 const dr_with_seg_len_pair_t* p1 = (const dr_with_seg_len_pair_t *) p1_;
2650 const dr_with_seg_len_pair_t* p2 = (const dr_with_seg_len_pair_t *) p2_;
2651
2652 const dr_with_seg_len &p11 = p1->first,
2653 &p12 = p1->second,
2654 &p21 = p2->first,
2655 &p22 = p2->second;
2656
 2657	  /* For DR pairs (a, b) and (c, d), we only consider merging the alias checks
 2658	     if a and c have the same base address and step, and b and d have the same
 2659	     address and step.  Therefore, if a&c or b&d don't have the same address
 2660	     and step, we don't care about the order of those two pairs after sorting.  */
2661 int comp_res;
2662
2663 if ((comp_res = compare_tree (DR_BASE_ADDRESS (p11.dr),
2664 DR_BASE_ADDRESS (p21.dr))) != 0)
8a7b0f48 2665 return comp_res;
43d14b66 2666 if ((comp_res = compare_tree (DR_BASE_ADDRESS (p12.dr),
2667 DR_BASE_ADDRESS (p22.dr))) != 0)
2668 return comp_res;
2669 if ((comp_res = compare_tree (DR_STEP (p11.dr), DR_STEP (p21.dr))) != 0)
2670 return comp_res;
2671 if ((comp_res = compare_tree (DR_STEP (p12.dr), DR_STEP (p22.dr))) != 0)
2672 return comp_res;
2673 if ((comp_res = compare_tree (p11.offset, p21.offset)) != 0)
2674 return comp_res;
2675 if ((comp_res = compare_tree (p12.offset, p22.offset)) != 0)
8a7b0f48 2676 return comp_res;
8a7b0f48 2677
2678 return 0;
2679}
2680
2681template <class T> static void
2682swap (T& a, T& b)
2683{
2684 T c (a);
2685 a = b;
2686 b = c;
2687}
2688
2689/* Function vect_vfa_segment_size.
2690
 2691	   Create an expression that computes the size of the segment
 2692	   that will be accessed for a data reference.  The function takes into
 2693	   account that realignment loads may access one more vector.
2694
2695 Input:
2696 DR: The data reference.
2697 LENGTH_FACTOR: segment length to consider.
2698
2699 Return an expression whose value is the size of segment which will be
2700 accessed by DR. */
2701
2702static tree
2703vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2704{
2705 tree segment_length;
2706
2707 if (integer_zerop (DR_STEP (dr)))
2708 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2709 else
2710 segment_length = size_binop (MULT_EXPR,
43d14b66 2711 fold_convert (sizetype, DR_STEP (dr)),
2712 fold_convert (sizetype, length_factor));
8a7b0f48 2713
2714 if (vect_supportable_dr_alignment (dr, false)
43d14b66 2715 == dr_explicit_realign_optimized)
8a7b0f48 2716 {
2717 tree vector_size = TYPE_SIZE_UNIT
2718 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2719
2720 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2721 }
2722 return segment_length;
2723}
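/* For instance, a data-ref with a 4-byte DR_STEP and LENGTH_FACTOR equal to
   a vectorization factor of 4 gets a 16-byte segment; for a load using the
   optimized explicit-realignment scheme one extra vector size is added.  */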
2724
fb85abff 2725/* Function vect_prune_runtime_alias_test_list.
2726
2727 Prune a list of ddrs to be tested at run-time by versioning for alias.
8a7b0f48 2728 Merge several alias checks into one if possible.
fb85abff 2729 Return FALSE if resulting list of ddrs is longer then allowed by
2730 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2731
2732bool
2733vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2734{
8a7b0f48 2735 vec<ddr_p> may_alias_ddrs =
fb85abff 2736 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
43d14b66 2737 vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =
8a7b0f48 2738 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2739 int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2740 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2741
2742 ddr_p ddr;
2743 unsigned int i;
2744 tree length_factor;
fb85abff 2745
6d8fb6cf 2746 if (dump_enabled_p ())
7bd765d4 2747 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2748 "=== vect_prune_runtime_alias_test_list ===\n");
fb85abff 2749
8a7b0f48 2750 if (may_alias_ddrs.is_empty ())
2751 return true;
2752
2753 /* Basically, for each pair of dependent data refs store_ptr_0
2754 and load_ptr_0, we create an expression:
2755
2756 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2757 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2758
2759 for aliasing checks. However, in some cases we can decrease
2760 the number of checks by combining two checks into one. For
2761 example, suppose we have another pair of data refs store_ptr_0
2762 and load_ptr_1, and if the following condition is satisfied:
2763
2764 load_ptr_0 < load_ptr_1 &&
2765 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
2766
2767 (this condition means, in each iteration of vectorized loop,
2768 the accessed memory of store_ptr_0 cannot be between the memory
2769 of load_ptr_0 and load_ptr_1.)
2770
2771 we then can use only the following expression to finish the
 2772	     aliasing checks between store_ptr_0 & load_ptr_0 and
2773 store_ptr_0 & load_ptr_1:
2774
2775 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2776 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
2777
2778 Note that we only consider that load_ptr_0 and load_ptr_1 have the
2779 same basic address. */
2780
2781 comp_alias_ddrs.create (may_alias_ddrs.length ());
2782
2783 /* First, we collect all data ref pairs for aliasing checks. */
2784 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
fb85abff 2785 {
8a7b0f48 2786 struct data_reference *dr_a, *dr_b;
2787 gimple dr_group_first_a, dr_group_first_b;
2788 tree segment_length_a, segment_length_b;
2789 gimple stmt_a, stmt_b;
2790
2791 dr_a = DDR_A (ddr);
2792 stmt_a = DR_STMT (DDR_A (ddr));
2793 dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2794 if (dr_group_first_a)
2795 {
2796 stmt_a = dr_group_first_a;
2797 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2798 }
fb85abff 2799
8a7b0f48 2800 dr_b = DDR_B (ddr);
2801 stmt_b = DR_STMT (DDR_B (ddr));
2802 dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2803 if (dr_group_first_b)
2804 {
2805 stmt_b = dr_group_first_b;
2806 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2807 }
fb85abff 2808
8a7b0f48 2809 if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2810 length_factor = scalar_loop_iters;
2811 else
2812 length_factor = size_int (vect_factor);
2813 segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2814 segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2815
43d14b66 2816 dr_with_seg_len_pair_t dr_with_seg_len_pair
2817 (dr_with_seg_len (dr_a, segment_length_a),
2818 dr_with_seg_len (dr_b, segment_length_b));
2819
2820 if (compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b)) > 0)
8a7b0f48 2821 swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
2822
2823 comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
2824 }
2825
2826 /* Second, we sort the collected data ref pairs so that we can scan
2827 them once to combine all possible aliasing checks. */
43d14b66 2828 comp_alias_ddrs.qsort (comp_dr_with_seg_len_pair);
fb85abff 2829
8a7b0f48 2830 /* Third, we scan the sorted dr pairs and check if we can combine
2831 alias checks of two neighbouring dr pairs. */
2832 for (size_t i = 1; i < comp_alias_ddrs.length (); ++i)
2833 {
2834 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
43d14b66 2835 dr_with_seg_len *dr_a1 = &comp_alias_ddrs[i-1].first,
2836 *dr_b1 = &comp_alias_ddrs[i-1].second,
2837 *dr_a2 = &comp_alias_ddrs[i].first,
2838 *dr_b2 = &comp_alias_ddrs[i].second;
8a7b0f48 2839
2840 /* Remove duplicate data ref pairs. */
2841 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
2842 {
2843 if (dump_enabled_p ())
fb85abff 2844 {
8a7b0f48 2845 dump_printf_loc (MSG_NOTE, vect_location,
2846 "found equal ranges ");
2847 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2848 DR_REF (dr_a1->dr));
2849 dump_printf (MSG_NOTE, ", ");
2850 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2851 DR_REF (dr_b1->dr));
2852 dump_printf (MSG_NOTE, " and ");
2853 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2854 DR_REF (dr_a2->dr));
2855 dump_printf (MSG_NOTE, ", ");
2856 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2857 DR_REF (dr_b2->dr));
2858 dump_printf (MSG_NOTE, "\n");
fb85abff 2859 }
8a7b0f48 2860
2861 comp_alias_ddrs.ordered_remove (i--);
2862 continue;
fb85abff 2863 }
48e1416a 2864
8a7b0f48 2865 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
2866 {
 2867	  /* We consider the case that DR_B1 and DR_B2 are the same memref,
 2868	     and DR_A1 and DR_A2 are two consecutive memrefs.  */
2869 if (*dr_a1 == *dr_a2)
2870 {
2871 swap (dr_a1, dr_b1);
2872 swap (dr_a2, dr_b2);
2873 }
2874
43d14b66 2875 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
2876 DR_BASE_ADDRESS (dr_a2->dr),
2877 0)
55af3bae 2878 || !tree_fits_shwi_p (dr_a1->offset)
2879 || !tree_fits_shwi_p (dr_a2->offset))
8a7b0f48 2880 continue;
2881
55af3bae 2882 HOST_WIDE_INT diff = (tree_to_shwi (dr_a2->offset)
2883 - tree_to_shwi (dr_a1->offset));
8a7b0f48 2884
2885
2886 /* Now we check if the following condition is satisfied:
2887
2888 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
2889
2890 where DIFF = DR_A2->OFFSET - DR_A1->OFFSET. However,
 2891	     SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant, so we
 2892	     have to make a conservative estimate.  We can get the minimum value
2893 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
2894 then either of the following two conditions can guarantee the
2895 one above:
2896
2897 1: DIFF <= MIN_SEG_LEN_B
2898 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
2899
2900 */
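	  /* E.g. (illustration only): with DIFF = 4, SEGMENT_LENGTH_A = 16
	     and MIN_SEG_LEN_B = 16, condition 1 holds (4 <= 16), so the
	     second alias check becomes redundant once DR_A1's segment
	     length is extended by DIFF below.  */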
2901
55af3bae 2902 HOST_WIDE_INT min_seg_len_b = (tree_fits_shwi_p (dr_b1->seg_len)
2903 ? tree_to_shwi (dr_b1->seg_len)
2904 : vect_factor);
8a7b0f48 2905
2906 if (diff <= min_seg_len_b
55af3bae 2907 || (tree_fits_shwi_p (dr_a1->seg_len)
2908 && diff - tree_to_shwi (dr_a1->seg_len) < min_seg_len_b))
8a7b0f48 2909 {
1c7f4889 2910 if (dump_enabled_p ())
2911 {
2912 dump_printf_loc (MSG_NOTE, vect_location,
2913 "merging ranges for ");
2914 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2915 DR_REF (dr_a1->dr));
2916 dump_printf (MSG_NOTE, ", ");
2917 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2918 DR_REF (dr_b1->dr));
2919 dump_printf (MSG_NOTE, " and ");
2920 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2921 DR_REF (dr_a2->dr));
2922 dump_printf (MSG_NOTE, ", ");
2923 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2924 DR_REF (dr_b2->dr));
2925 dump_printf (MSG_NOTE, "\n");
2926 }
2927
8a7b0f48 2928 dr_a1->seg_len = size_binop (PLUS_EXPR,
2929 dr_a2->seg_len, size_int (diff));
2930 comp_alias_ddrs.ordered_remove (i--);
2931 }
2932 }
fb85abff 2933 }
2934
1c7f4889 2935 dump_printf_loc (MSG_NOTE, vect_location,
2936 "improved number of alias checks from %d to %d\n",
2937 may_alias_ddrs.length (), comp_alias_ddrs.length ());
8a7b0f48 2938 if ((int) comp_alias_ddrs.length () >
2939 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1c7f4889 2940 return false;
fb85abff 2941
2942 return true;
2943}
2944
16dfb112 2945/* Check whether a non-affine read in stmt is suitable for gather load
2946 and if so, return a builtin decl for that operation. */
2947
2948tree
2949vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2950 tree *offp, int *scalep)
2951{
2952 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2953 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2954 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2955 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2956 tree offtype = NULL_TREE;
2957 tree decl, base, off;
2958 enum machine_mode pmode;
2959 int punsignedp, pvolatilep;
2960
c71d3c24 2961 base = DR_REF (dr);
2962 /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
2963 see if we can use the def stmt of the address. */
2964 if (is_gimple_call (stmt)
2965 && gimple_call_internal_p (stmt)
2966 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
2967 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)
2968 && TREE_CODE (base) == MEM_REF
2969 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
2970 && integer_zerop (TREE_OPERAND (base, 1))
2971 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
2972 {
2973 gimple def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
2974 if (is_gimple_assign (def_stmt)
2975 && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
2976 base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
2977 }
2978
16dfb112 2979	  /* The gather builtins need an address of the form
2980 loop_invariant + vector * {1, 2, 4, 8}
2981 or
2982 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2983 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2984 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2985 multiplications and additions in it. To get a vector, we need
2986 a single SSA_NAME that will be defined in the loop and will
2987 contain everything that is not loop invariant and that can be
 2988	     vectorized.  The following code attempts to find such a preexisting
2989 SSA_NAME OFF and put the loop invariants into a tree BASE
2990 that can be gimplified before the loop. */
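  /* E.g. (illustration only): for a read p[k[i]] where "int *p" and
     "int k[]" are invariant in the loop, the code below ends up with
     BASE == p, OFF == the SSA_NAME holding k[i] (possibly sign-extended)
     and SCALE == 4, matching the second form above.  */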
c71d3c24 2991 base = get_inner_reference (base, &pbitsize, &pbitpos, &off,
16dfb112 2992 &pmode, &punsignedp, &pvolatilep, false);
2993 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2994
2995 if (TREE_CODE (base) == MEM_REF)
2996 {
2997 if (!integer_zerop (TREE_OPERAND (base, 1)))
2998 {
2999 if (off == NULL_TREE)
3000 {
5de9d3ed 3001 offset_int moff = mem_ref_offset (base);
e913b5cd 3002 off = wide_int_to_tree (sizetype, moff);
16dfb112 3003 }
3004 else
3005 off = size_binop (PLUS_EXPR, off,
3006 fold_convert (sizetype, TREE_OPERAND (base, 1)));
3007 }
3008 base = TREE_OPERAND (base, 0);
3009 }
3010 else
3011 base = build_fold_addr_expr (base);
3012
3013 if (off == NULL_TREE)
3014 off = size_zero_node;
3015
 3016	  /* If BASE is not loop invariant and OFF is 0, we start with just the
 3017	     constant offset as the new loop-invariant BASE and continue with the
 3018	     old BASE as OFF; otherwise give up.
 3019	     We could handle that case by gimplifying the addition of BASE + OFF
 3020	     into some SSA_NAME and using that as OFF, but for now punt.  */
3021 if (!expr_invariant_in_loop_p (loop, base))
3022 {
3023 if (!integer_zerop (off))
3024 return NULL_TREE;
3025 off = base;
3026 base = size_int (pbitpos / BITS_PER_UNIT);
3027 }
3028 /* Otherwise put base + constant offset into the loop invariant BASE
3029 and continue with OFF. */
3030 else
3031 {
3032 base = fold_convert (sizetype, base);
3033 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
3034 }
3035
3036 /* OFF at this point may be either a SSA_NAME or some tree expression
3037 from get_inner_reference. Try to peel off loop invariants from it
3038 into BASE as long as possible. */
3039 STRIP_NOPS (off);
3040 while (offtype == NULL_TREE)
3041 {
3042 enum tree_code code;
3043 tree op0, op1, add = NULL_TREE;
3044
3045 if (TREE_CODE (off) == SSA_NAME)
3046 {
3047 gimple def_stmt = SSA_NAME_DEF_STMT (off);
3048
3049 if (expr_invariant_in_loop_p (loop, off))
3050 return NULL_TREE;
3051
3052 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3053 break;
3054
3055 op0 = gimple_assign_rhs1 (def_stmt);
3056 code = gimple_assign_rhs_code (def_stmt);
3057 op1 = gimple_assign_rhs2 (def_stmt);
3058 }
3059 else
3060 {
3061 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
3062 return NULL_TREE;
3063 code = TREE_CODE (off);
3064 extract_ops_from_tree (off, &code, &op0, &op1);
3065 }
3066 switch (code)
3067 {
3068 case POINTER_PLUS_EXPR:
3069 case PLUS_EXPR:
3070 if (expr_invariant_in_loop_p (loop, op0))
3071 {
3072 add = op0;
3073 off = op1;
3074 do_add:
3075 add = fold_convert (sizetype, add);
3076 if (scale != 1)
3077 add = size_binop (MULT_EXPR, add, size_int (scale));
3078 base = size_binop (PLUS_EXPR, base, add);
3079 continue;
3080 }
3081 if (expr_invariant_in_loop_p (loop, op1))
3082 {
3083 add = op1;
3084 off = op0;
3085 goto do_add;
3086 }
3087 break;
3088 case MINUS_EXPR:
3089 if (expr_invariant_in_loop_p (loop, op1))
3090 {
3091 add = fold_convert (sizetype, op1);
3092 add = size_binop (MINUS_EXPR, size_zero_node, add);
3093 off = op0;
3094 goto do_add;
3095 }
3096 break;
3097 case MULT_EXPR:
e913b5cd 3098 if (scale == 1 && tree_fits_shwi_p (op1))
16dfb112 3099 {
e913b5cd 3100 scale = tree_to_shwi (op1);
16dfb112 3101 off = op0;
3102 continue;
3103 }
3104 break;
3105 case SSA_NAME:
3106 off = op0;
3107 continue;
3108 CASE_CONVERT:
3109 if (!POINTER_TYPE_P (TREE_TYPE (op0))
3110 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3111 break;
3112 if (TYPE_PRECISION (TREE_TYPE (op0))
3113 == TYPE_PRECISION (TREE_TYPE (off)))
3114 {
3115 off = op0;
3116 continue;
3117 }
3118 if (TYPE_PRECISION (TREE_TYPE (op0))
3119 < TYPE_PRECISION (TREE_TYPE (off)))
3120 {
3121 off = op0;
3122 offtype = TREE_TYPE (off);
3123 STRIP_NOPS (off);
3124 continue;
3125 }
3126 break;
3127 default:
3128 break;
3129 }
3130 break;
3131 }
3132
3133 /* If at the end OFF still isn't a SSA_NAME or isn't
3134 defined in the loop, punt. */
3135 if (TREE_CODE (off) != SSA_NAME
3136 || expr_invariant_in_loop_p (loop, off))
3137 return NULL_TREE;
3138
3139 if (offtype == NULL_TREE)
3140 offtype = TREE_TYPE (off);
3141
3142 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
3143 offtype, scale);
3144 if (decl == NULL_TREE)
3145 return NULL_TREE;
3146
3147 if (basep)
3148 *basep = base;
3149 if (offp)
3150 *offp = off;
3151 if (scalep)
3152 *scalep = scale;
3153 return decl;
3154}
3155
fb85abff 3156/* Function vect_analyze_data_refs.
3157
37545e54 3158 Find all the data references in the loop or basic block.
fb85abff 3159
3160 The general structure of the analysis of data refs in the vectorizer is as
3161 follows:
48e1416a 3162 1- vect_analyze_data_refs(loop/bb): call
37545e54 3163 compute_data_dependences_for_loop/bb to find and analyze all data-refs
3164 in the loop/bb and their dependences.
fb85abff 3165 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3166 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3167 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3168
3169*/
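/* An illustrative example: for a loop like

     for (i = 0; i < N; i++)
       c[i] = a[i] + b[i];

   step 1 collects three data references -- the reads a[i] and b[i] and the
   write c[i] -- each described by DR_BASE_ADDRESS, DR_OFFSET, DR_INIT and
   DR_STEP, on which the later dependence, alignment and access checks
   rely.  */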
3170
3171bool
91a74fc6 3172vect_analyze_data_refs (loop_vec_info loop_vinfo,
3173 bb_vec_info bb_vinfo,
ca91d3f8 3174 int *min_vf, unsigned *n_stmts)
fb85abff 3175{
37545e54 3176 struct loop *loop = NULL;
3177 basic_block bb = NULL;
fb85abff 3178 unsigned int i;
f1f41a6c 3179 vec<data_reference_p> datarefs;
fb85abff 3180 struct data_reference *dr;
3181 tree scalar_type;
3182
6d8fb6cf 3183 if (dump_enabled_p ())
7bd765d4 3184 dump_printf_loc (MSG_NOTE, vect_location,
3185 "=== vect_analyze_data_refs ===\n");
48e1416a 3186
37545e54 3187 if (loop_vinfo)
3188 {
d09768a4 3189 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
3190
37545e54 3191 loop = LOOP_VINFO_LOOP (loop_vinfo);
d09768a4 3192 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3193 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
590a32b6 3194 {
6d8fb6cf 3195 if (dump_enabled_p ())
78bb46f5 3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3197 "not vectorized: loop contains function calls"
3198 " or data references that cannot be analyzed\n");
590a32b6 3199 return false;
3200 }
3201
d09768a4 3202 for (i = 0; i < loop->num_nodes; i++)
3203 {
3204 gimple_stmt_iterator gsi;
3205
3206 for (gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
3207 {
3208 gimple stmt = gsi_stmt (gsi);
ca91d3f8 3209 if (is_gimple_debug (stmt))
3210 continue;
3211 ++*n_stmts;
d09768a4 3212 if (!find_data_references_in_stmt (loop, stmt, &datarefs))
3213 {
3214 if (is_gimple_call (stmt) && loop->safelen)
3215 {
3216 tree fndecl = gimple_call_fndecl (stmt), op;
3217 if (fndecl != NULL_TREE)
3218 {
3219 struct cgraph_node *node = cgraph_get_node (fndecl);
3220 if (node != NULL && node->simd_clones != NULL)
3221 {
3222 unsigned int j, n = gimple_call_num_args (stmt);
3223 for (j = 0; j < n; j++)
3224 {
3225 op = gimple_call_arg (stmt, j);
3226 if (DECL_P (op)
3227 || (REFERENCE_CLASS_P (op)
3228 && get_base_address (op)))
3229 break;
3230 }
3231 op = gimple_call_lhs (stmt);
3232 /* Ignore #pragma omp declare simd functions
3233 if they don't have data references in the
3234 call stmt itself. */
3235 if (j == n
3236 && !(op
3237 && (DECL_P (op)
3238 || (REFERENCE_CLASS_P (op)
3239 && get_base_address (op)))))
3240 continue;
3241 }
3242 }
3243 }
3244 LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3247 "not vectorized: loop contains function "
3248 "calls or data references that cannot "
3249 "be analyzed\n");
3250 return false;
3251 }
3252 }
3253 }
3254
3255 LOOP_VINFO_DATAREFS (loop_vinfo) = datarefs;
37545e54 3256 }
3257 else
3258 {
ec84182d 3259 gimple_stmt_iterator gsi;
3260
37545e54 3261 bb = BB_VINFO_BB (bb_vinfo);
ec84182d 3262 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3263 {
3264 gimple stmt = gsi_stmt (gsi);
ca91d3f8 3265 if (is_gimple_debug (stmt))
3266 continue;
3267 ++*n_stmts;
ec84182d 3268 if (!find_data_references_in_stmt (NULL, stmt,
3269 &BB_VINFO_DATAREFS (bb_vinfo)))
3270 {
3271 /* Mark the rest of the basic-block as unvectorizable. */
3272 for (; !gsi_end_p (gsi); gsi_next (&gsi))
156b8feb 3273 {
3274 stmt = gsi_stmt (gsi);
3275 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3276 }
ec84182d 3277 break;
3278 }
3279 }
590a32b6 3280
37545e54 3281 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3282 }
fb85abff 3283
282bf14c 3284 /* Go through the data-refs, check that the analysis succeeded. Update
3285 pointer from stmt_vec_info struct to DR and vectype. */
fb85abff 3286
f1f41a6c 3287 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 3288 {
3289 gimple stmt;
3290 stmt_vec_info stmt_info;
48e1416a 3291 tree base, offset, init;
16dfb112 3292 bool gather = false;
3d483a94 3293 bool simd_lane_access = false;
91a74fc6 3294 int vf;
48e1416a 3295
8911f4de 3296again:
fb85abff 3297 if (!dr || !DR_REF (dr))
3298 {
6d8fb6cf 3299 if (dump_enabled_p ())
7bd765d4 3300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 3301 "not vectorized: unhandled data-ref\n");
fb85abff 3302 return false;
3303 }
3304
3305 stmt = DR_STMT (dr);
3306 stmt_info = vinfo_for_stmt (stmt);
3307
8911f4de 3308 /* Discard clobbers from the dataref vector. We will remove
3309 clobber stmts during vectorization. */
3310 if (gimple_clobber_p (stmt))
3311 {
53c3c39b 3312 free_data_ref (dr);
8911f4de 3313 if (i == datarefs.length () - 1)
3314 {
3315 datarefs.pop ();
3316 break;
3317 }
4aaf064d 3318 datarefs.ordered_remove (i);
3319 dr = datarefs[i];
8911f4de 3320 goto again;
3321 }
3322
fb85abff 3323 /* Check that analysis of the data-ref succeeded. */
3324 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
16dfb112 3325 || !DR_STEP (dr))
fb85abff 3326 {
3d483a94 3327 bool maybe_gather
3328 = DR_IS_READ (dr)
16dfb112 3329 && !TREE_THIS_VOLATILE (DR_REF (dr))
3d483a94 3330 && targetm.vectorize.builtin_gather != NULL;
3331 bool maybe_simd_lane_access
3332 = loop_vinfo && loop->simduid;
3333
3334 /* If target supports vector gather loads, or if this might be
 3335	      a SIMD lane access, see if they can be used.  */
3336 if (loop_vinfo
3337 && (maybe_gather || maybe_simd_lane_access)
16dfb112 3338 && !nested_in_vect_loop_p (loop, stmt))
3339 {
3340 struct data_reference *newdr
3341 = create_data_ref (NULL, loop_containing_stmt (stmt),
3342 DR_REF (dr), stmt, true);
3343 gcc_assert (newdr != NULL && DR_REF (newdr));
3344 if (DR_BASE_ADDRESS (newdr)
3345 && DR_OFFSET (newdr)
3346 && DR_INIT (newdr)
3347 && DR_STEP (newdr)
3348 && integer_zerop (DR_STEP (newdr)))
3349 {
3d483a94 3350 if (maybe_simd_lane_access)
3351 {
3352 tree off = DR_OFFSET (newdr);
3353 STRIP_NOPS (off);
3354 if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
3355 && TREE_CODE (off) == MULT_EXPR
d85a2013 3356 && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
3d483a94 3357 {
3358 tree step = TREE_OPERAND (off, 1);
3359 off = TREE_OPERAND (off, 0);
3360 STRIP_NOPS (off);
3361 if (CONVERT_EXPR_P (off)
3362 && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
3363 0)))
3364 < TYPE_PRECISION (TREE_TYPE (off)))
3365 off = TREE_OPERAND (off, 0);
3366 if (TREE_CODE (off) == SSA_NAME)
3367 {
3368 gimple def = SSA_NAME_DEF_STMT (off);
3369 tree reft = TREE_TYPE (DR_REF (newdr));
cae17039 3370 if (is_gimple_call (def)
3371 && gimple_call_internal_p (def)
3372 && (gimple_call_internal_fn (def)
3373 == IFN_GOMP_SIMD_LANE))
3d483a94 3374 {
3375 tree arg = gimple_call_arg (def, 0);
3376 gcc_assert (TREE_CODE (arg) == SSA_NAME);
3377 arg = SSA_NAME_VAR (arg);
3378 if (arg == loop->simduid
3379 /* For now. */
3380 && tree_int_cst_equal
3381 (TYPE_SIZE_UNIT (reft),
3382 step))
3383 {
3384 DR_OFFSET (newdr) = ssize_int (0);
3385 DR_STEP (newdr) = step;
3b36c9f7 3386 DR_ALIGNED_TO (newdr)
3387 = size_int (BIGGEST_ALIGNMENT);
3d483a94 3388 dr = newdr;
3389 simd_lane_access = true;
3390 }
3391 }
3392 }
3393 }
3394 }
3395 if (!simd_lane_access && maybe_gather)
3396 {
3397 dr = newdr;
3398 gather = true;
3399 }
16dfb112 3400 }
3d483a94 3401 if (!gather && !simd_lane_access)
16dfb112 3402 free_data_ref (newdr);
3403 }
6ea6a380 3404
3d483a94 3405 if (!gather && !simd_lane_access)
16dfb112 3406 {
6d8fb6cf 3407 if (dump_enabled_p ())
16dfb112 3408 {
78bb46f5 3409 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 3410 "not vectorized: data ref analysis "
3411 "failed ");
3412 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3413 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
16dfb112 3414 }
3c9feaca 3415
16dfb112 3416 if (bb_vinfo)
07e3bcbf 3417 break;
16dfb112 3418
3419 return false;
3420 }
fb85abff 3421 }
3422
3423 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3424 {
6d8fb6cf 3425 if (dump_enabled_p ())
7bd765d4 3426 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3427 "not vectorized: base addr of dr is a "
78bb46f5 3428 "constant\n");
3c9feaca 3429
3430 if (bb_vinfo)
07e3bcbf 3431 break;
3c9feaca 3432
3d483a94 3433 if (gather || simd_lane_access)
16dfb112 3434 free_data_ref (dr);
3435 return false;
fb85abff 3436 }
3437
83ec4165 3438 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3439 {
6d8fb6cf 3440 if (dump_enabled_p ())
83ec4165 3441 {
7bd765d4 3442 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3443 "not vectorized: volatile type ");
3444 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3445 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
83ec4165 3446 }
3c9feaca 3447
3448 if (bb_vinfo)
07e3bcbf 3449 break;
3c9feaca 3450
83ec4165 3451 return false;
3452 }
3453
96b07a49 3454 if (stmt_can_throw_internal (stmt))
42a6710e 3455 {
6d8fb6cf 3456 if (dump_enabled_p ())
42a6710e 3457 {
7bd765d4 3458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3459 "not vectorized: statement can throw an "
3460 "exception ");
3461 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3462 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
42a6710e 3463 }
3c9feaca 3464
3465 if (bb_vinfo)
07e3bcbf 3466 break;
3c9feaca 3467
3d483a94 3468 if (gather || simd_lane_access)
16dfb112 3469 free_data_ref (dr);
42a6710e 3470 return false;
3471 }
3472
87c952b8 3473 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3474 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3475 {
6d8fb6cf 3476 if (dump_enabled_p ())
87c952b8 3477 {
7bd765d4 3478 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3479 "not vectorized: statement is bitfield "
3480 "access ");
3481 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3482 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
87c952b8 3483 }
3484
3485 if (bb_vinfo)
07e3bcbf 3486 break;
87c952b8 3487
3d483a94 3488 if (gather || simd_lane_access)
87c952b8 3489 free_data_ref (dr);
3490 return false;
3491 }
3492
3493 base = unshare_expr (DR_BASE_ADDRESS (dr));
3494 offset = unshare_expr (DR_OFFSET (dr));
3495 init = unshare_expr (DR_INIT (dr));
3496
c71d3c24 3497 if (is_gimple_call (stmt)
3498 && (!gimple_call_internal_p (stmt)
3499 || (gimple_call_internal_fn (stmt) != IFN_MASK_LOAD
3500 && gimple_call_internal_fn (stmt) != IFN_MASK_STORE)))
635bf3aa 3501 {
6d8fb6cf 3502 if (dump_enabled_p ())
635bf3aa 3503 {
7bd765d4 3504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 3505 "not vectorized: dr in a call ");
7bd765d4 3506 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3507 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
635bf3aa 3508 }
3509
3510 if (bb_vinfo)
07e3bcbf 3511 break;
635bf3aa 3512
3d483a94 3513 if (gather || simd_lane_access)
635bf3aa 3514 free_data_ref (dr);
3515 return false;
3516 }
3517
fb85abff 3518 /* Update DR field in stmt_vec_info struct. */
fb85abff 3519
 3520 /* If the dataref is in an inner-loop of the loop that is considered
3521 for vectorization, we also want to analyze the access relative to
48e1416a 3522 the outer-loop (DR contains information only relative to the
fb85abff 3523 inner-most enclosing loop). We do that by building a reference to the
3524 first location accessed by the inner-loop, and analyze it relative to
48e1416a 3525 the outer-loop. */
3526 if (loop && nested_in_vect_loop_p (loop, stmt))
fb85abff 3527 {
3528 tree outer_step, outer_base, outer_init;
3529 HOST_WIDE_INT pbitsize, pbitpos;
3530 tree poffset;
3531 enum machine_mode pmode;
3532 int punsignedp, pvolatilep;
3533 affine_iv base_iv, offset_iv;
3534 tree dinit;
3535
48e1416a 3536 /* Build a reference to the first location accessed by the
282bf14c 3537 inner-loop: *(BASE+INIT). (The first location is actually
fb85abff 3538 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3539 tree inner_base = build_fold_indirect_ref
2cc66f2a 3540 (fold_build_pointer_plus (base, init));
fb85abff 3541
6d8fb6cf 3542 if (dump_enabled_p ())
fb85abff 3543 {
7bd765d4 3544 dump_printf_loc (MSG_NOTE, vect_location,
3545 "analyze in outer-loop: ");
3546 dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
78bb46f5 3547 dump_printf (MSG_NOTE, "\n");
fb85abff 3548 }
3549
48e1416a 3550 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
fb85abff 3551 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3552 gcc_assert (outer_base != NULL_TREE);
3553
3554 if (pbitpos % BITS_PER_UNIT != 0)
3555 {
6d8fb6cf 3556 if (dump_enabled_p ())
7bd765d4 3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3558 "failed: bit offset alignment.\n");
fb85abff 3559 return false;
3560 }
3561
3562 outer_base = build_fold_addr_expr (outer_base);
48e1416a 3563 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
fb85abff 3564 &base_iv, false))
3565 {
6d8fb6cf 3566 if (dump_enabled_p ())
78bb46f5 3567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 3568 "failed: evolution of base is not affine.\n");
fb85abff 3569 return false;
3570 }
3571
3572 if (offset)
3573 {
3574 if (poffset)
48e1416a 3575 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
fb85abff 3576 poffset);
3577 else
3578 poffset = offset;
3579 }
3580
3581 if (!poffset)
3582 {
3583 offset_iv.base = ssize_int (0);
3584 offset_iv.step = ssize_int (0);
3585 }
48e1416a 3586 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
fb85abff 3587 &offset_iv, false))
3588 {
6d8fb6cf 3589 if (dump_enabled_p ())
78bb46f5 3590 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 3591 "evolution of offset is not affine.\n");
fb85abff 3592 return false;
3593 }
3594
3595 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3596 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3597 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3598 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3599 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3600
3601 outer_step = size_binop (PLUS_EXPR,
3602 fold_convert (ssizetype, base_iv.step),
3603 fold_convert (ssizetype, offset_iv.step));
3604
3605 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3606 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
48e1416a 3607 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
fb85abff 3608 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
48e1416a 3609 STMT_VINFO_DR_OFFSET (stmt_info) =
fb85abff 3610 fold_convert (ssizetype, offset_iv.base);
48e1416a 3611 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
fb85abff 3612 size_int (highest_pow2_factor (offset_iv.base));
3613
6d8fb6cf 3614 if (dump_enabled_p ())
fb85abff 3615 {
7bd765d4 3616 dump_printf_loc (MSG_NOTE, vect_location,
3617 "\touter base_address: ");
3618 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3619 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3620 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3621 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3622 STMT_VINFO_DR_OFFSET (stmt_info));
3623 dump_printf (MSG_NOTE,
3624 "\n\touter constant offset from base address: ");
3625 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3626 STMT_VINFO_DR_INIT (stmt_info));
3627 dump_printf (MSG_NOTE, "\n\touter step: ");
3628 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3629 STMT_VINFO_DR_STEP (stmt_info));
3630 dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3631 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3632 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
78bb46f5 3633 dump_printf (MSG_NOTE, "\n");
fb85abff 3634 }
3635 }
3636
3637 if (STMT_VINFO_DATA_REF (stmt_info))
3638 {
6d8fb6cf 3639 if (dump_enabled_p ())
fb85abff 3640 {
7bd765d4 3641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3642 "not vectorized: more than one data ref "
3643 "in stmt: ");
3644 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3645 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 3646 }
3c9feaca 3647
3648 if (bb_vinfo)
07e3bcbf 3649 break;
3c9feaca 3650
3d483a94 3651 if (gather || simd_lane_access)
16dfb112 3652 free_data_ref (dr);
fb85abff 3653 return false;
3654 }
f083cd24 3655
fb85abff 3656 STMT_VINFO_DATA_REF (stmt_info) = dr;
3d483a94 3657 if (simd_lane_access)
3658 {
3659 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
53c3c39b 3660 free_data_ref (datarefs[i]);
3d483a94 3661 datarefs[i] = dr;
3662 }
48e1416a 3663
fb85abff 3664 /* Set vectype for STMT. */
3665 scalar_type = TREE_TYPE (DR_REF (dr));
53c3c39b 3666 STMT_VINFO_VECTYPE (stmt_info)
3667 = get_vectype_for_scalar_type (scalar_type);
48e1416a 3668 if (!STMT_VINFO_VECTYPE (stmt_info))
fb85abff 3669 {
6d8fb6cf 3670 if (dump_enabled_p ())
fb85abff 3671 {
78bb46f5 3672 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7bd765d4 3673 "not vectorized: no vectype for stmt: ");
3674 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3675 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3676 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3677 scalar_type);
78bb46f5 3678 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 3679 }
6ea6a380 3680
3681 if (bb_vinfo)
07e3bcbf 3682 break;
16dfb112 3683
3d483a94 3684 if (gather || simd_lane_access)
16dfb112 3685 {
3686 STMT_VINFO_DATA_REF (stmt_info) = NULL;
53c3c39b 3687 if (gather)
3688 free_data_ref (dr);
16dfb112 3689 }
3690 return false;
fb85abff 3691 }
0bf5f81b 3692 else
3693 {
3694 if (dump_enabled_p ())
3695 {
3696 dump_printf_loc (MSG_NOTE, vect_location,
3697 "got vectype for stmt: ");
3698 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3699 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3700 STMT_VINFO_VECTYPE (stmt_info));
78bb46f5 3701 dump_printf (MSG_NOTE, "\n");
0bf5f81b 3702 }
3703 }
91a74fc6 3704
3705 /* Adjust the minimal vectorization factor according to the
3706 vector type. */
3707 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3708 if (vf > *min_vf)
3709 *min_vf = vf;
16dfb112 3710
3711 if (gather)
3712 {
16dfb112 3713 tree off;
16dfb112 3714
7a10f06b 3715 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3716 if (gather
3717 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3718 gather = false;
f634c3e9 3719 if (!gather)
16dfb112 3720 {
706567b8 3721 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3722 free_data_ref (dr);
6d8fb6cf 3723 if (dump_enabled_p ())
16dfb112 3724 {
7bd765d4 3725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3726 "not vectorized: not suitable for gather "
3727 "load ");
3728 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3729 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
16dfb112 3730 }
3731 return false;
3732 }
3733
f1f41a6c 3734 datarefs[i] = dr;
f634c3e9 3735 STMT_VINFO_GATHER_P (stmt_info) = true;
3736 }
3737 else if (loop_vinfo
3738 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3739 {
2dca30a9 3740 if (nested_in_vect_loop_p (loop, stmt)
3741 || !DR_IS_READ (dr))
f634c3e9 3742 {
6d8fb6cf 3743 if (dump_enabled_p ())
f634c3e9 3744 {
7bd765d4 3745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3746 "not vectorized: not suitable for strided "
3747 "load ");
3748 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
78bb46f5 3749 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f634c3e9 3750 }
3751 return false;
3752 }
3753 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
16dfb112 3754 }
fb85abff 3755 }
48e1416a 3756
07e3bcbf 3757 /* If we stopped analysis at the first dataref we could not analyze
 3758 when trying to vectorize a basic-block, mark the rest of the datarefs
3759 as not vectorizable and truncate the vector of datarefs. That
3760 avoids spending useless time in analyzing their dependence. */
3761 if (i != datarefs.length ())
3762 {
3763 gcc_assert (bb_vinfo != NULL);
3764 for (unsigned j = i; j < datarefs.length (); ++j)
3765 {
3766 data_reference_p dr = datarefs[j];
3767 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3768 free_data_ref (dr);
3769 }
3770 datarefs.truncate (i);
3771 }
3772
fb85abff 3773 return true;
3774}
3775
3776
3777/* Function vect_get_new_vect_var.
3778
282bf14c 3779 Returns a name for a new variable. The current naming scheme appends the
48e1416a 3780 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3781 the name of vectorizer generated variables, and appends that to NAME if
fb85abff 3782 provided. */
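/* For illustration (a hypothetical call, not taken from any caller below):

     tree p = vect_get_new_vect_var (ptr_type, vect_pointer_var, "a");

   creates a register temporary whose name is derived from "vectp_a";
   passing NULL for NAME yields a name based on the prefix alone ("vect",
   "stmp" or "vectp", depending on VAR_KIND).  */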
3783
3784tree
3785vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3786{
3787 const char *prefix;
3788 tree new_vect_var;
3789
3790 switch (var_kind)
3791 {
3792 case vect_simple_var:
0bf5f81b 3793 prefix = "vect";
fb85abff 3794 break;
3795 case vect_scalar_var:
0bf5f81b 3796 prefix = "stmp";
fb85abff 3797 break;
3798 case vect_pointer_var:
0bf5f81b 3799 prefix = "vectp";
fb85abff 3800 break;
3801 default:
3802 gcc_unreachable ();
3803 }
3804
3805 if (name)
3806 {
0bf5f81b 3807 char* tmp = concat (prefix, "_", name, NULL);
35244493 3808 new_vect_var = create_tmp_reg (type, tmp);
fb85abff 3809 free (tmp);
3810 }
3811 else
35244493 3812 new_vect_var = create_tmp_reg (type, prefix);
fb85abff 3813
3814 return new_vect_var;
3815}
3816
3817
3818/* Function vect_create_addr_base_for_vector_ref.
3819
3820 Create an expression that computes the address of the first memory location
3821 that will be accessed for a data reference.
3822
3823 Input:
3824 STMT: The statement containing the data reference.
3825 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
 3826 OFFSET: Optional. If supplied, it is added to the initial address.
 3827 LOOP: Specify relative to which loop-nest the address should be computed.
3828 For example, when the dataref is in an inner-loop nested in an
3829 outer-loop that is now being vectorized, LOOP can be either the
282bf14c 3830 outer-loop, or the inner-loop. The first memory location accessed
fb85abff 3831 by the following dataref ('in' points to short):
3832
3833 for (i=0; i<N; i++)
3834 for (j=0; j<M; j++)
3835 s += in[i+j]
3836
3837 is as follows:
3838 if LOOP=i_loop: &in (relative to i_loop)
3839 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3840
3841 Output:
48e1416a 3842 1. Return an SSA_NAME whose value is the address of the memory location of
fb85abff 3843 the first vector of the data reference.
3844 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3845 these statement(s) which define the returned SSA_NAME.
3846
3847 FORNOW: We are only handling array accesses with step 1. */
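/* A minimal sketch (names illustrative, not the actual generated GIMPLE) of
   the address arithmetic emitted below for a dataref with base BASE,
   constant init INIT, offset OFF and element size STEP, in the loop case:

     base_offset = (sizetype) OFF + (sizetype) INIT;
     if (OFFSET)
       base_offset += (sizetype) OFFSET * STEP;
     addr_base = (vectype *) (BASE + base_offset);

   (For basic-block vectorization the address of DR_REF is taken directly.)
   The result is forced into an SSA_NAME and the statements computing it are
   appended to NEW_STMT_LIST.  */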
3848
3849tree
3850vect_create_addr_base_for_vector_ref (gimple stmt,
3851 gimple_seq *new_stmt_list,
3852 tree offset,
3853 struct loop *loop)
3854{
3855 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3856 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
90d4c4af 3857 tree data_ref_base;
3c18ea71 3858 const char *base_name;
90d4c4af 3859 tree addr_base;
fb85abff 3860 tree dest;
3861 gimple_seq seq = NULL;
90d4c4af 3862 tree base_offset;
3863 tree init;
f083cd24 3864 tree vect_ptr_type;
fb85abff 3865 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
37545e54 3866 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
fb85abff 3867
37545e54 3868 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
fb85abff 3869 {
37545e54 3870 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 3871
37545e54 3872 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
fb85abff 3873
3874 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3875 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3876 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3877 }
90d4c4af 3878 else
3879 {
3880 data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3881 base_offset = unshare_expr (DR_OFFSET (dr));
3882 init = unshare_expr (DR_INIT (dr));
3883 }
fb85abff 3884
37545e54 3885 if (loop_vinfo)
3c18ea71 3886 base_name = get_name (data_ref_base);
37545e54 3887 else
3888 {
3889 base_offset = ssize_int (0);
3890 init = ssize_int (0);
3c18ea71 3891 base_name = get_name (DR_REF (dr));
48e1416a 3892 }
37545e54 3893
fb85abff 3894 /* Create base_offset */
3895 base_offset = size_binop (PLUS_EXPR,
3896 fold_convert (sizetype, base_offset),
3897 fold_convert (sizetype, init));
fb85abff 3898
3899 if (offset)
3900 {
fb85abff 3901 offset = fold_build2 (MULT_EXPR, sizetype,
3902 fold_convert (sizetype, offset), step);
3903 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3904 base_offset, offset);
fb85abff 3905 }
3906
3907 /* base + base_offset */
37545e54 3908 if (loop_vinfo)
2cc66f2a 3909 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
37545e54 3910 else
3911 {
182cf5a9 3912 addr_base = build1 (ADDR_EXPR,
3913 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3914 unshare_expr (DR_REF (dr)));
37545e54 3915 }
48e1416a 3916
fb85abff 3917 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
90d4c4af 3918 addr_base = fold_convert (vect_ptr_type, addr_base);
3919 dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
3920 addr_base = force_gimple_operand (addr_base, &seq, false, dest);
fb85abff 3921 gimple_seq_add_seq (new_stmt_list, seq);
3922
f544b9a4 3923 if (DR_PTR_INFO (dr)
90d4c4af 3924 && TREE_CODE (addr_base) == SSA_NAME)
1259ab70 3925 {
90d4c4af 3926 duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
1259ab70 3927 if (offset)
90d4c4af 3928 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
1259ab70 3929 }
f544b9a4 3930
6d8fb6cf 3931 if (dump_enabled_p ())
fb85abff 3932 {
7bd765d4 3933 dump_printf_loc (MSG_NOTE, vect_location, "created ");
90d4c4af 3934 dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
78bb46f5 3935 dump_printf (MSG_NOTE, "\n");
fb85abff 3936 }
f083cd24 3937
90d4c4af 3938 return addr_base;
fb85abff 3939}
3940
3941
3942/* Function vect_create_data_ref_ptr.
3943
bd5ba09f 3944 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3945 location accessed in the loop by STMT, along with the def-use update
3946 chain to appropriately advance the pointer through the loop iterations.
3947 Also set aliasing information for the pointer. This pointer is used by
3948 the callers to this function to create a memory reference expression for
3949 vector load/store access.
fb85abff 3950
3951 Input:
3952 1. STMT: a stmt that references memory. Expected to be of the form
3953 GIMPLE_ASSIGN <name, data-ref> or
3954 GIMPLE_ASSIGN <data-ref, name>.
bd5ba09f 3955 2. AGGR_TYPE: the type of the reference, which should be either a vector
3956 or an array.
3957 3. AT_LOOP: the loop where the vector memref is to be created.
3958 4. OFFSET (optional): an offset to be added to the initial address accessed
fb85abff 3959 by the data-ref in STMT.
bd5ba09f 3960 5. BSI: location where the new stmts are to be placed if there is no loop
3961 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
fb85abff 3962 pointing to the initial address.
fb85abff 3963
3964 Output:
3965 1. Declare a new ptr to vector_type, and have it point to the base of the
 3966 data reference (the initial address accessed by the data reference).
3967 For example, for vector of type V8HI, the following code is generated:
3968
bd5ba09f 3969 v8hi *ap;
3970 ap = (v8hi *)initial_address;
fb85abff 3971
3972 if OFFSET is not supplied:
3973 initial_address = &a[init];
3974 if OFFSET is supplied:
3975 initial_address = &a[init + OFFSET];
3976
3977 Return the initial_address in INITIAL_ADDRESS.
3978
3979 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
48e1416a 3980 update the pointer in each iteration of the loop.
fb85abff 3981
3982 Return the increment stmt that updates the pointer in PTR_INCR.
3983
48e1416a 3984 3. Set INV_P to true if the access pattern of the data reference in the
282bf14c 3985 vectorized loop is invariant. Set it to false otherwise.
fb85abff 3986
3987 4. Return the pointer. */
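/* Illustrative sketch (scalar equivalent only, not the generated GIMPLE)
   for a load from a short array 'a' with AGGR_TYPE v8hi and a contiguous
   access:

     v8hi *ap = (v8hi *) &a[init];
     loop:
       ... = *ap;
       ap = ap + 1;

   The initialization is emitted in the loop preheader; the increment
   realizes the def-use update chain mentioned above and is omitted when
   ONLY_INIT is true.  */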
3988
3989tree
bd5ba09f 3990vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3991 tree offset, tree *initial_address,
3992 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3993 bool only_init, bool *inv_p)
fb85abff 3994{
3c18ea71 3995 const char *base_name;
fb85abff 3996 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3997 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 3998 struct loop *loop = NULL;
3999 bool nested_in_vect_loop = false;
4000 struct loop *containing_loop = NULL;
bd5ba09f 4001 tree aggr_ptr_type;
4002 tree aggr_ptr;
fb85abff 4003 tree new_temp;
4004 gimple vec_stmt;
4005 gimple_seq new_stmt_list = NULL;
37545e54 4006 edge pe = NULL;
fb85abff 4007 basic_block new_bb;
bd5ba09f 4008 tree aggr_ptr_init;
fb85abff 4009 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
bd5ba09f 4010 tree aptr;
fb85abff 4011 gimple_stmt_iterator incr_gsi;
4012 bool insert_after;
4013 tree indx_before_incr, indx_after_incr;
4014 gimple incr;
4015 tree step;
37545e54 4016 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
48e1416a 4017
bd5ba09f 4018 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
4019 || TREE_CODE (aggr_type) == VECTOR_TYPE);
4020
37545e54 4021 if (loop_vinfo)
4022 {
4023 loop = LOOP_VINFO_LOOP (loop_vinfo);
4024 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4025 containing_loop = (gimple_bb (stmt))->loop_father;
4026 pe = loop_preheader_edge (loop);
4027 }
4028 else
4029 {
4030 gcc_assert (bb_vinfo);
4031 only_init = true;
4032 *ptr_incr = NULL;
4033 }
48e1416a 4034
fb85abff 4035 /* Check the step (evolution) of the load in LOOP, and record
4036 whether it's invariant. */
4037 if (nested_in_vect_loop)
4038 step = STMT_VINFO_DR_STEP (stmt_info);
4039 else
4040 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
48e1416a 4041
8bbe6b75 4042 if (integer_zerop (step))
fb85abff 4043 *inv_p = true;
4044 else
4045 *inv_p = false;
4046
4047 /* Create an expression for the first address accessed by this load
48e1416a 4048 in LOOP. */
3c18ea71 4049 base_name = get_name (DR_BASE_ADDRESS (dr));
fb85abff 4050
6d8fb6cf 4051 if (dump_enabled_p ())
fb85abff 4052 {
3c18ea71 4053 tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
7bd765d4 4054 dump_printf_loc (MSG_NOTE, vect_location,
4055 "create %s-pointer variable to type: ",
f3d35d4d 4056 get_tree_code_name (TREE_CODE (aggr_type)));
7bd765d4 4057 dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
3c18ea71 4058 if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
7bd765d4 4059 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
19bacd59 4060 else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
4061 dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
3c18ea71 4062 else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
7bd765d4 4063 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
3c18ea71 4064 else
7bd765d4 4065 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
3c18ea71 4066 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
78bb46f5 4067 dump_printf (MSG_NOTE, "\n");
fb85abff 4068 }
4069
90d4c4af 4070 /* (1) Create the new aggregate-pointer variable.
4071 Vector and array types inherit the alias set of their component
bd5ba09f 4072 type by default so we need to use a ref-all pointer if the data
4073 reference does not conflict with the created aggregated data
4074 reference because it is not addressable. */
90d4c4af 4075 bool need_ref_all = false;
4076 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
a34701c9 4077 get_alias_set (DR_REF (dr))))
90d4c4af 4078 need_ref_all = true;
a34701c9 4079 /* Likewise for any of the data references in the stmt group. */
21009880 4080 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
fb85abff 4081 {
21009880 4082 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
dd277d48 4083 do
4084 {
90d4c4af 4085 stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
4086 struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
4087 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4088 get_alias_set (DR_REF (sdr))))
dd277d48 4089 {
90d4c4af 4090 need_ref_all = true;
dd277d48 4091 break;
4092 }
90d4c4af 4093 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
dd277d48 4094 }
4095 while (orig_stmt);
fb85abff 4096 }
90d4c4af 4097 aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4098 need_ref_all);
4099 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4100
fb85abff 4101
282bf14c 4102 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4103 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4104 def-use update cycles for the pointer: one relative to the outer-loop
4105 (LOOP), which is what steps (3) and (4) below do. The other is relative
4106 to the inner-loop (which is the inner-most loop containing the dataref),
 4107 and this is done by step (5) below.
fb85abff 4108
282bf14c 4109 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4110 inner-most loop, and so steps (3),(4) work the same, and step (5) is
4111 redundant. Steps (3),(4) create the following:
fb85abff 4112
4113 vp0 = &base_addr;
4114 LOOP: vp1 = phi(vp0,vp2)
48e1416a 4115 ...
fb85abff 4116 ...
4117 vp2 = vp1 + step
4118 goto LOOP
48e1416a 4119
282bf14c 4120 If there is an inner-loop nested in loop, then step (5) will also be
4121 applied, and an additional update in the inner-loop will be created:
fb85abff 4122
4123 vp0 = &base_addr;
4124 LOOP: vp1 = phi(vp0,vp2)
4125 ...
4126 inner: vp3 = phi(vp1,vp4)
4127 vp4 = vp3 + inner_step
4128 if () goto inner
4129 ...
4130 vp2 = vp1 + step
4131 if () goto LOOP */
4132
bd5ba09f 4133 /* (2) Calculate the initial address of the aggregate-pointer, and set
4134 the aggregate-pointer to point to it before the loop. */
fb85abff 4135
 4136 /* Create: &(base[init_val+offset]) in the loop preheader. */
4137
4138 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
4139 offset, loop);
fb85abff 4140 if (new_stmt_list)
4141 {
37545e54 4142 if (pe)
4143 {
4144 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
4145 gcc_assert (!new_bb);
4146 }
4147 else
bee862b6 4148 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
fb85abff 4149 }
4150
4151 *initial_address = new_temp;
4152
bd5ba09f 4153 /* Create: p = (aggr_type *) initial_base */
f544b9a4 4154 if (TREE_CODE (new_temp) != SSA_NAME
bd5ba09f 4155 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
37545e54 4156 {
bd5ba09f 4157 vec_stmt = gimple_build_assign (aggr_ptr,
4158 fold_convert (aggr_ptr_type, new_temp));
4159 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
f544b9a4 4160 /* Copy the points-to information if it exists. */
4161 if (DR_PTR_INFO (dr))
bd5ba09f 4162 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
4163 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
f544b9a4 4164 if (pe)
4165 {
4166 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
4167 gcc_assert (!new_bb);
4168 }
4169 else
bee862b6 4170 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
37545e54 4171 }
4172 else
bd5ba09f 4173 aggr_ptr_init = new_temp;
fb85abff 4174
bd5ba09f 4175 /* (3) Handle the updating of the aggregate-pointer inside the loop.
282bf14c 4176 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4177 inner-loop nested in LOOP (during outer-loop vectorization). */
fb85abff 4178
37545e54 4179 /* No update in loop is required. */
48e1416a 4180 if (only_init && (!loop_vinfo || at_loop == loop))
bd5ba09f 4181 aptr = aggr_ptr_init;
fb85abff 4182 else
4183 {
bd5ba09f 4184 /* The step of the aggregate pointer is the type size. */
8bbe6b75 4185 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
48e1416a 4186 /* One exception to the above is when the scalar step of the load in
fb85abff 4187 LOOP is zero. In this case the step here is also zero. */
4188 if (*inv_p)
8bbe6b75 4189 iv_step = size_zero_node;
4190 else if (tree_int_cst_sgn (step) == -1)
4191 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
fb85abff 4192
4193 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4194
bd5ba09f 4195 create_iv (aggr_ptr_init,
8bbe6b75 4196 fold_convert (aggr_ptr_type, iv_step),
bd5ba09f 4197 aggr_ptr, loop, &incr_gsi, insert_after,
fb85abff 4198 &indx_before_incr, &indx_after_incr);
4199 incr = gsi_stmt (incr_gsi);
37545e54 4200 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
fb85abff 4201
4202 /* Copy the points-to information if it exists. */
4203 if (DR_PTR_INFO (dr))
4204 {
4205 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4206 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4207 }
fb85abff 4208 if (ptr_incr)
4209 *ptr_incr = incr;
4210
bd5ba09f 4211 aptr = indx_before_incr;
fb85abff 4212 }
4213
4214 if (!nested_in_vect_loop || only_init)
bd5ba09f 4215 return aptr;
fb85abff 4216
4217
bd5ba09f 4218 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
 282bf14c 4219 nested in LOOP, if it exists. */
fb85abff 4220
4221 gcc_assert (nested_in_vect_loop);
4222 if (!only_init)
4223 {
4224 standard_iv_increment_position (containing_loop, &incr_gsi,
4225 &insert_after);
bd5ba09f 4226 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
fb85abff 4227 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
4228 &indx_after_incr);
4229 incr = gsi_stmt (incr_gsi);
37545e54 4230 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
fb85abff 4231
4232 /* Copy the points-to information if it exists. */
4233 if (DR_PTR_INFO (dr))
4234 {
4235 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4236 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4237 }
fb85abff 4238 if (ptr_incr)
4239 *ptr_incr = incr;
4240
48e1416a 4241 return indx_before_incr;
fb85abff 4242 }
4243 else
4244 gcc_unreachable ();
4245}
4246
4247
4248/* Function bump_vector_ptr
4249
4250 Increment a pointer (to a vector type) by vector-size. If requested,
48e1416a 4251 i.e. if PTR-INCR is given, then also connect the new increment stmt
fb85abff 4252 to the existing def-use update-chain of the pointer, by modifying
4253 the PTR_INCR as illustrated below:
4254
4255 The pointer def-use update-chain before this function:
4256 DATAREF_PTR = phi (p_0, p_2)
4257 ....
48e1416a 4258 PTR_INCR: p_2 = DATAREF_PTR + step
fb85abff 4259
4260 The pointer def-use update-chain after this function:
4261 DATAREF_PTR = phi (p_0, p_2)
4262 ....
4263 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4264 ....
4265 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4266
4267 Input:
48e1416a 4268 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
fb85abff 4269 in the loop.
48e1416a 4270 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
fb85abff 4271 the loop. The increment amount across iterations is expected
48e1416a 4272 to be vector_size.
fb85abff 4273 BSI - location where the new update stmt is to be placed.
4274 STMT - the original scalar memory-access stmt that is being vectorized.
4275 BUMP - optional. The offset by which to bump the pointer. If not given,
4276 the offset is assumed to be vector_size.
4277
4278 Output: Return NEW_DATAREF_PTR as illustrated above.
48e1416a 4279
fb85abff 4280*/
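/* For example (an assumed case, for illustration only): with a V8HI
   VECTYPE and no BUMP the new statement is

     NEW_DATAREF_PTR = DATAREF_PTR + 16;

   i.e. the pointer is advanced by TYPE_SIZE_UNIT (vectype) bytes, and any
   use of DATAREF_PTR in PTR_INCR is redirected to NEW_DATAREF_PTR.  */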
4281
4282tree
4283bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4284 gimple stmt, tree bump)
4285{
4286 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4287 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4288 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 4289 tree update = TYPE_SIZE_UNIT (vectype);
4290 gimple incr_stmt;
4291 ssa_op_iter iter;
4292 use_operand_p use_p;
4293 tree new_dataref_ptr;
4294
4295 if (bump)
4296 update = bump;
48e1416a 4297
874117c8 4298 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4299 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
fb85abff 4300 dataref_ptr, update);
fb85abff 4301 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4302
4303 /* Copy the points-to information if it exists. */
4304 if (DR_PTR_INFO (dr))
1259ab70 4305 {
4306 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
ceea063b 4307 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
1259ab70 4308 }
fb85abff 4309
4310 if (!ptr_incr)
4311 return new_dataref_ptr;
4312
4313 /* Update the vector-pointer's cross-iteration increment. */
4314 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4315 {
4316 tree use = USE_FROM_PTR (use_p);
4317
4318 if (use == dataref_ptr)
4319 SET_USE (use_p, new_dataref_ptr);
4320 else
4321 gcc_assert (tree_int_cst_compare (use, update) == 0);
4322 }
4323
4324 return new_dataref_ptr;
4325}
4326
4327
4328/* Function vect_create_destination_var.
4329
4330 Create a new temporary of type VECTYPE. */
4331
4332tree
4333vect_create_destination_var (tree scalar_dest, tree vectype)
4334{
4335 tree vec_dest;
0bf5f81b 4336 const char *name;
4337 char *new_name;
fb85abff 4338 tree type;
4339 enum vect_var_kind kind;
4340
4341 kind = vectype ? vect_simple_var : vect_scalar_var;
4342 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4343
4344 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4345
0bf5f81b 4346 name = get_name (scalar_dest);
4347 if (name)
4348 asprintf (&new_name, "%s_%u", name, SSA_NAME_VERSION (scalar_dest));
4349 else
4350 asprintf (&new_name, "_%u", SSA_NAME_VERSION (scalar_dest));
fb85abff 4351 vec_dest = vect_get_new_vect_var (type, kind, new_name);
0bf5f81b 4352 free (new_name);
fb85abff 4353
4354 return vec_dest;
4355}
4356
ee612634 4357/* Function vect_grouped_store_supported.
fb85abff 4358
42f6a6e8 4359 Returns TRUE if interleave high and interleave low permutations
4360 are supported, and FALSE otherwise. */
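/* For example (illustrative values only): with nelt == 4 the selectors
   whose support is checked below are

     sel_high[] = { 0, 4, 1, 5 };
     sel_low[]  = { 2, 6, 3, 7 };

   i.e. exactly the interleave-high/low patterns later generated by
   vect_permute_store_chain.  */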
fb85abff 4361
4362bool
ee612634 4363vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
fb85abff 4364{
42f6a6e8 4365 enum machine_mode mode = TYPE_MODE (vectype);
48e1416a 4366
481fc474 4367 /* vect_permute_store_chain requires the group size to be a power of two. */
4368 if (exact_log2 (count) == -1)
4369 {
6d8fb6cf 4370 if (dump_enabled_p ())
7bd765d4 4371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4372 "the size of the group of accesses"
78bb46f5 4373 " is not a power of 2\n");
481fc474 4374 return false;
4375 }
4376
42f6a6e8 4377 /* Check that the permutation is supported. */
8bec2124 4378 if (VECTOR_MODE_P (mode))
4379 {
4380 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4381 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4382 for (i = 0; i < nelt / 2; i++)
4383 {
4384 sel[i * 2] = i;
4385 sel[i * 2 + 1] = i + nelt;
4386 }
4387 if (can_vec_perm_p (mode, false, sel))
4388 {
4389 for (i = 0; i < nelt; i++)
4390 sel[i] += nelt / 2;
4391 if (can_vec_perm_p (mode, false, sel))
4392 return true;
4393 }
4394 }
fb85abff 4395
6d8fb6cf 4396 if (dump_enabled_p ())
7bd765d4 4397 dump_printf (MSG_MISSED_OPTIMIZATION,
78bb46f5 4398 "interleave op not supported by target.\n");
6620d7d7 4399 return false;
fb85abff 4400}
4401
4402
94b7b4dd 4403/* Return TRUE if vec_store_lanes is available for COUNT vectors of
4404 type VECTYPE. */
4405
4406bool
4407vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4408{
4409 return vect_lanes_optab_supported_p ("vec_store_lanes",
4410 vec_store_lanes_optab,
4411 vectype, count);
4412}
4413
4414
fb85abff 4415/* Function vect_permute_store_chain.
4416
4417 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
48e1416a 4418 a power of 2, generate interleave_high/low stmts to reorder the data
282bf14c 4419 correctly for the stores. Return the final references for stores in
fb85abff 4420 RESULT_CHAIN.
4421
4422 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
282bf14c 4423 The input is 4 vectors each containing 8 elements. We assign a number to
 4424 each element; the input sequence is:
fb85abff 4425
4426 1st vec: 0 1 2 3 4 5 6 7
4427 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 4428 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 4429 4th vec: 24 25 26 27 28 29 30 31
4430
4431 The output sequence should be:
4432
4433 1st vec: 0 8 16 24 1 9 17 25
4434 2nd vec: 2 10 18 26 3 11 19 27
 4435 3rd vec: 4 12 20 28 5 13 21 29
4436 4th vec: 6 14 22 30 7 15 23 31
4437
4438 i.e., we interleave the contents of the four vectors in their order.
4439
282bf14c 4440 We use interleave_high/low instructions to create such output. The input of
fb85abff 4441 each interleave_high/low operation is two vectors:
48e1416a 4442 1st vec 2nd vec
4443 0 1 2 3 4 5 6 7
4444 the even elements of the result vector are obtained left-to-right from the
282bf14c 4445 high/low elements of the first vector. The odd elements of the result are
fb85abff 4446 obtained left-to-right from the high/low elements of the second vector.
4447 The output of interleave_high will be: 0 4 1 5
4448 and of interleave_low: 2 6 3 7
4449
48e1416a 4450
282bf14c 4451 The permutation is done in log LENGTH stages. In each stage interleave_high
48e1416a 4452 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4453 where the first argument is taken from the first half of DR_CHAIN and the
 4454 second argument from its second half.
4455 In our example,
fb85abff 4456
4457 I1: interleave_high (1st vec, 3rd vec)
4458 I2: interleave_low (1st vec, 3rd vec)
4459 I3: interleave_high (2nd vec, 4th vec)
4460 I4: interleave_low (2nd vec, 4th vec)
4461
4462 The output for the first stage is:
4463
4464 I1: 0 16 1 17 2 18 3 19
4465 I2: 4 20 5 21 6 22 7 23
4466 I3: 8 24 9 25 10 26 11 27
4467 I4: 12 28 13 29 14 30 15 31
4468
4469 The output of the second stage, i.e. the final result is:
4470
4471 I1: 0 8 16 24 1 9 17 25
4472 I2: 2 10 18 26 3 11 19 27
 4473 I3: 4 12 20 28 5 13 21 29
4474 I4: 6 14 22 30 7 15 23 31. */
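/* A plain-C sketch (for illustration only, assuming integer elements) of a
   single interleave_high/interleave_low step on two NELT-element vectors:

     static void
     interleave_step (const int *v1, const int *v2, int nelt,
                      int *high, int *low)
     {
       int i;
       for (i = 0; i < nelt / 2; i++)
         {
           high[2 * i]     = v1[i];
           high[2 * i + 1] = v2[i];
           low[2 * i]      = v1[nelt / 2 + i];
           low[2 * i + 1]  = v2[nelt / 2 + i];
         }
     }

   Applying this log2 (LENGTH) times to the pairs described above yields
   the interleaved order shown in the example.  */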
48e1416a 4475
481fc474 4476void
f1f41a6c 4477vect_permute_store_chain (vec<tree> dr_chain,
48e1416a 4478 unsigned int length,
fb85abff 4479 gimple stmt,
4480 gimple_stmt_iterator *gsi,
f1f41a6c 4481 vec<tree> *result_chain)
fb85abff 4482{
03d37e4e 4483 tree vect1, vect2, high, low;
fb85abff 4484 gimple perm_stmt;
4485 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
8bec2124 4486 tree perm_mask_low, perm_mask_high;
4487 unsigned int i, n;
42f6a6e8 4488 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
8bec2124 4489 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
48e1416a 4490
f40aaf2d 4491 result_chain->quick_grow (length);
4492 memcpy (result_chain->address (), dr_chain.address (),
4493 length * sizeof (tree));
fb85abff 4494
8bec2124 4495 for (i = 0, n = nelt / 2; i < n; i++)
4496 {
4497 sel[i * 2] = i;
4498 sel[i * 2 + 1] = i + nelt;
4499 }
4500 perm_mask_high = vect_gen_perm_mask (vectype, sel);
42f6a6e8 4501 gcc_assert (perm_mask_high != NULL);
4502
8bec2124 4503 for (i = 0; i < nelt; i++)
4504 sel[i] += nelt / 2;
4505 perm_mask_low = vect_gen_perm_mask (vectype, sel);
42f6a6e8 4506 gcc_assert (perm_mask_low != NULL);
8bec2124 4507
4508 for (i = 0, n = exact_log2 (length); i < n; i++)
fb85abff 4509 {
4510 for (j = 0; j < length/2; j++)
4511 {
f1f41a6c 4512 vect1 = dr_chain[j];
4513 vect2 = dr_chain[j+length/2];
fb85abff 4514
4515 /* Create interleaving stmt:
8bec2124 4516 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
03d37e4e 4517 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
8bec2124 4518 perm_stmt
446e85eb 4519 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4520 vect1, vect2, perm_mask_high);
fb85abff 4521 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
f1f41a6c 4522 (*result_chain)[2*j] = high;
fb85abff 4523
4524 /* Create interleaving stmt:
8bec2124 4525 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4526 nelt*3/2+1, ...}> */
03d37e4e 4527 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
8bec2124 4528 perm_stmt
446e85eb 4529 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4530 vect1, vect2, perm_mask_low);
fb85abff 4531 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
f1f41a6c 4532 (*result_chain)[2*j+1] = low;
fb85abff 4533 }
f40aaf2d 4534 memcpy (dr_chain.address (), result_chain->address (),
4535 length * sizeof (tree));
fb85abff 4536 }
fb85abff 4537}
4538
4539/* Function vect_setup_realignment
48e1416a 4540
fb85abff 4541 This function is called when vectorizing an unaligned load using
4542 the dr_explicit_realign[_optimized] scheme.
4543 This function generates the following code at the loop prolog:
4544
4545 p = initial_addr;
4546 x msq_init = *(floor(p)); # prolog load
48e1416a 4547 realignment_token = call target_builtin;
fb85abff 4548 loop:
4549 x msq = phi (msq_init, ---)
4550
48e1416a 4551 The stmts marked with x are generated only for the case of
fb85abff 4552 dr_explicit_realign_optimized.
4553
48e1416a 4554 The code above sets up a new (vector) pointer, pointing to the first
fb85abff 4555 location accessed by STMT, and a "floor-aligned" load using that pointer.
4556 It also generates code to compute the "realignment-token" (if the relevant
4557 target hook was defined), and creates a phi-node at the loop-header bb
4558 whose arguments are the result of the prolog-load (created by this
4559 function) and the result of a load that takes place in the loop (to be
4560 created by the caller to this function).
4561
4562 For the case of dr_explicit_realign_optimized:
48e1416a 4563 The caller to this function uses the phi-result (msq) to create the
fb85abff 4564 realignment code inside the loop, and sets up the missing phi argument,
4565 as follows:
48e1416a 4566 loop:
fb85abff 4567 msq = phi (msq_init, lsq)
4568 lsq = *(floor(p')); # load in loop
4569 result = realign_load (msq, lsq, realignment_token);
4570
4571 For the case of dr_explicit_realign:
4572 loop:
4573 msq = *(floor(p)); # load in loop
4574 p' = p + (VS-1);
4575 lsq = *(floor(p')); # load in loop
4576 result = realign_load (msq, lsq, realignment_token);
4577
4578 Input:
4579 STMT - (scalar) load stmt to be vectorized. This load accesses
4580 a memory location that may be unaligned.
4581 BSI - place where new code is to be inserted.
4582 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
48e1416a 4583 is used.
4584
fb85abff 4585 Output:
4586 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4587 target hook, if defined.
4588 Return value - the result of the loop-header phi node. */
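/* Illustrative sketch (assuming a 16-byte vector alignment, not the actual
   generated code): the "floor (p)" above corresponds to masking off the low
   address bits, e.g. for a V16QI vector type

     floor_p  = (v16qi *) ((uintptr_t) p & -16);
     msq_init = *floor_p;

   which is what the BIT_AND_EXPR built below for the prolog load
   implements.  */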
4589
4590tree
4591vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4592 tree *realignment_token,
4593 enum dr_alignment_support alignment_support_scheme,
4594 tree init_addr,
4595 struct loop **at_loop)
4596{
4597 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4598 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4599 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2cb9ef39 4600 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
ad074595 4601 struct loop *loop = NULL;
4602 edge pe = NULL;
fb85abff 4603 tree scalar_dest = gimple_assign_lhs (stmt);
4604 tree vec_dest;
4605 gimple inc;
4606 tree ptr;
4607 tree data_ref;
4608 gimple new_stmt;
4609 basic_block new_bb;
4610 tree msq_init = NULL_TREE;
4611 tree new_temp;
4612 gimple phi_stmt;
4613 tree msq = NULL_TREE;
4614 gimple_seq stmts = NULL;
4615 bool inv_p;
4616 bool compute_in_loop = false;
ad074595 4617 bool nested_in_vect_loop = false;
fb85abff 4618 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
ad074595 4619 struct loop *loop_for_initial_load = NULL;
4620
4621 if (loop_vinfo)
4622 {
4623 loop = LOOP_VINFO_LOOP (loop_vinfo);
4624 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4625 }
fb85abff 4626
4627 gcc_assert (alignment_support_scheme == dr_explicit_realign
4628 || alignment_support_scheme == dr_explicit_realign_optimized);
4629
4630 /* We need to generate three things:
4631 1. the misalignment computation
4632 2. the extra vector load (for the optimized realignment scheme).
4633 3. the phi node for the two vectors from which the realignment is
282bf14c 4634 done (for the optimized realignment scheme). */
fb85abff 4635
4636 /* 1. Determine where to generate the misalignment computation.
4637
4638 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4639 calculation will be generated by this function, outside the loop (in the
4640 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4641 caller, inside the loop.
4642
4643 Background: If the misalignment remains fixed throughout the iterations of
4644 the loop, then both realignment schemes are applicable, and also the
4645 misalignment computation can be done outside LOOP. This is because we are
4646 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4647 are a multiple of VS (the Vector Size), and therefore the misalignment in
4648 different vectorized LOOP iterations is always the same.
4649 The problem arises only if the memory access is in an inner-loop nested
4650 inside LOOP, which is now being vectorized using outer-loop vectorization.
4651 This is the only case when the misalignment of the memory access may not
4652 remain fixed throughout the iterations of the inner-loop (as explained in
4653 detail in vect_supportable_dr_alignment). In this case, not only is the
4654 optimized realignment scheme not applicable, but also the misalignment
4655 computation (and generation of the realignment token that is passed to
4656 REALIGN_LOAD) have to be done inside the loop.
4657
4658 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4659 or not, which in turn determines if the misalignment is computed inside
4660 the inner-loop, or outside LOOP. */
4661
ad074595 4662 if (init_addr != NULL_TREE || !loop_vinfo)
fb85abff 4663 {
4664 compute_in_loop = true;
4665 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4666 }
4667
4668
4669 /* 2. Determine where to generate the extra vector load.
4670
4671 For the optimized realignment scheme, instead of generating two vector
4672 loads in each iteration, we generate a single extra vector load in the
4673 preheader of the loop, and in each iteration reuse the result of the
4674 vector load from the previous iteration. In case the memory access is in
4675 an inner-loop nested inside LOOP, which is now being vectorized using
4676 outer-loop vectorization, we need to determine whether this initial vector
4677 load should be generated at the preheader of the inner-loop, or can be
4678 generated at the preheader of LOOP. If the memory access has no evolution
4679 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4680 to be generated inside LOOP (in the preheader of the inner-loop). */
4681
4682 if (nested_in_vect_loop)
4683 {
4684 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4685 bool invariant_in_outerloop =
4686 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4687 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4688 }
4689 else
4690 loop_for_initial_load = loop;
4691 if (at_loop)
4692 *at_loop = loop_for_initial_load;
4693
ad074595 4694 if (loop_for_initial_load)
4695 pe = loop_preheader_edge (loop_for_initial_load);
4696
fb85abff 4697 /* 3. For the case of the optimized realignment, create the first vector
4698 load at the loop preheader. */
4699
4700 if (alignment_support_scheme == dr_explicit_realign_optimized)
4701 {
4702 /* Create msq_init = *(floor(p1)) in the loop preheader */
4703
4704 gcc_assert (!compute_in_loop);
fb85abff 4705 vec_dest = vect_create_destination_var (scalar_dest, vectype);
bd5ba09f 4706 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4707 NULL_TREE, &init_addr, NULL, &inc,
4708 true, &inv_p);
874117c8 4709 new_temp = copy_ssa_name (ptr, NULL);
86638c2e 4710 new_stmt = gimple_build_assign_with_ops
874117c8 4711 (BIT_AND_EXPR, new_temp, ptr,
86638c2e 4712 build_int_cst (TREE_TYPE (ptr),
4713 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
86638c2e 4714 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4715 gcc_assert (!new_bb);
2cb9ef39 4716 data_ref
4717 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4718 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
fb85abff 4719 new_stmt = gimple_build_assign (vec_dest, data_ref);
4720 new_temp = make_ssa_name (vec_dest, new_stmt);
4721 gimple_assign_set_lhs (new_stmt, new_temp);
ad074595 4722 if (pe)
4723 {
4724 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4725 gcc_assert (!new_bb);
4726 }
4727 else
4728 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4729
fb85abff 4730 msq_init = gimple_assign_lhs (new_stmt);
4731 }
4732
4733 /* 4. Create realignment token using a target builtin, if available.
4734 It is done either inside the containing loop, or before LOOP (as
4735 determined above). */
4736
4737 if (targetm.vectorize.builtin_mask_for_load)
4738 {
4739 tree builtin_decl;
4740
 4741 /* Compute INIT_ADDR - the initial address accessed by this memref. */
ad074595 4742 if (!init_addr)
fb85abff 4743 {
4744 /* Generate the INIT_ADDR computation outside LOOP. */
4745 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4746 NULL_TREE, loop);
ad074595 4747 if (loop)
4748 {
4749 pe = loop_preheader_edge (loop);
4750 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4751 gcc_assert (!new_bb);
4752 }
4753 else
4754 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
fb85abff 4755 }
4756
4757 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4758 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4759 vec_dest =
4760 vect_create_destination_var (scalar_dest,
4761 gimple_call_return_type (new_stmt));
4762 new_temp = make_ssa_name (vec_dest, new_stmt);
4763 gimple_call_set_lhs (new_stmt, new_temp);
4764
4765 if (compute_in_loop)
4766 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4767 else
4768 {
4769 /* Generate the misalignment computation outside LOOP. */
4770 pe = loop_preheader_edge (loop);
4771 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4772 gcc_assert (!new_bb);
4773 }
4774
4775 *realignment_token = gimple_call_lhs (new_stmt);
4776
4777 /* The result of the CALL_EXPR to this builtin is determined from
	 the value of the parameter and no global variables are touched,
	 which makes the builtin a "const" function.  Requiring the
4780 builtin to have the "const" attribute makes it unnecessary
4781 to call mark_call_clobbered. */
4782 gcc_assert (TREE_READONLY (builtin_decl));
4783 }
4784
4785 if (alignment_support_scheme == dr_explicit_realign)
4786 return msq;
4787
4788 gcc_assert (!compute_in_loop);
4789 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4790
4791
4792 /* 5. Create msq = phi <msq_init, lsq> in loop */
4793
4794 pe = loop_preheader_edge (containing_loop);
4795 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4796 msq = make_ssa_name (vec_dest, NULL);
4797 phi_stmt = create_phi_node (msq, containing_loop->header);
60d535d2 4798 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
fb85abff 4799
4800 return msq;
4801}
4802
4803
ee612634 4804/* Function vect_grouped_load_supported.
fb85abff 4805
   Returns TRUE if the load permutations required for a group of COUNT
   vectors of type VECTYPE (extract even/odd for a power-of-two COUNT, the
   3-way shuffles for COUNT == 3) are supported, and FALSE otherwise.  */
4808
4809bool
ee612634 4810vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
fb85abff 4811{
42f6a6e8 4812 enum machine_mode mode = TYPE_MODE (vectype);
fb85abff 4813
1e1bca71 4814 /* vect_permute_load_chain requires the group size to be equal to 3 or
4815 be a power of two. */
4816 if (count != 3 && exact_log2 (count) == -1)
481fc474 4817 {
6d8fb6cf 4818 if (dump_enabled_p ())
7bd765d4 4819 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1e1bca71 4820 "the size of the group of accesses"
			 " is neither a power of 2 nor equal to 3\n");
481fc474 4822 return false;
4823 }
4824
42f6a6e8 4825 /* Check that the permutation is supported. */
4826 if (VECTOR_MODE_P (mode))
4827 {
1e1bca71 4828 unsigned int i, j, nelt = GET_MODE_NUNITS (mode);
42f6a6e8 4829 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
fb85abff 4830
1e1bca71 4831 if (count == 3)
42f6a6e8 4832 {
1e1bca71 4833 unsigned int k;
4834 for (k = 0; k < 3; k++)
4835 {
4836 for (i = 0; i < nelt; i++)
4837 if (3 * i + k < 2 * nelt)
4838 sel[i] = 3 * i + k;
4839 else
4840 sel[i] = 0;
4841 if (!can_vec_perm_p (mode, false, sel))
4842 {
4843 if (dump_enabled_p ())
4844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4845 "shuffle of 3 loads is not supported by"
4846 " target\n");
4847 return false;
4848 }
4849 for (i = 0, j = 0; i < nelt; i++)
4850 if (3 * i + k < 2 * nelt)
4851 sel[i] = i;
4852 else
4853 sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
4854 if (!can_vec_perm_p (mode, false, sel))
4855 {
4856 if (dump_enabled_p ())
4857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4858 "shuffle of 3 loads is not supported by"
4859 " target\n");
4860 return false;
4861 }
4862 }
4863 return true;
4864 }
4865 else
4866 {
	  /* If COUNT is not equal to 3, then only a power of 2 is supported.  */
4868 gcc_assert (exact_log2 (count) != -1);
42f6a6e8 4869 for (i = 0; i < nelt; i++)
1e1bca71 4870 sel[i] = i * 2;
42f6a6e8 4871 if (can_vec_perm_p (mode, false, sel))
1e1bca71 4872 {
4873 for (i = 0; i < nelt; i++)
4874 sel[i] = i * 2 + 1;
4875 if (can_vec_perm_p (mode, false, sel))
4876 return true;
4877 }
4878 }
42f6a6e8 4879 }
fb85abff 4880
6d8fb6cf 4881 if (dump_enabled_p ())
7bd765d4 4882 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1e1bca71 4883 "extract even/odd not supported by target\n");
6620d7d7 4884 return false;
fb85abff 4885}
4886
94b7b4dd 4887/* Return TRUE if vec_load_lanes is available for COUNT vectors of
4888 type VECTYPE. */
4889
4890bool
4891vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4892{
4893 return vect_lanes_optab_supported_p ("vec_load_lanes",
4894 vec_load_lanes_optab,
4895 vectype, count);
4896}
fb85abff 4897
4898/* Function vect_permute_load_chain.
4899
4900 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2 or equal to 3, generate the permutation stmts (extract
   even/odd for a power of 2, 3-way shuffles for LENGTH == 3) that reorder
   the input data correctly.  Return the final references for loads in
4903 RESULT_CHAIN.
fb85abff 4904
4905 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4906 The input is 4 vectors each containing 8 elements. We assign a number to each
   element; the input sequence is:
4908
4909 1st vec: 0 1 2 3 4 5 6 7
4910 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 4911 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 4912 4th vec: 24 25 26 27 28 29 30 31
4913
4914 The output sequence should be:
4915
4916 1st vec: 0 4 8 12 16 20 24 28
4917 2nd vec: 1 5 9 13 17 21 25 29
48e1416a 4918 3rd vec: 2 6 10 14 18 22 26 30
fb85abff 4919 4th vec: 3 7 11 15 19 23 27 31
4920
4921 i.e., the first output vector should contain the first elements of each
4922 interleaving group, etc.
4923
282bf14c 4924 We use extract_even/odd instructions to create such output. The input of
4925 each extract_even/odd operation is two vectors
48e1416a 4926 1st vec 2nd vec
4927 0 1 2 3 4 5 6 7
fb85abff 4928
282bf14c 4929 and the output is the vector of extracted even/odd elements. The output of
fb85abff 4930 extract_even will be: 0 2 4 6
4931 and of extract_odd: 1 3 5 7
4932
48e1416a 4933
282bf14c 4934 The permutation is done in log LENGTH stages. In each stage extract_even
4935 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4936 their order. In our example,
fb85abff 4937
4938 E1: extract_even (1st vec, 2nd vec)
4939 E2: extract_odd (1st vec, 2nd vec)
4940 E3: extract_even (3rd vec, 4th vec)
4941 E4: extract_odd (3rd vec, 4th vec)
4942
4943 The output for the first stage will be:
4944
4945 E1: 0 2 4 6 8 10 12 14
4946 E2: 1 3 5 7 9 11 13 15
48e1416a 4947 E3: 16 18 20 22 24 26 28 30
fb85abff 4948 E4: 17 19 21 23 25 27 29 31
4949
4950 In order to proceed and create the correct sequence for the next stage (or
48e1416a 4951 for the correct output, if the second stage is the last one, as in our
   example), we first put the output of the extract_even operation and then the
fb85abff 4953 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4954 The input for the second stage is:
4955
4956 1st vec (E1): 0 2 4 6 8 10 12 14
48e1416a 4957 2nd vec (E3): 16 18 20 22 24 26 28 30
4958 3rd vec (E2): 1 3 5 7 9 11 13 15
fb85abff 4959 4th vec (E4): 17 19 21 23 25 27 29 31
4960
4961 The output of the second stage:
4962
4963 E1: 0 4 8 12 16 20 24 28
4964 E2: 2 6 10 14 18 22 26 30
4965 E3: 1 5 9 13 17 21 25 29
4966 E4: 3 7 11 15 19 23 27 31
4967
4968 And RESULT_CHAIN after reordering:
4969
4970 1st vec (E1): 0 4 8 12 16 20 24 28
4971 2nd vec (E3): 1 5 9 13 17 21 25 29
48e1416a 4972 3rd vec (E2): 2 6 10 14 18 22 26 30
fb85abff 4973 4th vec (E4): 3 7 11 15 19 23 27 31. */
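/* As an additional illustration, the LENGTH == 3 case below proceeds as
   follows for NELT == 8 and k == 0 (a sketch; the masks follow from the
   formulas in the code):

     1st vec: 0 1 2 3 4 5 6 7
     2nd vec: 8 9 10 11 12 13 14 15
     3rd vec: 16 17 18 19 20 21 22 23

   The first VEC_PERM_EXPR applies mask {0, 3, 6, 9, 12, 15, 0, 0} to the
   first two vectors:

     low: 0 3 6 9 12 15 0 0

   and the second VEC_PERM_EXPR applies mask {0, 1, 2, 3, 4, 5, 10, 13} to
   (low, 3rd vec) to fill in the last two elements from the third vector:

     (*result_chain)[0]: 0 3 6 9 12 15 18 21

   i.e. every third element starting at k.  The iterations for k == 1 and
   k == 2 produce the remaining two output vectors analogously.  */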
4974
481fc474 4975static void
f1f41a6c 4976vect_permute_load_chain (vec<tree> dr_chain,
48e1416a 4977 unsigned int length,
fb85abff 4978 gimple stmt,
4979 gimple_stmt_iterator *gsi,
f1f41a6c 4980 vec<tree> *result_chain)
fb85abff 4981{
03d37e4e 4982 tree data_ref, first_vect, second_vect;
42f6a6e8 4983 tree perm_mask_even, perm_mask_odd;
1e1bca71 4984 tree perm3_mask_low, perm3_mask_high;
fb85abff 4985 gimple perm_stmt;
4986 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
42f6a6e8 4987 unsigned int i, j, log_length = exact_log2 (length);
4988 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4989 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
fb85abff 4990
1648f21f 4991 result_chain->quick_grow (length);
4992 memcpy (result_chain->address (), dr_chain.address (),
4993 length * sizeof (tree));
42f6a6e8 4994
1e1bca71 4995 if (length == 3)
fb85abff 4996 {
1e1bca71 4997 unsigned int k;
fb85abff 4998
1e1bca71 4999 for (k = 0; k < 3; k++)
5000 {
5001 for (i = 0; i < nelt; i++)
5002 if (3 * i + k < 2 * nelt)
5003 sel[i] = 3 * i + k;
5004 else
5005 sel[i] = 0;
5006 perm3_mask_low = vect_gen_perm_mask (vectype, sel);
5007 gcc_assert (perm3_mask_low != NULL);
5008
5009 for (i = 0, j = 0; i < nelt; i++)
5010 if (3 * i + k < 2 * nelt)
5011 sel[i] = i;
5012 else
5013 sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
5014
5015 perm3_mask_high = vect_gen_perm_mask (vectype, sel);
5016 gcc_assert (perm3_mask_high != NULL);
5017
5018 first_vect = dr_chain[0];
5019 second_vect = dr_chain[1];
5020
5021 /* Create interleaving stmt (low part of):
	     low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
5023 ...}> */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
446e85eb 5025 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5026 first_vect, second_vect,
1e1bca71 5027 perm3_mask_low);
fb85abff 5028 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
48e1416a 5029
	  /* Create interleaving stmt (high part of), combining the elements
	     gathered so far (FIRST_VECT, the low result) with the remaining
	     ones taken from the third vector (SECOND_VECT):
	     high = VEC_PERM_EXPR <first_vect, second_vect,
				   {0, 1, ..., nelt + (nelt + k) % 3,
				    nelt + (nelt + k) % 3 + 3, ...}> */
5033 first_vect = data_ref;
5034 second_vect = dr_chain[2];
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
446e85eb 5036 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5037 first_vect, second_vect,
1e1bca71 5038 perm3_mask_high);
fb85abff 5039 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
1e1bca71 5040 (*result_chain)[k] = data_ref;
fb85abff 5041 }
fb85abff 5042 }
1e1bca71 5043 else
5044 {
      /* If LENGTH is not equal to 3, then only a power of 2 is supported.  */
5046 gcc_assert (exact_log2 (length) != -1);
5047
5048 for (i = 0; i < nelt; ++i)
5049 sel[i] = i * 2;
5050 perm_mask_even = vect_gen_perm_mask (vectype, sel);
5051 gcc_assert (perm_mask_even != NULL);
5052
5053 for (i = 0; i < nelt; ++i)
5054 sel[i] = i * 2 + 1;
5055 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
5056 gcc_assert (perm_mask_odd != NULL);
fb85abff 5057
1e1bca71 5058 for (i = 0; i < log_length; i++)
5059 {
5060 for (j = 0; j < length; j += 2)
5061 {
5062 first_vect = dr_chain[j];
5063 second_vect = dr_chain[j+1];
5064
5065 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5066 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
5067 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5068 first_vect, second_vect,
5069 perm_mask_even);
5070 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5071 (*result_chain)[j/2] = data_ref;
5072
5073 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5074 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
5075 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
5076 first_vect, second_vect,
5077 perm_mask_odd);
5078 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
5079 (*result_chain)[j/2+length/2] = data_ref;
5080 }
5081 memcpy (dr_chain.address (), result_chain->address (),
5082 length * sizeof (tree));
5083 }
5084 }
5085}
fb85abff 5086
ee612634 5087/* Function vect_transform_grouped_load.
fb85abff 5088
5089 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and assign the resulting vectorized statements to
5091 the scalar statements.
5092*/
5093
481fc474 5094void
f1f41a6c 5095vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
fb85abff 5096 gimple_stmt_iterator *gsi)
5097{
1e094109 5098 vec<tree> result_chain = vNULL;
fb85abff 5099
48e1416a 5100 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
     vectors that are ready for vector computation.  */
f1f41a6c 5103 result_chain.create (size);
481fc474 5104 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
ee612634 5105 vect_record_grouped_load_vectors (stmt, result_chain);
f1f41a6c 5106 result_chain.release ();
94b7b4dd 5107}
5108
ee612634 5109/* RESULT_CHAIN contains the output of a group of grouped loads that were
94b7b4dd 5110 generated as part of the vectorization of STMT. Assign the statement
5111 for each vector to the associated scalar statement. */
5112
5113void
f1f41a6c 5114vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
94b7b4dd 5115{
21009880 5116 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
94b7b4dd 5117 gimple next_stmt, new_stmt;
5118 unsigned int i, gap_count;
5119 tree tmp_data_ref;
fb85abff 5120
48e1416a 5121 /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
5124 next_stmt = first_stmt;
5125 gap_count = 1;
f1f41a6c 5126 FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
fb85abff 5127 {
5128 if (!next_stmt)
5129 break;
5130
	  /* Skip the gaps.  Loads created for the gaps will be removed later
	     by the dead code elimination pass.  No need to check for the
	     first stmt in the group, since it always exists.
21009880 5134 GROUP_GAP is the number of steps in elements from the previous
5135 access (if there is no gap GROUP_GAP is 1). We skip loads that
282bf14c 5136 correspond to the gaps. */
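	  /* For instance, for a hypothetical group that loads a[3*i] and
	     a[3*i + 2] (leaving a gap at offset 1), GROUP_GAP of the second
	     stmt is 2, so the permuted vector produced for offset 1 is
	     skipped here and later removed as dead code.  */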
48e1416a 5137 if (next_stmt != first_stmt
21009880 5138 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
fb85abff 5139 {
5140 gap_count++;
5141 continue;
5142 }
5143
5144 while (next_stmt)
5145 {
5146 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
5147 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5148 copies, and we put the new vector statement in the first available
5149 RELATED_STMT. */
5150 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
5151 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
5152 else
5153 {
21009880 5154 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
fb85abff 5155 {
5156 gimple prev_stmt =
5157 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
5158 gimple rel_stmt =
5159 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
5160 while (rel_stmt)
5161 {
5162 prev_stmt = rel_stmt;
48e1416a 5163 rel_stmt =
fb85abff 5164 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
5165 }
5166
48e1416a 5167 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
fb85abff 5168 new_stmt;
5169 }
5170 }
5171
21009880 5172 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
fb85abff 5173 gap_count = 1;
5174 /* If NEXT_STMT accesses the same DR as the previous statement,
5175 put the same TMP_DATA_REF as its vectorized statement; otherwise
5176 get the next data-ref from RESULT_CHAIN. */
21009880 5177 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
fb85abff 5178 break;
5179 }
5180 }
fb85abff 5181}
5182
5183/* Function vect_force_dr_alignment_p.
5184
   Returns whether DECL can be forced to be aligned on an ALIGNMENT-bit
   boundary.  */
5187
48e1416a 5188bool
fb85abff 5189vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
5190{
5191 if (TREE_CODE (decl) != VAR_DECL)
5192 return false;
5193
1f0f7e1e 5194 /* We cannot change alignment of common or external symbols as another
5195 translation unit may contain a definition with lower alignment.
5196 The rules of common symbol linking mean that the definition
90a674eb 5197 will override the common symbol. The same is true for constant
5198 pool entries which may be shared and are not properly merged
5199 by LTO. */
1f0f7e1e 5200 if (DECL_EXTERNAL (decl)
90a674eb 5201 || DECL_COMMON (decl)
5202 || DECL_IN_CONSTANT_POOL (decl))
fb85abff 5203 return false;
5204
5205 if (TREE_ASM_WRITTEN (decl))
5206 return false;
5207
0bbf0fd8 5208 /* Do not override the alignment as specified by the ABI when the used
5209 attribute is set. */
5210 if (DECL_PRESERVE_P (decl))
5f8265c3 5211 return false;
5212
ce281ff3 5213 /* Do not override explicit alignment set by the user when an explicit
5214 section name is also used. This is a common idiom used by many
5215 software projects. */
5216 if (DECL_SECTION_NAME (decl) != NULL_TREE
5217 && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
5218 return false;
5219
fb85abff 5220 if (TREE_STATIC (decl))
5221 return (alignment <= MAX_OFILE_ALIGNMENT);
5222 else
5223 return (alignment <= MAX_STACK_ALIGNMENT);
5224}
5225
fb85abff 5226
0822b158 5227/* Return whether the data reference DR is supported with respect to its
5228 alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with different
fb85abff 5231 alignment. */
5232
5233enum dr_alignment_support
0822b158 5234vect_supportable_dr_alignment (struct data_reference *dr,
5235 bool check_aligned_accesses)
fb85abff 5236{
5237 gimple stmt = DR_STMT (dr);
5238 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5239 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
bc620c5c 5240 enum machine_mode mode = TYPE_MODE (vectype);
37545e54 5241 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5242 struct loop *vect_loop = NULL;
5243 bool nested_in_vect_loop = false;
fb85abff 5244
0822b158 5245 if (aligned_access_p (dr) && !check_aligned_accesses)
fb85abff 5246 return dr_aligned;
5247
c71d3c24 5248 /* For now assume all conditional loads/stores support unaligned
5249 access without any special code. */
5250 if (is_gimple_call (stmt)
5251 && gimple_call_internal_p (stmt)
5252 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
5253 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
5254 return dr_unaligned_supported;
5255
ad074595 5256 if (loop_vinfo)
5257 {
5258 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
5259 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
5260 }
37545e54 5261
fb85abff 5262 /* Possibly unaligned access. */
5263
5264 /* We can choose between using the implicit realignment scheme (generating
5265 a misaligned_move stmt) and the explicit realignment scheme (generating
282bf14c 5266 aligned loads with a REALIGN_LOAD). There are two variants to the
5267 explicit realignment scheme: optimized, and unoptimized.
fb85abff 5268 We can optimize the realignment only if the step between consecutive
5269 vector loads is equal to the vector size. Since the vector memory
5270 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
5271 is guaranteed that the misalignment amount remains the same throughout the
5272 execution of the vectorized loop. Therefore, we can create the
5273 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
5274 at the loop preheader.
5275
5276 However, in the case of outer-loop vectorization, when vectorizing a
5277 memory access in the inner-loop nested within the LOOP that is now being
5278 vectorized, while it is guaranteed that the misalignment of the
5279 vectorized memory access will remain the same in different outer-loop
   iterations, it is *not* guaranteed that it will remain the same throughout
5281 the execution of the inner-loop. This is because the inner-loop advances
5282 with the original scalar step (and not in steps of VS). If the inner-loop
5283 step happens to be a multiple of VS, then the misalignment remains fixed
5284 and we can use the optimized realignment scheme. For example:
5285
5286 for (i=0; i<N; i++)
5287 for (j=0; j<M; j++)
5288 s += a[i+j];
5289
5290 When vectorizing the i-loop in the above example, the step between
5291 consecutive vector loads is 1, and so the misalignment does not remain
5292 fixed across the execution of the inner-loop, and the realignment cannot
5293 be optimized (as illustrated in the following pseudo vectorized loop):
5294
5295 for (i=0; i<N; i+=4)
5296 for (j=0; j<M; j++){
5297 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
5298 // when j is {0,1,2,3,4,5,6,7,...} respectively.
5299 // (assuming that we start from an aligned address).
5300 }
5301
5302 We therefore have to use the unoptimized realignment scheme:
5303
5304 for (i=0; i<N; i+=4)
5305 for (j=k; j<M; j+=4)
5306 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
5307 // that the misalignment of the initial address is
5308 // 0).
5309
5310 The loop can then be vectorized as follows:
5311
5312 for (k=0; k<4; k++){
5313 rt = get_realignment_token (&vp[k]);
5314 for (i=0; i<N; i+=4){
5315 v1 = vp[i+k];
5316 for (j=k; j<M; j+=4){
5317 v2 = vp[i+j+VS-1];
5318 va = REALIGN_LOAD <v1,v2,rt>;
5319 vs += va;
5320 v1 = v2;
5321 }
5322 }
5323 } */
5324
5325 if (DR_IS_READ (dr))
5326 {
c6b19c5f 5327 bool is_packed = false;
5328 tree type = (TREE_TYPE (DR_REF (dr)));
5329
d6bf3b14 5330 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
fb85abff 5331 && (!targetm.vectorize.builtin_mask_for_load
5332 || targetm.vectorize.builtin_mask_for_load ()))
5333 {
5334 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ad074595 5335 if ((nested_in_vect_loop
f9ae6f95 5336 && (TREE_INT_CST_LOW (DR_STEP (dr))
ad074595 5337 != GET_MODE_SIZE (TYPE_MODE (vectype))))
5338 || !loop_vinfo)
fb85abff 5339 return dr_explicit_realign;
5340 else
5341 return dr_explicit_realign_optimized;
5342 }
c6b19c5f 5343 if (!known_alignment_for_access_p (dr))
cfa724cf 5344 is_packed = not_size_aligned (DR_REF (dr));
48e1416a 5345
2cd0995e 5346 if ((TYPE_USER_ALIGN (type) && !is_packed)
5347 || targetm.vectorize.
5348 support_vector_misalignment (mode, type,
5349 DR_MISALIGNMENT (dr), is_packed))
fb85abff 5350 /* Can't software pipeline the loads, but can at least do them. */
5351 return dr_unaligned_supported;
5352 }
c6b19c5f 5353 else
5354 {
5355 bool is_packed = false;
5356 tree type = (TREE_TYPE (DR_REF (dr)));
fb85abff 5357
c6b19c5f 5358 if (!known_alignment_for_access_p (dr))
cfa724cf 5359 is_packed = not_size_aligned (DR_REF (dr));
48e1416a 5360
2cd0995e 5361 if ((TYPE_USER_ALIGN (type) && !is_packed)
5362 || targetm.vectorize.
5363 support_vector_misalignment (mode, type,
5364 DR_MISALIGNMENT (dr), is_packed))
c6b19c5f 5365 return dr_unaligned_supported;
5366 }
48e1416a 5367
fb85abff 5368 /* Unsupported. */
5369 return dr_unaligned_unsupported;
5370}