]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-data-refs.c
Allow automatics in equivalences
[thirdparty/gcc.git] / gcc / tree-vect-data-refs.c
CommitLineData
48e1416a 1/* Data References Analysis and Manipulation Utilities for Vectorization.
fbd26352 2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
9ef16211 25#include "backend.h"
7c29e30e 26#include "target.h"
27#include "rtl.h"
fb85abff 28#include "tree.h"
9ef16211 29#include "gimple.h"
7c29e30e 30#include "predict.h"
ad7b10a2 31#include "memmodel.h"
7c29e30e 32#include "tm_p.h"
9ef16211 33#include "ssa.h"
7c29e30e 34#include "optabs-tree.h"
35#include "cgraph.h"
7c29e30e 36#include "dumpfile.h"
9ef16211 37#include "alias.h"
b20a8bb4 38#include "fold-const.h"
9ed99284 39#include "stor-layout.h"
bc61cadb 40#include "tree-eh.h"
a8783bee 41#include "gimplify.h"
dcf1a1ec 42#include "gimple-iterator.h"
e795d6e1 43#include "gimplify-me.h"
05d9c18a 44#include "tree-ssa-loop-ivopts.h"
45#include "tree-ssa-loop-manip.h"
073c1fd5 46#include "tree-ssa-loop.h"
fb85abff 47#include "cfgloop.h"
fb85abff 48#include "tree-scalar-evolution.h"
49#include "tree-vectorizer.h"
8e3cb73b 50#include "expr.h"
f7715905 51#include "builtins.h"
0d8001a7 52#include "params.h"
a5456a6d 53#include "tree-cfg.h"
f68a7726 54#include "tree-hash-traits.h"
d37760c5 55#include "vec-perm-indices.h"
1619606c 56#include "internal-fn.h"
fb85abff 57
94b7b4dd 58/* Return true if load- or store-lanes optab OPTAB is implemented for
59 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
60
61static bool
62vect_lanes_optab_supported_p (const char *name, convert_optab optab,
63 tree vectype, unsigned HOST_WIDE_INT count)
64{
30d26b1c 65 machine_mode mode, array_mode;
94b7b4dd 66 bool limit_p;
67
68 mode = TYPE_MODE (vectype);
30d26b1c 69 if (!targetm.array_mode (mode, count).exists (&array_mode))
94b7b4dd 70 {
30d26b1c 71 poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
72 limit_p = !targetm.array_mode_supported_p (mode, count);
73 if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
74 {
75 if (dump_enabled_p ())
76 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
bffe1cb4 77 "no array mode for %s[%wu]\n",
30d26b1c 78 GET_MODE_NAME (mode), count);
79 return false;
80 }
94b7b4dd 81 }
82
83 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
84 {
6d8fb6cf 85 if (dump_enabled_p ())
7bd765d4 86 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 87 "cannot use %s<%s><%s>\n", name,
7bd765d4 88 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
94b7b4dd 89 return false;
90 }
91
6d8fb6cf 92 if (dump_enabled_p ())
7bd765d4 93 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 94 "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
7bd765d4 95 GET_MODE_NAME (mode));
94b7b4dd 96
97 return true;
98}
99
100
ecc42a77 101/* Return the smallest scalar part of STMT_INFO.
282bf14c 102 This is used to determine the vectype of the stmt. We generally set the
103 vectype according to the type of the result (lhs). For stmts whose
fb85abff 104 result-type is different than the type of the arguments (e.g., demotion,
48e1416a 105 promotion), vectype will be reset appropriately (later). Note that we have
fb85abff 106 to visit the smallest datatype in this function, because that determines the
282bf14c 107 VF. If the smallest datatype in the loop is present only as the rhs of a
fb85abff 108 promotion operation - we'd miss it.
109 Such a case, where a variable of this datatype does not appear in the lhs
110 anywhere in the loop, can only occur if it's an invariant: e.g.:
48e1416a 111 'int_x = (int) short_inv', which we'd expect to have been optimized away by
282bf14c 112 invariant motion. However, we cannot rely on invariant motion to always
113 take invariants out of the loop, and so in the case of promotion we also
114 have to check the rhs.
fb85abff 115 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
116 types. */
117
118tree
ecc42a77 119vect_get_smallest_scalar_type (stmt_vec_info stmt_info,
120 HOST_WIDE_INT *lhs_size_unit,
121 HOST_WIDE_INT *rhs_size_unit)
fb85abff 122{
ecc42a77 123 tree scalar_type = gimple_expr_type (stmt_info->stmt);
fb85abff 124 HOST_WIDE_INT lhs, rhs;
125
0b86fa32 126 /* During the analysis phase, this function is called on arbitrary
127 statements that might not have scalar results. */
128 if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
129 return scalar_type;
130
f9ae6f95 131 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
fb85abff 132
ecc42a77 133 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5b4b7bcc 134 if (assign
135 && (gimple_assign_cast_p (assign)
136 || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
137 || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
138 || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
139 || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
140 || gimple_assign_rhs_code (assign) == FLOAT_EXPR))
fb85abff 141 {
5b4b7bcc 142 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
fb85abff 143
7e41cae0 144 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
145 if (rhs < lhs)
146 scalar_type = rhs_type;
147 }
37d0d1b1 148 else if (gcall *call = dyn_cast <gcall *> (stmt_info->stmt))
7e41cae0 149 {
37d0d1b1 150 unsigned int i = 0;
151 if (gimple_call_internal_p (call))
152 {
153 internal_fn ifn = gimple_call_internal_fn (call);
154 if (internal_load_fn_p (ifn) || internal_store_fn_p (ifn))
155 /* gimple_expr_type already picked the type of the loaded
156 or stored data. */
157 i = ~0U;
158 else if (internal_fn_mask_index (ifn) == 0)
159 i = 1;
160 }
161 if (i < gimple_call_num_args (call))
162 {
163 tree rhs_type = TREE_TYPE (gimple_call_arg (call, i));
164 if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (rhs_type)))
165 {
166 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
167 if (rhs < lhs)
168 scalar_type = rhs_type;
169 }
170 }
fb85abff 171 }
48e1416a 172
173 *lhs_size_unit = lhs;
fb85abff 174 *rhs_size_unit = rhs;
175 return scalar_type;
176}
177
178
fb85abff 179/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
180 tested at run-time. Return TRUE if DDR was successfully inserted.
181 Return false if versioning is not supported. */
182
ed9370cc 183static opt_result
fb85abff 184vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
185{
2e966e2a 186 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 187
188 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
ed9370cc 189 return opt_result::failure_at (vect_location,
190 "will not create alias checks, as"
191 " --param vect-max-version-for-alias-checks"
192 " == 0\n");
fb85abff 193
ed9370cc 194 opt_result res
195 = runtime_alias_check_p (ddr, loop,
196 optimize_loop_nest_for_speed_p (loop));
197 if (!res)
198 return res;
f634c3e9 199
f1f41a6c 200 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
ed9370cc 201 return opt_result::success ();
fb85abff 202}
203
e85b4a5e 204/* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */
205
206static void
207vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
208{
209 vec<tree> checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
210 for (unsigned int i = 0; i < checks.length(); ++i)
211 if (checks[i] == value)
212 return;
213
214 if (dump_enabled_p ())
a4e972e3 215 dump_printf_loc (MSG_NOTE, vect_location,
216 "need run-time check that %T is nonzero\n",
217 value);
e85b4a5e 218 LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
219}
220
abc9513d 221/* Return true if we know that the order of vectorized DR_INFO_A and
222 vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
223 DR_INFO_B. At least one of the accesses is a write. */
e85b4a5e 224
225static bool
abc9513d 226vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
e85b4a5e 227{
abc9513d 228 stmt_vec_info stmtinfo_a = dr_info_a->stmt;
229 stmt_vec_info stmtinfo_b = dr_info_b->stmt;
230
e85b4a5e 231 /* Single statements are always kept in their original order. */
232 if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
233 && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
234 return true;
235
236 /* STMT_A and STMT_B belong to overlapping groups. All loads in a
29659b56 237 SLP group are emitted at the position of the last scalar load and
238 all loads in an interleaving group are emitted at the position
239 of the first scalar load.
240 Stores in a group are emitted at the position of the last scalar store.
ce8e9d74 241 Compute that position and check whether the resulting order matches
29659b56 242 the current one.
243 We have not yet decided between SLP and interleaving so we have
244 to conservatively assume both. */
245 stmt_vec_info il_a;
246 stmt_vec_info last_a = il_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
ce8e9d74 247 if (last_a)
29659b56 248 {
249 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
250 s = DR_GROUP_NEXT_ELEMENT (s))
251 last_a = get_later_stmt (last_a, s);
252 if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_a)))
253 {
254 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_a); s;
255 s = DR_GROUP_NEXT_ELEMENT (s))
256 if (get_later_stmt (il_a, s) == il_a)
257 il_a = s;
258 }
259 else
260 il_a = last_a;
261 }
ce8e9d74 262 else
29659b56 263 last_a = il_a = stmtinfo_a;
264 stmt_vec_info il_b;
265 stmt_vec_info last_b = il_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
ce8e9d74 266 if (last_b)
29659b56 267 {
268 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
269 s = DR_GROUP_NEXT_ELEMENT (s))
270 last_b = get_later_stmt (last_b, s);
271 if (!DR_IS_WRITE (STMT_VINFO_DATA_REF (stmtinfo_b)))
272 {
273 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (il_b); s;
274 s = DR_GROUP_NEXT_ELEMENT (s))
275 if (get_later_stmt (il_b, s) == il_b)
276 il_b = s;
277 }
278 else
279 il_b = last_b;
280 }
ce8e9d74 281 else
29659b56 282 last_b = il_b = stmtinfo_b;
283 bool a_after_b = (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a);
284 return (/* SLP */
285 (get_later_stmt (last_a, last_b) == last_a) == a_after_b
286 /* Interleaving */
287 && (get_later_stmt (il_a, il_b) == il_a) == a_after_b
288 /* Mixed */
289 && (get_later_stmt (il_a, last_b) == il_a) == a_after_b
290 && (get_later_stmt (last_a, il_b) == last_a) == a_after_b);
e85b4a5e 291}
37545e54 292
403965f7 293/* A subroutine of vect_analyze_data_ref_dependence. Handle
294 DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
295 distances. These distances are conservatively correct but they don't
296 reflect a guaranteed dependence.
297
298 Return true if this function does all the work necessary to avoid
299 an alias or false if the caller should use the dependence distances
300 to limit the vectorization factor in the usual way. LOOP_DEPTH is
301 the depth of the loop described by LOOP_VINFO and the other arguments
302 are as for vect_analyze_data_ref_dependence. */
303
304static bool
305vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
306 loop_vec_info loop_vinfo,
d75596cd 307 int loop_depth, unsigned int *max_vf)
403965f7 308{
2e966e2a 309 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
403965f7 310 lambda_vector dist_v;
311 unsigned int i;
312 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
313 {
314 int dist = dist_v[loop_depth];
315 if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
316 {
317 /* If the user asserted safelen >= DIST consecutive iterations
318 can be executed concurrently, assume independence.
319
320 ??? An alternative would be to add the alias check even
321 in this case, and vectorize the fallback loop with the
322 maximum VF set to safelen. However, if the user has
323 explicitly given a length, it's less likely that that
324 would be a win. */
325 if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
326 {
d75596cd 327 if ((unsigned int) loop->safelen < *max_vf)
403965f7 328 *max_vf = loop->safelen;
329 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
330 continue;
331 }
332
333 /* For dependence distances of 2 or more, we have the option
334 of limiting VF or checking for an alias at runtime.
335 Prefer to check at runtime if we can, to avoid limiting
336 the VF unnecessarily when the bases are in fact independent.
337
338 Note that the alias checks will be removed if the VF ends up
339 being small enough. */
db72d3bf 340 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
341 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
342 return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
343 && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
fa681b45 344 && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
403965f7 345 }
346 }
347 return true;
348}
349
350
fb85abff 351/* Function vect_analyze_data_ref_dependence.
352
ed9370cc 353 FIXME: I needed to change the sense of the returned flag.
354
355 Return FALSE if there (might) exist a dependence between a memory-reference
fb85abff 356 DRA and a memory-reference DRB. When versioning for alias may check a
ed9370cc 357 dependence at run-time, return TRUE. Adjust *MAX_VF according to
91a74fc6 358 the data dependence. */
48e1416a 359
ed9370cc 360static opt_result
fb85abff 361vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
d75596cd 362 loop_vec_info loop_vinfo,
363 unsigned int *max_vf)
fb85abff 364{
365 unsigned int i;
2e966e2a 366 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 367 struct data_reference *dra = DDR_A (ddr);
368 struct data_reference *drb = DDR_B (ddr);
db72d3bf 369 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
370 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
abc9513d 371 stmt_vec_info stmtinfo_a = dr_info_a->stmt;
372 stmt_vec_info stmtinfo_b = dr_info_b->stmt;
fb85abff 373 lambda_vector dist_v;
374 unsigned int loop_depth;
48e1416a 375
68f15e9d 376 /* In loop analysis all data references should be vectorizable. */
6ea6a380 377 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
378 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
68f15e9d 379 gcc_unreachable ();
6ea6a380 380
68f15e9d 381 /* Independent data accesses. */
fb85abff 382 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
ed9370cc 383 return opt_result::success ();
37545e54 384
68f15e9d 385 if (dra == drb
386 || (DR_IS_READ (dra) && DR_IS_READ (drb)))
ed9370cc 387 return opt_result::success ();
48e1416a 388
5695a690 389 /* We do not have to consider dependences between accesses that belong
472a8968 390 to the same group, unless the stride could be smaller than the
391 group size. */
e1009321 392 if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
393 && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
394 == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
472a8968 395 && !STMT_VINFO_STRIDED_P (stmtinfo_a))
ed9370cc 396 return opt_result::success ();
5695a690 397
0f52e33a 398 /* Even if we have an anti-dependence then, as the vectorized loop covers at
399 least two scalar iterations, there is always also a true dependence.
400 As the vectorizer does not re-order loads and stores we can ignore
401 the anti-dependence if TBAA can disambiguate both DRs similar to the
402 case with known negative distance anti-dependences (positive
403 distance anti-dependences would violate TBAA constraints). */
404 if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
405 || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
406 && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
407 get_alias_set (DR_REF (drb))))
ed9370cc 408 return opt_result::success ();
48e1416a 409
68f15e9d 410 /* Unknown data dependence. */
fb85abff 411 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
412 {
3d483a94 413 /* If user asserted safelen consecutive iterations can be
414 executed concurrently, assume independence. */
415 if (loop->safelen >= 2)
416 {
d75596cd 417 if ((unsigned int) loop->safelen < *max_vf)
3d483a94 418 *max_vf = loop->safelen;
c7a8722c 419 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
ed9370cc 420 return opt_result::success ();
3d483a94 421 }
422
0bd6d857 423 if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
424 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
ed9370cc 425 return opt_result::failure_at
426 (stmtinfo_a->stmt,
427 "versioning for alias not supported for: "
428 "can't determine dependence between %T and %T\n",
429 DR_REF (dra), DR_REF (drb));
95e19962 430
6d8fb6cf 431 if (dump_enabled_p ())
ed9370cc 432 dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
a4e972e3 433 "versioning for alias required: "
434 "can't determine dependence between %T and %T\n",
435 DR_REF (dra), DR_REF (drb));
d4b21757 436
68f15e9d 437 /* Add to list of ddrs that need to be tested at run-time. */
ed9370cc 438 return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
37545e54 439 }
440
68f15e9d 441 /* Known data dependence. */
fb85abff 442 if (DDR_NUM_DIST_VECTS (ddr) == 0)
443 {
3d483a94 444 /* If user asserted safelen consecutive iterations can be
445 executed concurrently, assume independence. */
446 if (loop->safelen >= 2)
447 {
d75596cd 448 if ((unsigned int) loop->safelen < *max_vf)
3d483a94 449 *max_vf = loop->safelen;
c7a8722c 450 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
ed9370cc 451 return opt_result::success ();
3d483a94 452 }
453
0bd6d857 454 if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
455 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
ed9370cc 456 return opt_result::failure_at
457 (stmtinfo_a->stmt,
458 "versioning for alias not supported for: "
459 "bad dist vector for %T and %T\n",
460 DR_REF (dra), DR_REF (drb));
95e19962 461
6d8fb6cf 462 if (dump_enabled_p ())
ed9370cc 463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
a4e972e3 464 "versioning for alias required: "
465 "bad dist vector for %T and %T\n",
466 DR_REF (dra), DR_REF (drb));
fb85abff 467 /* Add to list of ddrs that need to be tested at run-time. */
ed9370cc 468 return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
48e1416a 469 }
fb85abff 470
471 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
403965f7 472
473 if (DDR_COULD_BE_INDEPENDENT_P (ddr)
474 && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
475 loop_depth, max_vf))
ed9370cc 476 return opt_result::success ();
403965f7 477
f1f41a6c 478 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
fb85abff 479 {
480 int dist = dist_v[loop_depth];
481
6d8fb6cf 482 if (dump_enabled_p ())
7bd765d4 483 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 484 "dependence distance = %d.\n", dist);
fb85abff 485
91a74fc6 486 if (dist == 0)
fb85abff 487 {
6d8fb6cf 488 if (dump_enabled_p ())
a4e972e3 489 dump_printf_loc (MSG_NOTE, vect_location,
490 "dependence distance == 0 between %T and %T\n",
491 DR_REF (dra), DR_REF (drb));
fb85abff 492
4d525783 493 /* When we perform grouped accesses and perform implicit CSE
494 by detecting equal accesses and doing disambiguation with
495 runtime alias tests like for
496 .. = a[i];
497 .. = a[i+1];
498 a[i] = ..;
499 a[i+1] = ..;
500 *p = ..;
501 .. = a[i];
502 .. = a[i+1];
503 where we will end up loading { a[i], a[i+1] } once, make
504 sure that inserting group loads before the first load and
5a91be9e 505 stores after the last store will do the right thing.
506 Similar for groups like
507 a[i] = ...;
508 ... = a[i];
509 a[i+1] = ...;
510 where loads from the group interleave with the store. */
abc9513d 511 if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
ed9370cc 512 return opt_result::failure_at (stmtinfo_a->stmt,
513 "READ_WRITE dependence"
514 " in interleaving.\n");
e85b4a5e 515
84017e0e 516 if (loop->safelen < 2)
4d525783 517 {
e85b4a5e 518 tree indicator = dr_zero_step_indicator (dra);
fa681b45 519 if (!indicator || integer_zerop (indicator))
ed9370cc 520 return opt_result::failure_at (stmtinfo_a->stmt,
521 "access also has a zero step\n");
fa681b45 522 else if (TREE_CODE (indicator) != INTEGER_CST)
523 vect_check_nonzero_value (loop_vinfo, indicator);
fb85abff 524 }
91a74fc6 525 continue;
526 }
527
528 if (dist > 0 && DDR_REVERSED_P (ddr))
529 {
530 /* If DDR_REVERSED_P the order of the data-refs in DDR was
531 reversed (to make distance vector positive), and the actual
532 distance is negative. */
6d8fb6cf 533 if (dump_enabled_p ())
ed85753c 534 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 535 "dependence distance negative.\n");
ed85753c 536 /* When doing outer loop vectorization, we need to check if there is
537 a backward dependence at the inner loop level if the dependence
538 at the outer loop is reversed. See PR81740. */
539 if (nested_in_vect_loop_p (loop, stmtinfo_a)
540 || nested_in_vect_loop_p (loop, stmtinfo_b))
541 {
542 unsigned inner_depth = index_in_loop_nest (loop->inner->num,
543 DDR_LOOP_NEST (ddr));
544 if (dist_v[inner_depth] < 0)
545 return opt_result::failure_at (stmtinfo_a->stmt,
546 "not vectorized, dependence "
547 "between data-refs %T and %T\n",
548 DR_REF (dra), DR_REF (drb));
549 }
a8cf7702 550 /* Record a negative dependence distance to later limit the
551 amount of stmt copying / unrolling we can perform.
552 Only need to handle read-after-write dependence. */
553 if (DR_IS_READ (drb)
554 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
555 || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
556 STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
91a74fc6 557 continue;
558 }
559
d75596cd 560 unsigned int abs_dist = abs (dist);
561 if (abs_dist >= 2 && abs_dist < *max_vf)
91a74fc6 562 {
563 /* The dependence distance requires reduction of the maximal
564 vectorization factor. */
ed85753c 565 *max_vf = abs_dist;
6d8fb6cf 566 if (dump_enabled_p ())
7bd765d4 567 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 568 "adjusting maximal vectorization factor to %i\n",
569 *max_vf);
fb85abff 570 }
571
d75596cd 572 if (abs_dist >= *max_vf)
fb85abff 573 {
48e1416a 574 /* Dependence distance does not create dependence, as far as
91a74fc6 575 vectorization is concerned, in this case. */
6d8fb6cf 576 if (dump_enabled_p ())
7bd765d4 577 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 578 "dependence distance >= VF.\n");
fb85abff 579 continue;
580 }
581
ed9370cc 582 return opt_result::failure_at (stmtinfo_a->stmt,
583 "not vectorized, possible dependence "
584 "between data-refs %T and %T\n",
585 DR_REF (dra), DR_REF (drb));
fb85abff 586 }
587
ed9370cc 588 return opt_result::success ();
fb85abff 589}
590
591/* Function vect_analyze_data_ref_dependences.
48e1416a 592
fb85abff 593 Examine all the data references in the loop, and make sure there do not
91a74fc6 594 exist any data dependences between them. Set *MAX_VF according to
595 the maximum vectorization factor the data dependences allow. */
48e1416a 596
ed9370cc 597opt_result
d75596cd 598vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
599 unsigned int *max_vf)
fb85abff 600{
601 unsigned int i;
fb85abff 602 struct data_dependence_relation *ddr;
603
88f6eb8f 604 DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
68f15e9d 605
a99aba41 606 if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
607 {
608 LOOP_VINFO_DDRS (loop_vinfo)
609 .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
610 * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
611 /* We need read-read dependences to compute
612 STMT_VINFO_SAME_ALIGN_REFS. */
03ad9f74 613 bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
614 &LOOP_VINFO_DDRS (loop_vinfo),
615 LOOP_VINFO_LOOP_NEST (loop_vinfo),
616 true);
617 gcc_assert (res);
a99aba41 618 }
619
c7a8722c 620 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
68f15e9d 621
5b631e09 622 /* For epilogues we either have no aliases or alias versioning
623 was applied to original loop. Therefore we may just get max_vf
624 using VF of original loop. */
625 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
4a85c0b1 626 *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
5b631e09 627 else
628 FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
ed9370cc 629 {
630 opt_result res
631 = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
632 if (!res)
633 return res;
634 }
68f15e9d 635
ed9370cc 636 return opt_result::success ();
68f15e9d 637}
638
639
640/* Function vect_slp_analyze_data_ref_dependence.
641
642 Return TRUE if there (might) exist a dependence between a memory-reference
db72d3bf 643 DRA and a memory-reference DRB for VINFO. When versioning for alias
644 may check a dependence at run-time, return FALSE. Adjust *MAX_VF
645 according to the data dependence. */
68f15e9d 646
647static bool
db72d3bf 648vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
649 struct data_dependence_relation *ddr)
68f15e9d 650{
651 struct data_reference *dra = DDR_A (ddr);
652 struct data_reference *drb = DDR_B (ddr);
db72d3bf 653 dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
654 dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
68f15e9d 655
656 /* We need to check dependences of statements marked as unvectorizable
657 as well, they still can prohibit vectorization. */
658
659 /* Independent data accesses. */
660 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
661 return false;
662
663 if (dra == drb)
664 return false;
665
666 /* Read-read is OK. */
667 if (DR_IS_READ (dra) && DR_IS_READ (drb))
668 return false;
669
1fa434e3 670 /* If dra and drb are part of the same interleaving chain consider
671 them independent. */
abc9513d 672 if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
673 && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
674 == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
1fa434e3 675 return false;
676
68f15e9d 677 /* Unknown data dependence. */
678 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
07e3bcbf 679 {
50e6c257 680 if (dump_enabled_p ())
a4e972e3 681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
682 "can't determine dependence between %T and %T\n",
683 DR_REF (dra), DR_REF (drb));
07e3bcbf 684 }
50e6c257 685 else if (dump_enabled_p ())
a4e972e3 686 dump_printf_loc (MSG_NOTE, vect_location,
687 "determined dependence between %T and %T\n",
688 DR_REF (dra), DR_REF (drb));
48e1416a 689
68f15e9d 690 return true;
691}
692
693
c256513d 694/* Analyze dependences involved in the transform of SLP NODE. STORES
695 contain the vector of scalar stores of this instance if we are
696 disambiguating the loads. */
77d241ed 697
698static bool
c256513d 699vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
06bb64b8 700 vec<stmt_vec_info> stores,
ecc42a77 701 stmt_vec_info last_store_info)
77d241ed 702{
703 /* This walks over all stmts involved in the SLP load/store done
704 in NODE verifying we can sink them up to the last stmt in the
705 group. */
3d9c962c 706 stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
1c2fef9a 707 vec_info *vinfo = last_access_info->vinfo;
77d241ed 708 for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
709 {
06bb64b8 710 stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k];
3d9c962c 711 if (access_info == last_access_info)
77d241ed 712 continue;
06bb64b8 713 data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
58cfef6b 714 ao_ref ref;
715 bool ref_initialized_p = false;
06bb64b8 716 for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
3d9c962c 717 gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
77d241ed 718 {
719 gimple *stmt = gsi_stmt (gsi);
d144c8b2 720 if (! gimple_vuse (stmt)
721 || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
77d241ed 722 continue;
723
d144c8b2 724 /* If we couldn't record a (single) data reference for this
58cfef6b 725 stmt we have to resort to the alias oracle. */
1c2fef9a 726 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
727 data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
d144c8b2 728 if (!dr_b)
58cfef6b 729 {
730 /* We are moving a store or sinking a load - this means
731 we cannot use TBAA for disambiguation. */
732 if (!ref_initialized_p)
733 ao_ref_init (&ref, DR_REF (dr_a));
734 if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
735 || ref_maybe_used_by_stmt_p (stmt, &ref, false))
736 return false;
737 continue;
738 }
d144c8b2 739
92bf253d 740 bool dependent = false;
c256513d 741 /* If we run into a store of this same instance (we've just
742 marked those) then delay dependence checking until we run
743 into the last store because this is where it will have
744 been sunk to (and we verify if we can do that as well). */
745 if (gimple_visited_p (stmt))
746 {
ecc42a77 747 if (stmt_info != last_store_info)
c256513d 748 continue;
749 unsigned i;
06bb64b8 750 stmt_vec_info store_info;
751 FOR_EACH_VEC_ELT (stores, i, store_info)
c256513d 752 {
06bb64b8 753 data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
c256513d 754 ddr_p ddr = initialize_data_dependence_relation
755 (dr_a, store_dr, vNULL);
db72d3bf 756 dependent
757 = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
c256513d 758 free_dependence_relation (ddr);
92bf253d 759 if (dependent)
760 break;
c256513d 761 }
762 }
92bf253d 763 else
77d241ed 764 {
92bf253d 765 ddr_p ddr = initialize_data_dependence_relation (dr_a,
766 dr_b, vNULL);
db72d3bf 767 dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
77d241ed 768 free_dependence_relation (ddr);
77d241ed 769 }
92bf253d 770 if (dependent)
771 return false;
77d241ed 772 }
773 }
774 return true;
775}
776
777
/* Function vect_slp_analyze_instance_dependence.

   Examine all the data references in the SLP INSTANCE, and make sure there
   do not exist any data dependences between them that would prevent sinking
   the loads/stores to the vectorized statement insert location.  Return
   true if vectorization of the instance is still possible.  */

bool
vect_slp_analyze_instance_dependence (slp_instance instance)
{
  DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");

  /* The stores of this instance are at the root of the SLP tree.  */
  slp_tree store = SLP_INSTANCE_TREE (instance);
  if (! STMT_VINFO_DATA_REF (SLP_TREE_SCALAR_STMTS (store)[0]))
    store = NULL;

  /* Verify we can sink stores to the vectorized stmt insert location.  */
  stmt_vec_info last_store_info = NULL;
  if (store)
    {
      if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
	return false;

      /* Mark stores in this instance and remember the last one.  */
      last_store_info = vect_find_last_scalar_stmt_in_slp (store);
      for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
	gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
    }

  bool res = true;

  /* Verify we can sink loads to the vectorized stmt insert location,
     special-casing stores of this instance.  */
  slp_tree load;
  unsigned int i;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
    if (! vect_slp_analyze_node_dependences (instance, load,
					     store
					     ? SLP_TREE_SCALAR_STMTS (store)
					     : vNULL, last_store_info))
      {
	res = false;
	break;
      }

  /* Unset the visited flag.  */
  if (store)
    for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
      gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);

  return res;
}
830
/* Record the base alignment guarantee given by DRB, which occurs
   in STMT_INFO.  Keep only the strongest (largest) alignment guarantee
   seen so far for this base address.  */

static void
vect_record_base_alignment (stmt_vec_info stmt_info,
			    innermost_loop_behavior *drb)
{
  vec_info *vinfo = stmt_info->vinfo;
  bool existed;
  innermost_loop_behavior *&entry
    = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
  if (!existed || entry->base_alignment < drb->base_alignment)
    {
      entry = drb;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "recording new base alignment for %T\n"
			 "  alignment:    %d\n"
			 "  misalignment: %d\n"
			 "  based on:     %G",
			 drb->base_address,
			 drb->base_alignment,
			 drb->base_misalignment,
			 stmt_info->stmt);
    }
}
857
/* If the region we're going to vectorize is reached, all unconditional
   data references occur at least once.  We can therefore pool the base
   alignment guarantees from each unconditional reference.  Do this by
   going through all the data references in VINFO and checking whether
   the containing statement makes the reference unconditionally.  If so,
   record the alignment of the base address in VINFO so that it can be
   used for all other references with the same base.  */

void
vect_record_base_alignments (vec_info *vinfo)
{
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
  class loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
  data_reference *dr;
  unsigned int i;
  FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
      stmt_vec_info stmt_info = dr_info->stmt;
      /* Only unconditional, vectorizable, non-gather/scatter references
	 give a usable guarantee.  */
      if (!DR_IS_CONDITIONAL_IN_STMT (dr)
	  && STMT_VINFO_VECTORIZABLE (stmt_info)
	  && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
	{
	  vect_record_base_alignment (stmt_info, &DR_INNERMOST (dr));

	  /* If DR is nested in the loop that is being vectorized, we can also
	     record the alignment of the base wrt the outer loop.  */
	  if (loop && nested_in_vect_loop_p (loop, stmt_info))
	    vect_record_base_alignment
	      (stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
	}
    }
}
891
/* Return the target alignment (in bits) for the vectorized form of
   DR_INFO, as given by the target hook.  */

static poly_uint64
vect_calculate_target_alignment (dr_vec_info *dr_info)
{
  tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
  return targetm.vectorize.preferred_vector_alignment (vectype);
}
900
/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR_INFO.

   Output:
   1. DR_MISALIGNMENT (DR_INFO) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static void
vect_compute_data_ref_alignment (dr_vec_info *dr_info)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *loop = NULL;
  tree ref = DR_REF (dr_info->dr);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_compute_data_ref_alignment:\n");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);

  /* Gather/scatter accesses are decomposed elementwise; alignment of the
     whole reference is irrelevant.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return;

  innermost_loop_behavior *drb = vect_dr_behavior (dr_info);
  bool step_preserves_misalignment_p;

  poly_uint64 vector_alignment
    = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
  DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;

  /* If the target alignment is not a compile-time constant we cannot
     compute a constant misalignment either.  */
  unsigned HOST_WIDE_INT vect_align_c;
  if (!vector_alignment.is_constant (&vect_align_c))
    return;

  /* No step for BB vectorization.  */
  if (!loop)
    {
      gcc_assert (integer_zerop (drb->step));
      step_preserves_misalignment_p = true;
    }

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector alignment.  */
  else if (nested_in_vect_loop_p (loop, stmt_info))
    {
      step_preserves_misalignment_p
	= (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;

      if (dump_enabled_p ())
	{
	  if (step_preserves_misalignment_p)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "inner step divides the vector alignment.\n");
	  else
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "inner step doesn't divide the vector"
			     " alignment.\n");
	}
    }

  /* Similarly we can only use base and misalignment information relative to
     an innermost loop if the misalignment stays the same throughout the
     execution of the loop.  As above, this is the case if the stride of
     the dataref evenly divides by the alignment.  */
  else
    {
      poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
      step_preserves_misalignment_p
	= multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);

      if (!step_preserves_misalignment_p && dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "step doesn't divide the vector alignment.\n");
    }

  unsigned int base_alignment = drb->base_alignment;
  unsigned int base_misalignment = drb->base_misalignment;

  /* Calculate the maximum of the pooled base address alignment and the
     alignment that we can compute for DR itself.  */
  innermost_loop_behavior **entry = base_alignments->get (drb->base_address);
  if (entry && base_alignment < (*entry)->base_alignment)
    {
      base_alignment = (*entry)->base_alignment;
      base_misalignment = (*entry)->base_misalignment;
    }

  if (drb->offset_alignment < vect_align_c
      || !step_preserves_misalignment_p
      /* We need to know whether the step wrt the vectorized loop is
	 negative when computing the starting misalignment below.  */
      || TREE_CODE (drb->step) != INTEGER_CST)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unknown alignment for access: %T\n", ref);
      return;
    }

  /* If the base is under-aligned, try to force the required alignment
     on the underlying decl.  */
  if (base_alignment < vect_align_c)
    {
      unsigned int max_alignment;
      tree base = get_base_for_alignment (drb->base_address, &max_alignment);
      if (max_alignment < vect_align_c
	  || !vect_can_force_dr_alignment_p (base,
					     vect_align_c * BITS_PER_UNIT))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "can't force alignment of ref: %T\n", ref);
	  return;
	}

      /* Force the alignment of the decl.
	 NOTE: This is the only change to the code we make during
	 the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "force alignment of %T\n", ref);

      dr_info->base_decl = base;
      dr_info->base_misaligned = true;
      base_misalignment = 0;
    }
  poly_int64 misalignment
    = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_sgn (drb->step) < 0)
    /* PLUS because STEP is negative.  */
    misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
		     * TREE_INT_CST_LOW (drb->step));

  unsigned int const_misalignment;
  if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Non-constant misalignment for access: %T\n", ref);
      return;
    }

  SET_DR_MISALIGNMENT (dr_info, const_misalignment);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		     "misalign = %d bytes of ref %T\n",
		     DR_MISALIGNMENT (dr_info), ref);

  return;
}
1068
/* Function vect_update_misalignment_for_peel.
   Sets DR_INFO's misalignment
   - to 0 if it has the same alignment as DR_PEEL_INFO,
   - to the misalignment computed using NPEEL if DR_INFO's misalignment
     is known,
   - to -1 (unknown) otherwise.

   DR_INFO - the data reference whose misalignment is to be adjusted.
   DR_PEEL_INFO - the data reference whose misalignment is being made
		  zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
	   of DR_PEEL_INFO is known at compile time.  */

static void
vect_update_misalignment_for_peel (dr_vec_info *dr_info,
				   dr_vec_info *dr_peel_info, int npeel)
{
  unsigned int i;
  vec<dr_p> same_aligned_drs;
  struct data_reference *current_dr;
  stmt_vec_info peel_stmt_info = dr_peel_info->stmt;

  /* It can be assumed that if dr_info has the same alignment as dr_peel,
     it is aligned in the vector loop.  */
  same_aligned_drs = STMT_VINFO_SAME_ALIGN_REFS (peel_stmt_info);
  FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr)
    {
      if (current_dr != dr_info->dr)
	continue;
      gcc_assert (!known_alignment_for_access_p (dr_info)
		  || !known_alignment_for_access_p (dr_peel_info)
		  || (DR_MISALIGNMENT (dr_info)
		      == DR_MISALIGNMENT (dr_peel_info)));
      SET_DR_MISALIGNMENT (dr_info, 0);
      return;
    }

  /* If both misalignments and the target alignment are known constants,
     advance the misalignment by NPEEL scalar steps modulo the target
     alignment.  */
  unsigned HOST_WIDE_INT alignment;
  if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
      && known_alignment_for_access_p (dr_info)
      && known_alignment_for_access_p (dr_peel_info))
    {
      int misal = DR_MISALIGNMENT (dr_info);
      misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
      misal &= alignment - 1;
      SET_DR_MISALIGNMENT (dr_info, misal);
      return;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
		     "to unknown (-1).\n");
  SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
}
1122
1123
/* Function verify_data_ref_alignment

   Return TRUE if DR_INFO can be handled with respect to alignment.  */

static opt_result
verify_data_ref_alignment (dr_vec_info *dr_info)
{
  enum dr_alignment_support supportable_dr_alignment
    = vect_supportable_dr_alignment (dr_info, false);
  if (!supportable_dr_alignment)
    return opt_result::failure_at
      (dr_info->stmt->stmt,
       DR_IS_READ (dr_info->dr)
       ? "not vectorized: unsupported unaligned load: %T\n"
       : "not vectorized: unsupported unaligned store: %T\n",
       DR_REF (dr_info->dr));

  if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Vectorizing an unaligned access.\n");

  return opt_result::success ();
}
1147
/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

opt_result
vect_verify_datarefs_alignment (loop_vec_info vinfo)
{
  vec<data_reference_p> datarefs = vinfo->shared->datarefs;
  struct data_reference *dr;
  unsigned int i;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
      stmt_vec_info stmt_info = dr_info->stmt;

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
	continue;

      /* For interleaving, only the alignment of the first access matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
	continue;

      /* Strided accesses perform only component accesses, alignment is
	 irrelevant for them.  */
      if (STMT_VINFO_STRIDED_P (stmt_info)
	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
	continue;

      opt_result res = verify_data_ref_alignment (dr_info);
      if (!res)
	return res;
    }

  return opt_result::success ();
}
1186
/* Given a memory reference EXP return whether its alignment is less
   than its size.  */

static bool
not_size_aligned (tree exp)
{
  /* A non-constant size cannot be proven aligned; be conservative.  */
  if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
    return true;

  return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
	  > get_object_alignment (exp));
}
fb85abff 1199
/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR_INFO is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (dr_vec_info *dr_info)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
	 the prolog loop ({VF - misalignment}), is a multiple of the
	 number of the interleaved accesses.  */
      int elem_size, mis_in_elements;

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr_info))
	return false;

      poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
      poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
      elem_size = vector_element_size (vector_size, nelements);
      mis_in_elements = DR_MISALIGNMENT (dr_info) / elem_size;

      if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
	return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr_info) && !aligned_access_p (dr_info))
    {
      HOST_WIDE_INT elmsize =
	int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "data size = %wd. misalignment = %d.\n", elmsize,
			   DR_MISALIGNMENT (dr_info));
	}
      if (DR_MISALIGNMENT (dr_info) % elmsize)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "data size does not divide the misalignment.\n");
	  return false;
	}
    }

  /* Unknown misalignment: defer to the target hook.  */
  if (!known_alignment_for_access_p (dr_info))
    {
      tree type = TREE_TYPE (DR_REF (dr_info->dr));
      bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "Unknown misalignment, %snaturally aligned\n",
			 is_packed ? "not " : "");
      return targetm.vectorize.vector_alignment_reachable (type, is_packed);
    }

  return true;
}
1265
0822b158 1266
/* Calculate the cost of the memory access represented by DR_INFO,
   adding the inside-loop cost to *INSIDE_COST and the prologue/epilogue
   cost to *OUTSIDE_COST, recording individual cost entries in
   BODY_COST_VEC and PROLOGUE_COST_VEC.  */

static void
vect_get_data_access_cost (dr_vec_info *dr_info,
			   unsigned int *inside_cost,
			   unsigned int *outside_cost,
			   stmt_vector_for_cost *body_cost_vec,
			   stmt_vector_for_cost *prologue_cost_vec)
{
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int ncopies;

  /* Pure SLP statements are costed once; otherwise cost one copy per
     vector statement needed for the vectorization factor.  */
  if (PURE_SLP_STMT (stmt_info))
    ncopies = 1;
  else
    ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));

  if (DR_IS_READ (dr_info->dr))
    vect_get_load_cost (stmt_info, ncopies, true, inside_cost, outside_cost,
			prologue_cost_vec, body_cost_vec, false);
  else
    vect_get_store_cost (stmt_info, ncopies, inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "vect_get_data_access_cost: inside_cost = %d, "
		     "outside_cost = %d.\n", *inside_cost, *outside_cost);
}
1296
1297
/* One candidate peeling amount: peeling by NPEEL iterations aligns
   DR_INFO; COUNT data references benefit from (or are penalized by)
   this choice.  */

typedef struct _vect_peel_info
{
  dr_vec_info *dr_info;
  int npeel;
  unsigned int count;
} *vect_peel_info;

/* A peeling candidate together with the loop body costs it implies.  */

typedef struct _vect_peel_extended_info
{
  struct _vect_peel_info peel_info;
  unsigned int inside_cost;
  unsigned int outside_cost;
} *vect_peel_extended_info;
1311
1312
/* Peeling hashtable helpers.  Candidates are keyed purely by the
   number of peeled iterations NPEEL.  */

struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
{
  static inline hashval_t hash (const _vect_peel_info *);
  static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
};

/* Hash a peeling candidate by its iteration count.  */

inline hashval_t
peel_info_hasher::hash (const _vect_peel_info *peel_info)
{
  return (hashval_t) peel_info->npeel;
}

/* Two peeling candidates are equal iff they peel the same number of
   iterations.  */

inline bool
peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
{
  return (a->npeel == b->npeel);
}
1332
1333
/* Insert DR_INFO into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
			  loop_vec_info loop_vinfo, dr_vec_info *dr_info,
			  int npeel)
{
  struct _vect_peel_info elem, *slot;
  _vect_peel_info **new_slot;
  bool supportable_dr_alignment
    = vect_supportable_dr_alignment (dr_info, true);

  elem.npeel = npeel;
  slot = peeling_htab->find (&elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr_info = dr_info;
      slot->count = 1;
      new_slot = peeling_htab->find_slot (slot, INSERT);
      *new_slot = slot;
    }

  /* With an unlimited cost model, boost the count of a candidate that
     aligns an otherwise-unsupportable unaligned access, so the
     most-frequent selection strongly prefers it.  */
  if (!supportable_dr_alignment
      && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    slot->count += VECT_MAX_COST;
}
1364
1365
/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  Callback for hash_table::traverse; MAX
   accumulates the best candidate seen so far.  */

int
vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
				     _vect_peel_extended_info *max)
{
  vect_peel_info elem = *slot;

  /* Prefer a higher count; on a tie prefer fewer peeled iterations.  */
  if (elem->count > max->peel_info.count
      || (elem->count == max->peel_info.count
	  && max->peel_info.npeel > elem->npeel))
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr_info = elem->dr_info;
    }

  return 1;
}
1386
/* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
   data access costs for all data refs.  If UNKNOWN_MISALIGNMENT is true,
   we assume DR0_INFO's misalignment will be zero after peeling.  */

static void
vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
				dr_vec_info *dr0_info,
				unsigned int *inside_cost,
				unsigned int *outside_cost,
				stmt_vector_for_cost *body_cost_vec,
				stmt_vector_for_cost *prologue_cost_vec,
				unsigned int npeel,
				bool unknown_misalignment)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  unsigned i;
  data_reference *dr;

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      stmt_vec_info stmt_info = dr_info->stmt;
      if (!STMT_VINFO_RELEVANT_P (stmt_info))
	continue;

      /* For interleaving, only the alignment of the first access
	 matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
	continue;

      /* Strided accesses perform only component accesses, alignment is
	 irrelevant for them.  */
      if (STMT_VINFO_STRIDED_P (stmt_info)
	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
	continue;

      /* Temporarily install the misalignment this DR would have after
	 peeling, compute its access cost, then restore the original
	 misalignment.  */
      int save_misalignment;
      save_misalignment = DR_MISALIGNMENT (dr_info);
      if (npeel == 0)
	;
      else if (unknown_misalignment && dr_info == dr0_info)
	SET_DR_MISALIGNMENT (dr_info, 0);
      else
	vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
      vect_get_data_access_cost (dr_info, inside_cost, outside_cost,
				 body_cost_vec, prologue_cost_vec);
      SET_DR_MISALIGNMENT (dr_info, save_misalignment);
    }
}
1437
/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  Callback for hash_table::traverse;
   MIN accumulates the cheapest candidate seen so far.  */

int
vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
				   _vect_peel_extended_info *min)
{
  vect_peel_info elem = *slot;
  int dummy;
  unsigned int inside_cost = 0, outside_cost = 0;
  stmt_vec_info stmt_info = elem->dr_info->stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
		       epilogue_cost_vec;

  prologue_cost_vec.create (2);
  body_cost_vec.create (2);
  epilogue_cost_vec.create (2);

  vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
				  &outside_cost, &body_cost_vec,
				  &prologue_cost_vec, elem->npeel, false);

  body_cost_vec.release ();

  outside_cost += vect_get_known_peeling_cost
    (loop_vinfo, elem->npeel, &dummy,
     &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
     &prologue_cost_vec, &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  /* Keep the candidate with the lowest inside cost; break ties on
     outside cost.  */
  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost
	  && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->peel_info.dr_info = elem->dr_info;
      min->peel_info.npeel = elem->npeel;
      min->peel_info.count = elem->count;
    }

  return 1;
}
1488
1489
/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct _vect_peel_extended_info
vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
				       loop_vec_info loop_vinfo)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr_info = NULL;

  if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      peeling_htab->traverse <_vect_peel_extended_info *,
			      vect_peeling_hash_get_lowest_cost> (&res);
    }
  else
    {
      res.peel_info.count = 0;
      peeling_htab->traverse <_vect_peel_extended_info *,
			      vect_peeling_hash_get_most_frequent> (&res);
      res.inside_cost = 0;
      res.outside_cost = 0;
    }

  return res;
}
1520
/* Return true if the new peeling NPEEL is supported, i.e. every
   remaining data reference in LOOP_VINFO would still have a supportable
   alignment after peeling NPEEL iterations for DR0_INFO.  */

static bool
vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
			  unsigned npeel)
{
  unsigned i;
  struct data_reference *dr = NULL;
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;

  /* Ensure that all data refs can be vectorized after the peel.  */
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      int save_misalignment;

      /* DR0 itself becomes aligned by the peel.  */
      if (dr == dr0_info->dr)
	continue;

      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      stmt_vec_info stmt_info = dr_info->stmt;
      /* For interleaving, only the alignment of the first access
	 matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
	continue;

      /* Strided accesses perform only component accesses, alignment is
	 irrelevant for them.  */
      if (STMT_VINFO_STRIDED_P (stmt_info)
	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
	continue;

      /* Check supportability with the post-peel misalignment installed,
	 then restore the original misalignment.  */
      save_misalignment = DR_MISALIGNMENT (dr_info);
      vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
      supportable_dr_alignment
	= vect_supportable_dr_alignment (dr_info, false);
      SET_DR_MISALIGNMENT (dr_info, save_misalignment);

      if (!supportable_dr_alignment)
	return false;
    }

  return true;
}
0822b158 1566
fb85abff 1567/* Function vect_enhance_data_refs_alignment
1568
1569 This pass will use loop versioning and loop peeling in order to enhance
1570 the alignment of data references in the loop.
1571
1572 FOR NOW: we assume that whatever versioning/peeling takes place, only the
282bf14c 1573 original loop is to be vectorized. Any other loops that are created by
fb85abff 1574 the transformations performed in this pass - are not supposed to be
282bf14c 1575 vectorized. This restriction will be relaxed.
fb85abff 1576
1577 This pass will require a cost model to guide it whether to apply peeling
282bf14c 1578 or versioning or a combination of the two. For example, the scheme that
fb85abff 1579 intel uses when given a loop with several memory accesses, is as follows:
1580 choose one memory access ('p') which alignment you want to force by doing
282bf14c 1581 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
fb85abff 1582 other accesses are not necessarily aligned, or (2) use loop versioning to
1583 generate one loop in which all accesses are aligned, and another loop in
1584 which only 'p' is necessarily aligned.
1585
1586 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1587 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1588 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1589
282bf14c 1590 Devising a cost model is the most critical aspect of this work. It will
fb85abff 1591 guide us on which access to peel for, whether to use loop versioning, how
282bf14c 1592 many versions to create, etc. The cost model will probably consist of
fb85abff 1593 generic considerations as well as target specific considerations (on
1594 powerpc for example, misaligned stores are more painful than misaligned
1595 loads).
1596
1597 Here are the general steps involved in alignment enhancements:
1598
1599 -- original loop, before alignment analysis:
1600 for (i=0; i<N; i++){
1601 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1602 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1603 }
1604
1605 -- After vect_compute_data_refs_alignment:
1606 for (i=0; i<N; i++){
1607 x = q[i]; # DR_MISALIGNMENT(q) = 3
1608 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1609 }
1610
1611 -- Possibility 1: we do loop versioning:
1612 if (p is aligned) {
1613 for (i=0; i<N; i++){ # loop 1A
1614 x = q[i]; # DR_MISALIGNMENT(q) = 3
1615 p[i] = y; # DR_MISALIGNMENT(p) = 0
1616 }
1617 }
1618 else {
1619 for (i=0; i<N; i++){ # loop 1B
1620 x = q[i]; # DR_MISALIGNMENT(q) = 3
1621 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1622 }
1623 }
1624
1625 -- Possibility 2: we do loop peeling:
1626 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1627 x = q[i];
1628 p[i] = y;
1629 }
1630 for (i = 3; i < N; i++){ # loop 2A
1631 x = q[i]; # DR_MISALIGNMENT(q) = 0
1632 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1633 }
1634
1635 -- Possibility 3: combination of loop peeling and versioning:
1636 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1637 x = q[i];
1638 p[i] = y;
1639 }
1640 if (p is aligned) {
1641 for (i = 3; i<N; i++){ # loop 3A
1642 x = q[i]; # DR_MISALIGNMENT(q) = 0
1643 p[i] = y; # DR_MISALIGNMENT(p) = 0
1644 }
1645 }
1646 else {
1647 for (i = 3; i<N; i++){ # loop 3B
1648 x = q[i]; # DR_MISALIGNMENT(q) = 0
1649 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1650 }
1651 }
1652
282bf14c 1653 These loops are later passed to loop_transform to be vectorized. The
fb85abff 1654 vectorizer will use the alignment information to guide the transformation
1655 (whether to generate regular loads/stores, or with special handling for
1656 misalignment). */
1657
/* Decide how to enforce data-reference alignment for LOOP_VINFO: either
   peel a few scalar iterations so a chosen reference becomes aligned, or
   version the loop with a runtime alignment check.  Returns an opt_result
   describing success/failure; on success the loop_vinfo peeling/versioning
   fields have been updated.  Peeling and versioning are mutually exclusive
   here (see the gcc_asserts near the end).  */

opt_result
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  /* First misaligned store seen with unknown misalignment, if any.  */
  dr_vec_info *first_store = NULL;
  /* The data reference we will peel for (the one forced to alignment 0).  */
  dr_vec_info *dr0_info = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  unsigned int npeel = 0;
  bool one_misalignment_known = false;
  bool one_misalignment_unknown = false;
  bool one_dr_unsupportable = false;
  dr_vec_info *unsupportable_dr_info = NULL;
  poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int mis, same_align_drs_max = 0;
  hash_table<peel_info_hasher> peeling_htab (1);

  DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");

  /* Reset data so we can safely be called multiple times.  */
  LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
  LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
      stmt_vec_info stmt_info = dr_info->stmt;

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
	continue;

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
	  && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
	continue;

      /* For scatter-gather or invariant accesses there is nothing
	 to enhance.  */
      if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
	  || integer_zerop (DR_STEP (dr)))
	continue;

      /* Strided accesses perform only component accesses, alignment is
	 irrelevant for them.  */
      if (STMT_VINFO_STRIDED_P (stmt_info)
	  && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
	continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr_info, true);
      do_peeling = vector_alignment_reachable_p (dr_info);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr_info))
            {
	      unsigned int npeel_tmp = 0;
	      bool negative = tree_int_cst_compare (DR_STEP (dr),
						    size_zero_node) < 0;

	      vectype = STMT_VINFO_VECTYPE (stmt_info);
	      /* If known_alignment_for_access_p then we have set
	         DR_MISALIGNMENT which is only done if we know it at compiler
	         time, so it is safe to assume target alignment is constant.
	       */
	      unsigned int target_align =
		DR_TARGET_ALIGNMENT (dr_info).to_constant ();
	      unsigned int dr_size = vect_get_scalar_dr_size (dr_info);
	      /* For a negative step the peel count works in the opposite
		 direction, hence the sign flip on the misalignment.  */
	      mis = (negative
		     ? DR_MISALIGNMENT (dr_info)
		     : -DR_MISALIGNMENT (dr_info));
	      if (DR_MISALIGNMENT (dr_info) != 0)
		npeel_tmp = (mis & (target_align - 1)) / dr_size;

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  Vectorization factor will 8.  If both
                 accesses are misaligned by 3, the first one needs one scalar
                 iteration to be aligned, and the second one needs 5.  But the
                 first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
              if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
		{
		  poly_uint64 nscalars = (STMT_SLP_TYPE (stmt_info)
					  ? vf * DR_GROUP_SIZE (stmt_info) : vf);
		  possible_npeel_number
		    = vect_get_num_vectors (nscalars, vectype);

		  /* NPEEL_TMP is 0 when there is no misalignment, but also
		     allow peeling NELEMENTS.  */
		  if (DR_MISALIGNMENT (dr_info) == 0)
		    possible_npeel_number++;
		}

	      /* Save info about DR in the hash table.  Also include peeling
	         amounts according to the explanation above.  */
              for (j = 0; j < possible_npeel_number; j++)
                {
                  vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
					    dr_info, npeel_tmp);
		  npeel_tmp += target_align / dr_size;
                }

	      one_misalignment_known = true;
            }
          else
            {
              /* If we don't know any misalignment values, we prefer
                 peeling for data-ref that has the maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over load.  */
	      unsigned same_align_drs
		= STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
	      if (!dr0_info
		  || same_align_drs_max < same_align_drs)
		{
		  same_align_drs_max = same_align_drs;
		  dr0_info = dr_info;
		}
	      /* For data-refs with the same number of related
		 accesses prefer the one where the misalign
		 computation will be invariant in the outermost loop.  */
	      else if (same_align_drs_max == same_align_drs)
		{
		  class loop *ivloop0, *ivloop;
		  ivloop0 = outermost_invariant_loop_for_expr
		    (loop, DR_BASE_ADDRESS (dr0_info->dr));
		  ivloop = outermost_invariant_loop_for_expr
		    (loop, DR_BASE_ADDRESS (dr));
		  if ((ivloop && !ivloop0)
		      || (ivloop && ivloop0
			  && flow_loop_nested_p (ivloop, ivloop0)))
		    dr0_info = dr_info;
		}

	      one_misalignment_unknown = true;

	      /* Check for data refs with unsupportable alignment that
	         can be peeled.  */
	      if (!supportable_dr_alignment)
		{
		  one_dr_unsupportable = true;
		  unsupportable_dr_info = dr_info;
		}

	      if (!first_store && DR_IS_WRITE (dr))
		first_store = dr_info;
            }
        }
      else
        {
          if (!aligned_access_p (dr_info))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "vector alignment may not be reachable\n");
              break;
            }
        }
    }

  /* Check if we can possibly peel the loop.  */
  if (!vect_can_advance_ivs_p (loop_vinfo)
      || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
      || loop->inner)
    do_peeling = false;

  struct _vect_peel_extended_info peel_for_known_alignment;
  struct _vect_peel_extended_info peel_for_unknown_alignment;
  struct _vect_peel_extended_info best_peel;

  peel_for_unknown_alignment.inside_cost = INT_MAX;
  peel_for_unknown_alignment.outside_cost = INT_MAX;
  peel_for_unknown_alignment.peel_info.count = 0;

  if (do_peeling
      && one_misalignment_unknown)
    {
      /* Check if the target requires to prefer stores over loads, i.e., if
         misaligned stores are more expensive than misaligned loads (taking
         drs with same alignment into account).  */
      unsigned int load_inside_cost = 0;
      unsigned int load_outside_cost = 0;
      unsigned int store_inside_cost = 0;
      unsigned int store_outside_cost = 0;
      /* With unknown misalignment assume half a vector of peeling on
	 average as the expected peel count for costing purposes.  */
      unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;

      stmt_vector_for_cost dummy;
      dummy.create (2);
      vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
				      &load_inside_cost,
				      &load_outside_cost,
				      &dummy, &dummy, estimated_npeels, true);
      dummy.release ();

      if (first_store)
	{
	  dummy.create (2);
	  vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
					  &store_inside_cost,
					  &store_outside_cost,
					  &dummy, &dummy,
					  estimated_npeels, true);
	  dummy.release ();
	}
      else
	{
	  store_inside_cost = INT_MAX;
	  store_outside_cost = INT_MAX;
	}

      /* Peel for the store if misaligned stores cost more than
	 misaligned loads.  */
      if (load_inside_cost > store_inside_cost
	  || (load_inside_cost == store_inside_cost
	      && load_outside_cost > store_outside_cost))
	{
	  dr0_info = first_store;
	  peel_for_unknown_alignment.inside_cost = store_inside_cost;
	  peel_for_unknown_alignment.outside_cost = store_outside_cost;
	}
      else
	{
	  peel_for_unknown_alignment.inside_cost = load_inside_cost;
	  peel_for_unknown_alignment.outside_cost = load_outside_cost;
	}

      stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
      prologue_cost_vec.create (2);
      epilogue_cost_vec.create (2);

      int dummy2;
      peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
	(loop_vinfo, estimated_npeels, &dummy2,
	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
	 &prologue_cost_vec, &epilogue_cost_vec);

      prologue_cost_vec.release ();
      epilogue_cost_vec.release ();

      peel_for_unknown_alignment.peel_info.count = 1
	+ STMT_VINFO_SAME_ALIGN_REFS (dr0_info->stmt).length ();
    }

  peel_for_unknown_alignment.peel_info.npeel = 0;
  peel_for_unknown_alignment.peel_info.dr_info = dr0_info;

  best_peel = peel_for_unknown_alignment;

  peel_for_known_alignment.inside_cost = INT_MAX;
  peel_for_known_alignment.outside_cost = INT_MAX;
  peel_for_known_alignment.peel_info.count = 0;
  peel_for_known_alignment.peel_info.dr_info = NULL;

  if (do_peeling && one_misalignment_known)
    {
      /* Peeling is possible, but there is no data access that is not supported
         unless aligned.  So we try to choose the best possible peeling from
	 the hash table.  */
      peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
	(&peeling_htab, loop_vinfo);
    }

  /* Compare costs of peeling for known and unknown alignment. */
  if (peel_for_known_alignment.peel_info.dr_info != NULL
      && peel_for_unknown_alignment.inside_cost
      >= peel_for_known_alignment.inside_cost)
    {
      best_peel = peel_for_known_alignment;

      /* If the best peeling for known alignment has NPEEL == 0, perform no
         peeling at all except if there is an unsupportable dr that we can
         align.  */
      if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
	do_peeling = false;
    }

  /* If there is an unsupportable data ref, prefer this over all choices so far
     since we'd have to discard a chosen peeling except when it accidentally
     aligned the unsupportable data ref.  */
  if (one_dr_unsupportable)
    dr0_info = unsupportable_dr_info;
  else if (do_peeling)
    {
      /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
         TODO: Use nopeel_outside_cost or get rid of it?  */
      unsigned nopeel_inside_cost = 0;
      unsigned nopeel_outside_cost = 0;

      stmt_vector_for_cost dummy;
      dummy.create (2);
      vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
				      &nopeel_outside_cost, &dummy, &dummy,
				      0, false);
      dummy.release ();

      /* Add epilogue costs.  As we do not peel for alignment here, no prologue
	 costs will be recorded.  */
      stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
      prologue_cost_vec.create (2);
      epilogue_cost_vec.create (2);

      int dummy2;
      nopeel_outside_cost += vect_get_known_peeling_cost
	(loop_vinfo, 0, &dummy2,
	 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
	 &prologue_cost_vec, &epilogue_cost_vec);

      prologue_cost_vec.release ();
      epilogue_cost_vec.release ();

      npeel = best_peel.peel_info.npeel;
      dr0_info = best_peel.peel_info.dr_info;

      /* If no peeling is not more expensive than the best peeling we
	 have so far, don't perform any peeling.  */
      if (nopeel_inside_cost <= best_peel.inside_cost)
	do_peeling = false;
    }

  if (do_peeling)
    {
      stmt_vec_info stmt_info = dr0_info->stmt;
      vectype = STMT_VINFO_VECTYPE (stmt_info);

      if (known_alignment_for_access_p (dr0_info))
        {
	  bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
						size_zero_node) < 0;
          if (!npeel)
            {
              /* Since it's known at compile time, compute the number of
                 iterations in the peeled loop (the peeling factor) for use in
                 updating DR_MISALIGNMENT values.  The peeling factor is the
                 vectorization factor minus the misalignment as an element
                 count.  */
	      mis = (negative
		     ? DR_MISALIGNMENT (dr0_info)
		     : -DR_MISALIGNMENT (dr0_info));
	      /* If known_alignment_for_access_p then we have set
	         DR_MISALIGNMENT which is only done if we know it at compiler
	         time, so it is safe to assume target alignment is constant.
	       */
	      unsigned int target_align =
		DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
	      npeel = ((mis & (target_align - 1))
		       / vect_get_scalar_dr_size (dr0_info));
            }

	  /* For interleaved data access every iteration accesses all the
	     members of the group, therefore we divide the number of iterations
	     by the group size.  */
	  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
	    npeel /= DR_GROUP_SIZE (stmt_info);

          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Try peeling by %d\n", npeel);
        }

      /* Ensure that all datarefs can be vectorized after the peel.  */
      if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
	do_peeling = false;

      /* Check if all datarefs are supportable and log.  */
      if (do_peeling && known_alignment_for_access_p (dr0_info) && npeel == 0)
        {
          opt_result stat = vect_verify_datarefs_alignment (loop_vinfo);
          if (!stat)
            do_peeling = false;
          else
	    return stat;
        }

      /* Cost model #1 - honor --param vect-max-peeling-for-alignment.  */
      if (do_peeling)
        {
          unsigned max_allowed_peel
	    = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
          if (max_allowed_peel != (unsigned)-1)
            {
              unsigned max_peel = npeel;
	      /* NPEEL == 0 here means the peel count is only known at
		 runtime; use the worst case for the limit check.  */
              if (max_peel == 0)
                {
		  poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
		  unsigned HOST_WIDE_INT target_align_c;
		  if (target_align.is_constant (&target_align_c))
		    max_peel =
		      target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
		  else
		    {
		      do_peeling = false;
		      if (dump_enabled_p ())
			dump_printf_loc (MSG_NOTE, vect_location,
			  "Disable peeling, max peels set and vector"
			  " alignment unknown\n");
		    }
                }
              if (max_peel > max_allowed_peel)
                {
                  do_peeling = false;
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_NOTE, vect_location,
                        "Disable peeling, max peels reached: %d\n", max_peel);
                }
            }
        }

      /* Cost model #2 - if peeling may result in a remaining loop not
	 iterating enough to be vectorized then do not peel.  Since this
	 is a cost heuristic rather than a correctness decision, use the
	 most likely runtime value for variable vectorization factors.  */
      if (do_peeling
	  && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
	{
	  unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
	  unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
	  if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
	      < assumed_vf + max_peel)
	    do_peeling = false;
	}

      if (do_peeling)
        {
          /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
             If the misalignment of DR_i is identical to that of dr0 then set
             DR_MISALIGNMENT (DR_i) to zero.  If the misalignment of DR_i and
             dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
             by the peeling factor times the element size of DR_i (MOD the
             vectorization factor times the size).  Otherwise, the
             misalignment of DR_i must be set to unknown.  */
	  FOR_EACH_VEC_ELT (datarefs, i, dr)
	    if (dr != dr0_info->dr)
	      {
		/* Strided accesses perform only component accesses, alignment
		   is irrelevant for them.  */
		dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
		stmt_info = dr_info->stmt;
		if (STMT_VINFO_STRIDED_P (stmt_info)
		    && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
		  continue;

		vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
	      }

          LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
          if (npeel)
            LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
          else
            LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
	      = DR_MISALIGNMENT (dr0_info);
	  SET_DR_MISALIGNMENT (dr0_info, 0);
	  if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Alignment of access forced using peeling.\n");
              dump_printf_loc (MSG_NOTE, vect_location,
                               "Peeling for alignment will be applied.\n");
            }

	  /* The inside-loop cost will be accounted for in vectorizable_load
	     and vectorizable_store correctly with adjusted alignments.
	     Drop the body_cst_vec on the floor here.  */
	  opt_result stat = vect_verify_datarefs_alignment (loop_vinfo);
	  gcc_assert (stat);
          return stat;
        }
    }

  /* (2) Versioning to force alignment.  */

  /* Try versioning if:
     1) optimize loop for speed
     2) there is at least one unsupported misaligned data ref with an unknown
        misalignment, and
     3) all misaligned data refs with a known misalignment are supported, and
     4) the number of runtime alignment checks is within reason.  */

  do_versioning =
	optimize_loop_nest_for_speed_p (loop)
	&& (!loop->inner); /* FORNOW */

  if (do_versioning)
    {
      FOR_EACH_VEC_ELT (datarefs, i, dr)
        {
	  dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
	  stmt_vec_info stmt_info = dr_info->stmt;

	  /* For interleaving, only the alignment of the first access
	     matters.  */
	  if (aligned_access_p (dr_info)
	      || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
		  && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info))
	    continue;

	  if (STMT_VINFO_STRIDED_P (stmt_info))
	    {
	      /* Strided loads perform only component accesses, alignment is
		 irrelevant for them.  */
	      if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
		continue;
	      do_versioning = false;
	      break;
	    }

	  supportable_dr_alignment
	    = vect_supportable_dr_alignment (dr_info, false);

          if (!supportable_dr_alignment)
            {
              int mask;
              tree vectype;

	      /* Versioning only helps when the misalignment is unknown at
		 compile time; also bound the number of runtime checks.  */
              if (known_alignment_for_access_p (dr_info)
                  || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
                     >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
                {
                  do_versioning = false;
                  break;
                }

	      vectype = STMT_VINFO_VECTYPE (stmt_info);
	      gcc_assert (vectype);

	      /* At present we don't support versioning for alignment
		 with variable VF, since there's no guarantee that the
		 VF is a power of two.  We could relax this if we added
		 a way of enforcing a power-of-two size.  */
	      unsigned HOST_WIDE_INT size;
	      if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
		{
		  do_versioning = false;
		  break;
		}

	      /* Forcing alignment in the first iteration is no good if
		 we don't keep it across iterations.  For now, just disable
		 versioning in this case.
		 ?? We could actually unroll the loop to achieve the required
		 overall step alignment, and forcing the alignment could be
		 done by doing some iterations of the non-vectorized loop.  */
	      if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
			       * DR_STEP_ALIGNMENT (dr),
			       DR_TARGET_ALIGNMENT (dr_info)))
		{
		  do_versioning = false;
		  break;
		}

              /* The rightmost bits of an aligned address must be zeros.
                 Construct the mask needed for this test.  For example,
                 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
                 mask must be 15 = 0xf. */
	      mask = size - 1;

              /* FORNOW: use the same mask to test all potentially unaligned
                 references in the loop.  The vectorizer currently supports
                 a single vector size, see the reference to
                 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
                 vectorization factor is computed.  */
              gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
                          || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
              LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
	      LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
            }
        }

      /* Versioning requires at least one misaligned data reference.  */
      if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
        do_versioning = false;
      else if (!do_versioning)
	LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
    }

  if (do_versioning)
    {
      vec<stmt_vec_info> may_misalign_stmts
        = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
      stmt_vec_info stmt_info;

      /* It can now be assumed that the data references in the statements
         in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
         of the loop being vectorized.  */
      FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
        {
	  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
	  SET_DR_MISALIGNMENT (dr_info, 0);
	  if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "Alignment of access forced using versioning.\n");
        }

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Versioning for alignment will be applied.\n");

      /* Peeling and versioning can't be done together at this time.  */
      gcc_assert (! (do_peeling && do_versioning));

      opt_result stat = vect_verify_datarefs_alignment (loop_vinfo);
      gcc_assert (stat);
      return stat;
    }

  /* This point is reached if neither peeling nor versioning is being done.  */
  gcc_assert (! (do_peeling || do_versioning));

  opt_result stat = vect_verify_datarefs_alignment (loop_vinfo);
  return stat;
}
2310
2311
/* Function vect_find_same_alignment_drs.

   Update group and alignment relations in VINFO according to the chosen
   vectorization factor.  If the two references of DDR provably share the
   same alignment, record each in the other's SAME_ALIGN_REFS list.  */

static void
vect_find_same_alignment_drs (vec_info *vinfo, data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
  dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
  stmt_vec_info stmtinfo_a = dr_info_a->stmt;
  stmt_vec_info stmtinfo_b = dr_info_b->stmt;

  /* Provably independent references tell us nothing about alignment.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return;

  if (dra == drb)
    return;

  /* Gather/scatter accesses have per-element addresses, so a common
     alignment relation does not apply.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
    return;

  /* Only references with identical base, offset and step can be
     related; anything else and the distance below is meaningless.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
      || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
    return;

  /* Two references with distance zero have the same alignment.  */
  poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra))
			  - wi::to_poly_offset (DR_INIT (drb)));
  if (maybe_ne (diff, 0))
    {
      /* Get the wider of the two alignments.  */
      poly_uint64 align_a =
	exact_div (vect_calculate_target_alignment (dr_info_a),
		   BITS_PER_UNIT);
      poly_uint64 align_b =
	exact_div (vect_calculate_target_alignment (dr_info_b),
		   BITS_PER_UNIT);
      unsigned HOST_WIDE_INT align_a_c, align_b_c;
      /* Variable (poly) alignments cannot be compared here; give up.  */
      if (!align_a.is_constant (&align_a_c)
	  || !align_b.is_constant (&align_b_c))
	return;

      unsigned HOST_WIDE_INT max_align = MAX (align_a_c, align_b_c);

      /* Require the gap to be a multiple of the larger vector alignment.  */
      if (!multiple_p (diff, max_align))
	return;
    }

  STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
  STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "accesses have the same alignment: %T and %T\n",
		     DR_REF (dra), DR_REF (drb));
}
2373
2374
/* Function vect_analyze_data_refs_alignment

   Analyze the alignment of the data-references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.
   (As written this always returns success; per-DR failures are recorded
   in the dr_vec_info by vect_compute_data_ref_alignment.)  */

opt_result
vect_analyze_data_refs_alignment (loop_vec_info vinfo)
{
  DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");

  /* Mark groups of data references with same alignment using
     data dependence information.  */
  vec<ddr_p> ddrs = vinfo->shared->ddrs;
  struct data_dependence_relation *ddr;
  unsigned int i;

  FOR_EACH_VEC_ELT (ddrs, i, ddr)
    vect_find_same_alignment_drs (vinfo, ddr);

  vec<data_reference_p> datarefs = vinfo->shared->datarefs;
  struct data_reference *dr;

  /* Base alignments must be recorded before computing per-DR
     misalignments below.  */
  vect_record_base_alignments (vinfo);
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
      if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
	vect_compute_data_ref_alignment (dr_info);
    }

  return opt_result::success ();
}
2407
2408
/* Analyze alignment of DRs of stmts in NODE.  Returns false (with a dump
   note) if the alignment of the node's vectorized access is unsupported.  */

static bool
vect_slp_analyze_and_verify_node_alignment (slp_tree node)
{
  /* We vectorize from the first scalar stmt in the node unless
     the node is permuted in which case we start from the first
     element in the group.  */
  stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
  dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
  if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
    first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);

  /* DR_INFO is the access we actually vectorize from; FIRST_DR_INFO is
     the node's own first scalar stmt (they differ for permuted loads).  */
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
  vect_compute_data_ref_alignment (dr_info);
  /* For creating the data-ref pointer we need alignment of the
     first element anyway.  */
  if (dr_info != first_dr_info)
    vect_compute_data_ref_alignment (first_dr_info);
  if (! verify_data_ref_alignment (dr_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad data alignment in basic "
			 "block.\n");
      return false;
    }

  return true;
}
2439
2f6fec15 2440/* Function vect_slp_analyze_instance_alignment
2441
2442 Analyze the alignment of the data-references in the SLP instance.
2443 Return FALSE if a data reference is found that cannot be vectorized. */
2444
2445bool
2446vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
2447{
88f6eb8f 2448 DUMP_VECT_SCOPE ("vect_slp_analyze_and_verify_instance_alignment");
2f6fec15 2449
2450 slp_tree node;
2451 unsigned i;
2452 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
2453 if (! vect_slp_analyze_and_verify_node_alignment (node))
2454 return false;
2455
2456 node = SLP_INSTANCE_TREE (instance);
06bb64b8 2457 if (STMT_VINFO_DATA_REF (SLP_TREE_SCALAR_STMTS (node)[0])
2f6fec15 2458 && ! vect_slp_analyze_and_verify_node_alignment
2459 (SLP_INSTANCE_TREE (instance)))
2460 return false;
2461
2462 return true;
2463}
2464
fb85abff 2465
abc9513d 2466/* Analyze groups of accesses: check that DR_INFO belongs to a group of
ee612634 2467 accesses of legal size, step, etc. Detect gaps, single element
2468 interleaving, and other special cases. Set grouped access info.
39e23eaa 2469 Collect groups of strided stores for further use in SLP analysis.
2470 Worker for vect_analyze_group_access. */
fb85abff 2471
static bool
vect_analyze_group_access_1 (dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  HOST_WIDE_INT dr_step = -1;
  HOST_WIDE_INT groupsize, last_accessed_element = 1;
  bool slp_impossible = false;

  /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
     size of the interleaving group (including gaps).  A zero GROUPSIZE
     below means the step is not a compile-time constant.  */
  if (tree_fits_shwi_p (step))
    {
      dr_step = tree_to_shwi (step);
      /* Check that STEP is a multiple of type size.  Otherwise there is
	 a non-element-sized gap at the end of the group which we
	 cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
	 ??? As we can handle non-constant step fine here we should
	 simply remove uses of DR_GROUP_GAP between the last and first
	 element and instead rely on DR_STEP.  DR_GROUP_SIZE then would
	 simply not include that gap.  */
      if ((dr_step % type_size) != 0)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Step %T is not a multiple of the element size"
			     " for %T\n",
			     step, DR_REF (dr));
	  return false;
	}
      groupsize = absu_hwi (dr_step) / type_size;
    }
  else
    groupsize = 0;

  /* Not consecutive access is possible only if it is a part of interleaving.  */
  if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
    {
      /* Check if it this DR is a part of interleaving, and is a single
	 element of the group that is accessed in the loop.  */

      /* Gaps are supported only for loads. STEP must be a multiple of the type
	 size.  */
      if (DR_IS_READ (dr)
	  && (dr_step % type_size) == 0
	  && groupsize > 0)
	{
	  /* Make this DR a group of its own: one element accessed,
	     groupsize - 1 elements of gap.  */
	  DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
	  DR_GROUP_SIZE (stmt_info) = groupsize;
	  DR_GROUP_GAP (stmt_info) = groupsize - 1;
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Detected single element interleaving %T"
			     " step %T\n",
			     DR_REF (dr), step);

	  return true;
	}

      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not consecutive access %G", stmt_info->stmt);

      if (bb_vinfo)
	{
	  /* Mark the statement as unvectorizable.  For basic blocks this
	     is not a failure, other statements may still be handled.  */
	  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	  return true;
	}

      /* In a loop, fall back to a strided access.  */
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
      STMT_VINFO_STRIDED_P (stmt_info) = true;
      return true;
    }

  if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
    {
      /* First stmt in the interleaving chain. Check the chain.  */
      stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
      struct data_reference *data_ref = dr;
      unsigned int count = 1;
      tree prev_init = DR_INIT (data_ref);
      HOST_WIDE_INT diff, gaps = 0;

      /* By construction, all group members have INTEGER_CST DR_INITs.  */
      while (next)
	{
	  /* We never have the same DR multiple times.  */
	  gcc_assert (tree_int_cst_compare (DR_INIT (data_ref),
				DR_INIT (STMT_VINFO_DATA_REF (next))) != 0);

	  data_ref = STMT_VINFO_DATA_REF (next);

	  /* All group members have the same STEP by construction.  */
	  gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));

	  /* Check that the distance between two accesses is equal to the type
	     size. Otherwise, we have gaps.  */
	  diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
		  - TREE_INT_CST_LOW (prev_init)) / type_size;
	  if (diff != 1)
	    {
	      /* FORNOW: SLP of accesses with gaps is not supported.  */
	      slp_impossible = true;
	      if (DR_IS_WRITE (data_ref))
		{
		  /* Stores with gaps are not supported at all.  */
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "interleaved store with gaps\n");
		  return false;
		}

	      gaps += diff - 1;
	    }

	  last_accessed_element += diff;

	  /* Store the gap from the previous member of the group. If there is no
	     gap in the access, DR_GROUP_GAP is always 1.  */
	  DR_GROUP_GAP (next) = diff;

	  prev_init = DR_INIT (data_ref);
	  next = DR_GROUP_NEXT_ELEMENT (next);
	  /* Count the number of data-refs in the chain.  */
	  count++;
	}

      /* Non-constant step: derive the group size from the members seen.  */
      if (groupsize == 0)
	groupsize = count + gaps;

      /* This could be UINT_MAX but as we are generating code in a very
	 inefficient way we have to cap earlier.  See PR78699 for example.  */
      if (groupsize > 4096)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "group is too large\n");
	  return false;
	}

      /* Check that the size of the interleaving is equal to count for stores,
	 i.e., that there are no gaps.  */
      if (groupsize != count
	  && !DR_IS_READ (dr))
	{
	  /* Treat the store group as strided rather than failing.  */
	  groupsize = count;
	  STMT_VINFO_STRIDED_P (stmt_info) = true;
	}

      /* If there is a gap after the last load in the group it is the
	 difference between the groupsize and the last accessed
	 element.
	 When there is no gap, this difference should be 0.  */
      DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;

      DR_GROUP_SIZE (stmt_info) = groupsize;
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Detected interleaving ");
	  if (DR_IS_READ (dr))
	    dump_printf (MSG_NOTE, "load ");
	  else if (STMT_VINFO_STRIDED_P (stmt_info))
	    dump_printf (MSG_NOTE, "strided store ");
	  else
	    dump_printf (MSG_NOTE, "store ");
	  dump_printf (MSG_NOTE, "of size %u\n",
		       (unsigned)groupsize);
	  dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
	  next = DR_GROUP_NEXT_ELEMENT (stmt_info);
	  while (next)
	    {
	      if (DR_GROUP_GAP (next) != 1)
		dump_printf_loc (MSG_NOTE, vect_location,
				 "\t<gap of %d elements>\n",
				 DR_GROUP_GAP (next) - 1);
	      dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
	      next = DR_GROUP_NEXT_ELEMENT (next);
	    }
	  if (DR_GROUP_GAP (stmt_info) != 0)
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "\t<gap of %d elements>\n",
			     DR_GROUP_GAP (stmt_info));
	}

      /* SLP: create an SLP data structure for every interleaving group of
	 stores for further analysis in vect_analyse_slp.  */
      if (DR_IS_WRITE (dr) && !slp_impossible)
	{
	  if (loop_vinfo)
	    LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
	  if (bb_vinfo)
	    BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
	}
    }

  return true;
}
2676
abc9513d 2677/* Analyze groups of accesses: check that DR_INFO belongs to a group of
39e23eaa 2678 accesses of legal size, step, etc. Detect gaps, single element
2679 interleaving, and other special cases. Set grouped access info.
2680 Collect groups of strided stores for further use in SLP analysis. */
2681
2682static bool
abc9513d 2683vect_analyze_group_access (dr_vec_info *dr_info)
39e23eaa 2684{
abc9513d 2685 if (!vect_analyze_group_access_1 (dr_info))
39e23eaa 2686 {
2687 /* Dissolve the group if present. */
abc9513d 2688 stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
cd24aa3c 2689 while (stmt_info)
39e23eaa 2690 {
cd24aa3c 2691 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
2692 DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
2693 DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
2694 stmt_info = next;
39e23eaa 2695 }
2696 return false;
2697 }
2698 return true;
2699}
fb85abff 2700
abc9513d 2701/* Analyze the access pattern of the data-reference DR_INFO.
fb85abff 2702 In case of non-consecutive accesses call vect_analyze_group_access() to
ee612634 2703 analyze groups of accesses. */
fb85abff 2704
static bool
vect_analyze_data_ref_access (dr_vec_info *dr_info)
{
  data_reference *dr = dr_info->dr;
  tree step = DR_STEP (dr);
  tree scalar_type = TREE_TYPE (DR_REF (dr));
  stmt_vec_info stmt_info = dr_info->stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *loop = NULL;

  /* Gather/scatter accesses do not need a step-based classification.  */
  if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return true;

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* In loop vectorization a missing step means the evolution of the
     address could not be analyzed.  */
  if (loop_vinfo && !step)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "bad data-ref access in loop\n");
      return false;
    }

  /* Allow loads with zero step in inner-loop vectorization.  */
  if (loop_vinfo && integer_zerop (step))
    {
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
      if (!nested_in_vect_loop_p (loop, stmt_info))
	return DR_IS_READ (dr);
      /* Allow references with zero step for outer loops marked
	 with pragma omp simd only - it guarantees absence of
	 loop-carried dependencies between inner loop iterations.  */
      if (loop->safelen < 2)
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "zero step in inner loop of nest\n");
	  return false;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      /* Interleaved accesses are not yet supported within outer-loop
	 vectorization for references in the inner-loop.  */
      DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;

      /* For the rest of the analysis we use the outer-loop step.  */
      step = STMT_VINFO_DR_STEP (stmt_info);
      if (integer_zerop (step))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "zero step in outer loop.\n");
	  /* An invariant address: fine for loads, not for stores.  */
	  return DR_IS_READ (dr);
	}
    }

  /* Consecutive?  A positive step equal to the element size, or a
     negative step of the same magnitude, is a simple contiguous
     access and needs no group analysis.  */
  if (TREE_CODE (step) == INTEGER_CST)
    {
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
      if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
	  || (dr_step < 0
	      && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
	{
	  /* Mark that it is not interleaving.  */
	  DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
	  return true;
	}
    }

  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "grouped access in outer loop.\n");
      return false;
    }


  /* Assume this is a DR handled by non-constant strided load case.  */
  if (TREE_CODE (step) != INTEGER_CST)
    return (STMT_VINFO_STRIDED_P (stmt_info)
	    && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
		|| vect_analyze_group_access (dr_info)));

  /* Not consecutive access - check if it's a part of interleaving group.  */
  return vect_analyze_group_access (dr_info);
}
2796
68f15e9d 2797/* Compare two data-references DRA and DRB to group them into chunks
2798 suitable for grouping. */
2799
2800static int
2801dr_group_sort_cmp (const void *dra_, const void *drb_)
2802{
2803 data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2804 data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
68f15e9d 2805 int cmp;
2806
2807 /* Stabilize sort. */
2808 if (dra == drb)
2809 return 0;
2810
8167d6ad 2811 /* DRs in different loops never belong to the same group. */
2812 loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
2813 loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
2814 if (loopa != loopb)
2815 return loopa->num < loopb->num ? -1 : 1;
2816
68f15e9d 2817 /* Ordering of DRs according to base. */
ce55060f 2818 cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2819 DR_BASE_ADDRESS (drb));
2820 if (cmp != 0)
2821 return cmp;
68f15e9d 2822
2823 /* And according to DR_OFFSET. */
ce55060f 2824 cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2825 if (cmp != 0)
2826 return cmp;
68f15e9d 2827
2828 /* Put reads before writes. */
2829 if (DR_IS_READ (dra) != DR_IS_READ (drb))
2830 return DR_IS_READ (dra) ? -1 : 1;
2831
2832 /* Then sort after access size. */
ce55060f 2833 cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2834 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2835 if (cmp != 0)
2836 return cmp;
68f15e9d 2837
2838 /* And after step. */
ce55060f 2839 cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2840 if (cmp != 0)
2841 return cmp;
68f15e9d 2842
2843 /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
8672ee56 2844 cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
68f15e9d 2845 if (cmp == 0)
2846 return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2847 return cmp;
2848}
fb85abff 2849
2dd8e84c 2850/* If OP is the result of a conversion, return the unconverted value,
2851 otherwise return null. */
2852
2853static tree
2854strip_conversion (tree op)
2855{
2856 if (TREE_CODE (op) != SSA_NAME)
2857 return NULL_TREE;
2858 gimple *stmt = SSA_NAME_DEF_STMT (op);
2859 if (!is_gimple_assign (stmt)
2860 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
2861 return NULL_TREE;
2862 return gimple_assign_rhs1 (stmt);
2863}
2864
ecc42a77 2865/* Return true if vectorizable_* routines can handle statements STMT1_INFO
f92474f8 2866 and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can
2867 be grouped in SLP mode. */
2dd8e84c 2868
static bool
can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info,
		   bool allow_slp_p)
{
  /* Two plain (unmasked) assignments can always be grouped.  */
  if (gimple_assign_single_p (stmt1_info->stmt))
    return gimple_assign_single_p (stmt2_info->stmt);

  gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
  if (call1 && gimple_call_internal_p (call1))
    {
      /* Check for two masked loads or two masked stores.  */
      gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
      if (!call2 || !gimple_call_internal_p (call2))
	return false;
      internal_fn ifn = gimple_call_internal_fn (call1);
      if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
	return false;
      if (ifn != gimple_call_internal_fn (call2))
	return false;

      /* Check that the masks are the same.  Cope with casts of masks,
	 like those created by build_mask_conversion.  For masked loads in
	 SLP mode (ALLOW_SLP_P) differing masks are acceptable, so the
	 comparison is skipped in that case.  */
      tree mask1 = gimple_call_arg (call1, 2);
      tree mask2 = gimple_call_arg (call2, 2);
      if (!operand_equal_p (mask1, mask2, 0)
	  && (ifn == IFN_MASK_STORE || !allow_slp_p))
	{
	  /* Retry the comparison on the unconverted mask values.  */
	  mask1 = strip_conversion (mask1);
	  if (!mask1)
	    return false;
	  mask2 = strip_conversion (mask2);
	  if (!mask2)
	    return false;
	  if (!operand_equal_p (mask1, mask2, 0))
	    return false;
	}
      return true;
    }

  /* Anything else (non-internal calls, mixed forms) cannot be grouped.  */
  return false;
}
2910
fb85abff 2911/* Function vect_analyze_data_ref_accesses.
2912
2913 Analyze the access pattern of all the data references in the loop.
2914
2915 FORNOW: the only access pattern that is considered vectorizable is a
2916 simple step 1 (consecutive) access.
2917
2918 FORNOW: handle only arrays and pointer accesses. */
2919
opt_result
vect_analyze_data_ref_accesses (vec_info *vinfo)
{
  unsigned int i;
  vec<data_reference_p> datarefs = vinfo->shared->datarefs;
  struct data_reference *dr;

  DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");

  if (datarefs.is_empty ())
    return opt_result::success ();

  /* Sort the array of datarefs to make building the interleaving chains
     linear.  Don't modify the original vector's order, it is needed for
     determining what dependencies are reversed.  */
  vec<data_reference_p> datarefs_copy = datarefs.copy ();
  datarefs_copy.qsort (dr_group_sort_cmp);
  hash_set<stmt_vec_info> to_fixup;

  /* Build the interleaving chains.  Note the inner loop advances I, so
     after it breaks the outer loop resumes at the first dataref that did
     not fit the current group.  */
  for (i = 0; i < datarefs_copy.length () - 1;)
    {
      data_reference_p dra = datarefs_copy[i];
      dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
      stmt_vec_info stmtinfo_a = dr_info_a->stmt;
      stmt_vec_info lastinfo = NULL;
      if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
	  || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
	{
	  ++i;
	  continue;
	}
      for (i = i + 1; i < datarefs_copy.length (); ++i)
	{
	  data_reference_p drb = datarefs_copy[i];
	  dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
	  stmt_vec_info stmtinfo_b = dr_info_b->stmt;
	  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
	      || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
	    break;

	  /* ??? Imperfect sorting (non-compatible types, non-modulo
	     accesses, same accesses) can lead to a group to be artificially
	     split here as we don't just skip over those.  If it really
	     matters we can push those to a worklist and re-iterate
	     over them.  The we can just skip ahead to the next DR here.  */

	  /* DRs in a different loop should not be put into the same
	     interleaving group.  */
	  if (gimple_bb (DR_STMT (dra))->loop_father
	      != gimple_bb (DR_STMT (drb))->loop_father)
	    break;

	  /* Check that the data-refs have same first location (except init)
	     and they are both either store or load (not load and store,
	     not masked loads or stores).  */
	  if (DR_IS_READ (dra) != DR_IS_READ (drb)
	      || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
					DR_BASE_ADDRESS (drb)) != 0
	      || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
	      || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true))
	    break;

	  /* Check that the data-refs have the same constant size.  */
	  tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
	  tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
	  if (!tree_fits_uhwi_p (sza)
	      || !tree_fits_uhwi_p (szb)
	      || !tree_int_cst_equal (sza, szb))
	    break;

	  /* Check that the data-refs have the same step.  */
	  if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
	    break;

	  /* Check the types are compatible.
	     ??? We don't distinguish this during sorting.  */
	  if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
				   TREE_TYPE (DR_REF (drb))))
	    break;

	  /* Check that the DR_INITs are compile-time constants.  */
	  if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST
	      || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
	    break;

	  /* Different .GOMP_SIMD_LANE calls still give the same lane,
	     just hold extra information.  */
	  if (STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_a)
	      && STMT_VINFO_SIMD_LANE_ACCESS_P (stmtinfo_b)
	      && data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb)) == 0)
	    break;

	  /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb).  */
	  HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
	  HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
	  HOST_WIDE_INT init_prev
	    = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
	  gcc_assert (init_a <= init_b
		      && init_a <= init_prev
		      && init_prev <= init_b);

	  /* Do not place the same access in the interleaving chain twice.  */
	  if (init_b == init_prev)
	    {
	      gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
			  < gimple_uid (DR_STMT (drb)));
	      /* Simply link in duplicates and fix up the chain below.  */
	    }
	  else
	    {
	      /* If init_b == init_a + the size of the type * k, we have an
		 interleaving, and DRA is accessed before DRB.  */
	      HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
	      if (type_size_a == 0
		  || (init_b - init_a) % type_size_a != 0)
		break;

	      /* If we have a store, the accesses are adjacent.  This splits
		 groups into chunks we support (we don't support vectorization
		 of stores with gaps).  */
	      if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
		break;

	      /* If the step (if not zero or non-constant) is greater than the
		 difference between data-refs' inits this splits groups into
		 suitable sizes.  */
	      if (tree_fits_shwi_p (DR_STEP (dra)))
		{
		  HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
		  if (step != 0 && step <= (init_b - init_a))
		    break;
		}
	    }

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     DR_IS_READ (dra)
			     ? "Detected interleaving load %T and %T\n"
			     : "Detected interleaving store %T and %T\n",
			     DR_REF (dra), DR_REF (drb));

	  /* Link the found element into the group list.  */
	  if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
	    {
	      DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
	      lastinfo = stmtinfo_a;
	    }
	  DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
	  DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
	  lastinfo = stmtinfo_b;

	  /* Masked loads with differing masks were accepted above only
	     for SLP; record that on the group leader.  */
	  STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)
	    = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false);

	  if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a))
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Load suitable for SLP vectorization only.\n");

	  if (init_b == init_prev
	      && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
	      && dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "Queuing group with duplicate access for fixup\n");
	}
    }

  /* Fixup groups with duplicate entries by splitting it.  */
  while (1)
    {
      hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
      if (!(it != to_fixup.end ()))
	break;
      stmt_vec_info grp = *it;
      to_fixup.remove (grp);

      /* Find the earliest duplicate group member.  */
      unsigned first_duplicate = -1u;
      stmt_vec_info next, g = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
	{
	  if (tree_int_cst_equal (DR_INIT (STMT_VINFO_DR_INFO (next)->dr),
				  DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
	      && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
	    first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
	  g = next;
	}
      if (first_duplicate == -1U)
	continue;

      /* Then move all stmts after the first duplicate to a new group.
	 Note this is a heuristic but one with the property that *it
	 is fixed up completely.  */
      g = grp;
      stmt_vec_info newgroup = NULL, ng = grp;
      while ((next = DR_GROUP_NEXT_ELEMENT (g)))
	{
	  if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
	    {
	      /* Unlink NEXT from the old group and append it to the new.  */
	      DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
	      if (!newgroup)
		newgroup = next;
	      else
		DR_GROUP_NEXT_ELEMENT (ng) = next;
	      ng = next;
	      DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
	    }
	  else
	    g = DR_GROUP_NEXT_ELEMENT (g);
	}
      DR_GROUP_NEXT_ELEMENT (ng) = NULL;

      /* Fixup the new group which still may contain duplicates.  */
      to_fixup.add (newgroup);
    }

  /* Finally classify each access (consecutive, grouped, strided, ...).  */
  FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
    {
      dr_vec_info *dr_info = vinfo->lookup_dr (dr);
      if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
	  && !vect_analyze_data_ref_access (dr_info))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: complicated access pattern.\n");

	  if (is_a <bb_vec_info> (vinfo))
	    {
	      /* Mark the statement as not vectorizable.  */
	      STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
	      continue;
	    }
	  else
	    {
	      datarefs_copy.release ();
	      return opt_result::failure_at (dr_info->stmt->stmt,
					     "not vectorized:"
					     " complicated access pattern.\n");
	    }
	}
    }

  datarefs_copy.release ();
  return opt_result::success ();
}
3165
8a7b0f48 3166/* Function vect_vfa_segment_size.
3167
8a7b0f48 3168 Input:
abc9513d 3169 DR_INFO: The data reference.
8a7b0f48 3170 LENGTH_FACTOR: segment length to consider.
3171
e85b4a5e 3172 Return a value suitable for the dr_with_seg_len::seg_len field.
3173 This is the "distance travelled" by the pointer from the first
3174 iteration in the segment to the last. Note that it does not include
3175 the size of the access; in effect it only describes the first byte. */
8a7b0f48 3176
3177static tree
abc9513d 3178vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
8a7b0f48 3179{
e85b4a5e 3180 length_factor = size_binop (MINUS_EXPR,
3181 fold_convert (sizetype, length_factor),
3182 size_one_node);
abc9513d 3183 return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
e85b4a5e 3184 length_factor);
3185}
8a7b0f48 3186
abc9513d 3187/* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
e85b4a5e 3188 gives the worst-case number of bytes covered by the segment. */
8a7b0f48 3189
e85b4a5e 3190static unsigned HOST_WIDE_INT
abc9513d 3191vect_vfa_access_size (dr_vec_info *dr_info)
e85b4a5e 3192{
abc9513d 3193 stmt_vec_info stmt_vinfo = dr_info->stmt;
3194 tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
e85b4a5e 3195 unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
3196 unsigned HOST_WIDE_INT access_size = ref_size;
e1009321 3197 if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
8a7b0f48 3198 {
abc9513d 3199 gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
e1009321 3200 access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
e85b4a5e 3201 }
3202 if (STMT_VINFO_VEC_STMT (stmt_vinfo)
abc9513d 3203 && (vect_supportable_dr_alignment (dr_info, false)
e85b4a5e 3204 == dr_explicit_realign_optimized))
3205 {
3206 /* We might access a full vector's worth. */
3207 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3208 access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
8a7b0f48 3209 }
e85b4a5e 3210 return access_size;
3211}
3212
abc9513d 3213/* Get the minimum alignment for all the scalar accesses that DR_INFO
3214 describes. */
e85b4a5e 3215
3216static unsigned int
abc9513d 3217vect_vfa_align (dr_vec_info *dr_info)
e85b4a5e 3218{
abc9513d 3219 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
8a7b0f48 3220}
3221
a5af7a75 3222/* Function vect_no_alias_p.
3223
63bc418d 3224 Given data references A and B with equal base and offset, see whether
3225 the alias relation can be decided at compilation time. Return 1 if
3226 it can and the references alias, 0 if it can and the references do
e85b4a5e 3227 not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A,
3228 SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
3229 of dr_with_seg_len::{seg_len,access_size} for A and B. */
a5af7a75 3230
static int
vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
			 tree segment_length_a, tree segment_length_b,
			 unsigned HOST_WIDE_INT access_size_a,
			 unsigned HOST_WIDE_INT access_size_b)
{
  poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
  poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
  poly_uint64 const_length_a;
  poly_uint64 const_length_b;

  /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
     bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
     [a, a+12) */
  if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
    {
      /* Negative step: the segment length is negative; negate it and
	 move the start of the range back accordingly.  */
      const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
      offset_a = (offset_a + access_size_a) - const_length_a;
    }
  else
    const_length_a = tree_to_poly_uint64 (segment_length_a);
  if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
    {
      const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
      offset_b = (offset_b + access_size_b) - const_length_b;
    }
  else
    const_length_b = tree_to_poly_uint64 (segment_length_b);

  /* The segment length only describes the first byte of the last access;
     add the access size to get the number of bytes covered.  */
  const_length_a += access_size_a;
  const_length_b += access_size_b;

  /* 1: the ranges provably overlap.  */
  if (ranges_known_overlap_p (offset_a, const_length_a,
			      offset_b, const_length_b))
    return 1;

  /* 0: the ranges provably do not overlap.  */
  if (!ranges_maybe_overlap_p (offset_a, const_length_a,
			       offset_b, const_length_b))
    return 0;

  /* -1: cannot decide at compile time.  */
  return -1;
}
3273
403965f7 3274/* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
3275 in DDR is >= VF. */
3276
3277static bool
3278dependence_distance_ge_vf (data_dependence_relation *ddr,
d75596cd 3279 unsigned int loop_depth, poly_uint64 vf)
403965f7 3280{
3281 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
3282 || DDR_NUM_DIST_VECTS (ddr) == 0)
3283 return false;
3284
3285 /* If the dependence is exact, we should have limited the VF instead. */
3286 gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
3287
3288 unsigned int i;
3289 lambda_vector dist_v;
3290 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3291 {
3292 HOST_WIDE_INT dist = dist_v[loop_depth];
3293 if (dist != 0
3294 && !(dist > 0 && DDR_REVERSED_P (ddr))
d75596cd 3295 && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
403965f7 3296 return false;
3297 }
3298
3299 if (dump_enabled_p ())
a4e972e3 3300 dump_printf_loc (MSG_NOTE, vect_location,
3301 "dependence distance between %T and %T is >= VF\n",
3302 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
403965f7 3303
3304 return true;
3305}
3306
e85b4a5e 3307/* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */
3308
3309static void
54e7de93 3310dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
e85b4a5e 3311{
a4e972e3 3312 dump_printf (dump_kind, "%s (%T) >= ",
3313 lower_bound.unsigned_p ? "unsigned" : "abs",
3314 lower_bound.expr);
e85b4a5e 3315 dump_dec (dump_kind, lower_bound.min_value);
3316}
3317
3318/* Record that the vectorized loop requires the vec_lower_bound described
3319 by EXPR, UNSIGNED_P and MIN_VALUE. */
3320
3321static void
3322vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
3323 poly_uint64 min_value)
3324{
3325 vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3326 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3327 if (operand_equal_p (lower_bounds[i].expr, expr, 0))
3328 {
3329 unsigned_p &= lower_bounds[i].unsigned_p;
3330 min_value = upper_bound (lower_bounds[i].min_value, min_value);
3331 if (lower_bounds[i].unsigned_p != unsigned_p
3332 || maybe_lt (lower_bounds[i].min_value, min_value))
3333 {
3334 lower_bounds[i].unsigned_p = unsigned_p;
3335 lower_bounds[i].min_value = min_value;
3336 if (dump_enabled_p ())
3337 {
3338 dump_printf_loc (MSG_NOTE, vect_location,
3339 "updating run-time check to ");
3340 dump_lower_bound (MSG_NOTE, lower_bounds[i]);
3341 dump_printf (MSG_NOTE, "\n");
3342 }
3343 }
3344 return;
3345 }
3346
3347 vec_lower_bound lower_bound (expr, unsigned_p, min_value);
3348 if (dump_enabled_p ())
3349 {
3350 dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
3351 dump_lower_bound (MSG_NOTE, lower_bound);
3352 dump_printf (MSG_NOTE, "\n");
3353 }
3354 LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
3355}
3356
abc9513d 3357/* Return true if it's unlikely that the step of the vectorized form of DR_INFO
e85b4a5e 3358 will span fewer than GAP bytes. */
3359
3360static bool
abc9513d 3361vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
3362 poly_int64 gap)
e85b4a5e 3363{
abc9513d 3364 stmt_vec_info stmt_info = dr_info->stmt;
e85b4a5e 3365 HOST_WIDE_INT count
3366 = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
e1009321 3367 if (DR_GROUP_FIRST_ELEMENT (stmt_info))
cd24aa3c 3368 count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
abc9513d 3369 return (estimated_poly_value (gap)
3370 <= count * vect_get_scalar_dr_size (dr_info));
e85b4a5e 3371}
3372
abc9513d 3373/* Return true if we know that there is no alias between DR_INFO_A and
3374 DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
3375 When returning true, set *LOWER_BOUND_OUT to this N. */
e85b4a5e 3376
3377static bool
abc9513d 3378vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
e85b4a5e 3379 poly_uint64 *lower_bound_out)
3380{
3381 /* Check that there is a constant gap of known sign between DR_A
3382 and DR_B. */
abc9513d 3383 data_reference *dr_a = dr_info_a->dr;
3384 data_reference *dr_b = dr_info_b->dr;
e85b4a5e 3385 poly_int64 init_a, init_b;
3386 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
3387 || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
3388 || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
3389 || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
3390 || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
3391 || !ordered_p (init_a, init_b))
3392 return false;
3393
3394 /* Sort DR_A and DR_B by the address they access. */
3395 if (maybe_lt (init_b, init_a))
3396 {
3397 std::swap (init_a, init_b);
abc9513d 3398 std::swap (dr_info_a, dr_info_b);
e85b4a5e 3399 std::swap (dr_a, dr_b);
3400 }
3401
3402 /* If the two accesses could be dependent within a scalar iteration,
3403 make sure that we'd retain their order. */
abc9513d 3404 if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
3405 && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
e85b4a5e 3406 return false;
3407
3408 /* There is no alias if abs (DR_STEP) is greater than or equal to
3409 the bytes spanned by the combination of the two accesses. */
abc9513d 3410 *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
e85b4a5e 3411 return true;
3412}
3413
/* Function vect_prune_runtime_alias_test_list.

   Prune a list of ddrs to be tested at run-time by versioning for alias.
   Merge several alias checks into one if possible.
   Return a failure result if the resulting list of ddrs is longer than
   allowed by PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return
   success.  */

opt_result
vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
{
  /* Used to deduplicate address-inequality checks on object pairs.  */
  typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
  hash_set <tree_pair_hash> compared_objects;

  vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
    = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
  vec<vec_object_pair> &check_unequal_addrs
    = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
  poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);

  ddr_p ddr;
  unsigned int i;
  tree length_factor;

  DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");

  /* Step values are irrelevant for aliasing if the number of vector
     iterations is equal to the number of scalar iterations (which can
     happen for fully-SLP loops).  */
  bool ignore_step_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);

  if (!ignore_step_p)
    {
      /* Convert the checks for nonzero steps into bound tests.  */
      tree value;
      FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
	vect_check_lower_bound (loop_vinfo, value, true, 1);
    }

  if (may_alias_ddrs.is_empty ())
    return opt_result::success ();

  comp_alias_ddrs.create (may_alias_ddrs.length ());

  unsigned int loop_depth
    = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
			  LOOP_VINFO_LOOP_NEST (loop_vinfo));

  /* First, we collect all data ref pairs for aliasing checks.  */
  FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
    {
      int comp_res;
      poly_uint64 lower_bound;
      tree segment_length_a, segment_length_b;
      unsigned HOST_WIDE_INT access_size_a, access_size_b;
      unsigned int align_a, align_b;

      /* Ignore the alias if the VF we chose ended up being no greater
	 than the dependence distance.  */
      if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
	continue;

      /* Dependences on distinct objects reduce to a cheaper
	 "addresses differ" check; record each object pair only once.  */
      if (DDR_OBJECT_A (ddr))
	{
	  vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
	  if (!compared_objects.add (new_pair))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_NOTE, vect_location,
				 "checking that %T and %T"
				 " have different addresses\n",
				 new_pair.first, new_pair.second);
	      LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
	    }
	  continue;
	}

      dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
      stmt_vec_info stmt_info_a = dr_info_a->stmt;

      dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
      stmt_vec_info stmt_info_b = dr_info_b->stmt;

      /* Skip the pair if inter-iteration dependencies are irrelevant
	 and intra-iteration dependencies are guaranteed to be honored.  */
      if (ignore_step_p
	  && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b)
	      || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
						 &lower_bound)))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "no need for alias check between "
			     "%T and %T when VF is 1\n",
			     DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
	  continue;
	}

      /* See whether we can handle the alias using a bounds check on
	 the step, and whether that's likely to be the best approach.
	 (It might not be, for example, if the minimum step is much larger
	 than the number of bytes handled by one vector iteration.)  */
      if (!ignore_step_p
	  && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
	  && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
					     &lower_bound)
	  && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
	      || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
	{
	  bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
			       "%T and %T when the step %T is outside ",
			       DR_REF (dr_info_a->dr),
			       DR_REF (dr_info_b->dr),
			       DR_STEP (dr_info_a->dr));
	      if (unsigned_p)
		dump_printf (MSG_NOTE, "[0");
	      else
		{
		  dump_printf (MSG_NOTE, "(");
		  dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
		}
	      dump_printf (MSG_NOTE, ", ");
	      dump_dec (MSG_NOTE, lower_bound);
	      dump_printf (MSG_NOTE, ")\n");
	    }
	  vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
				  unsigned_p, lower_bound);
	  continue;
	}

      /* For grouped accesses, base the check on the group leader.  */
      stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
      if (dr_group_first_a)
	{
	  stmt_info_a = dr_group_first_a;
	  dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
	}

      stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
      if (dr_group_first_b)
	{
	  stmt_info_b = dr_group_first_b;
	  dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
	}

      if (ignore_step_p)
	{
	  segment_length_a = size_zero_node;
	  segment_length_b = size_zero_node;
	}
      else
	{
	  if (!operand_equal_p (DR_STEP (dr_info_a->dr),
				DR_STEP (dr_info_b->dr), 0))
	    length_factor = scalar_loop_iters;
	  else
	    length_factor = size_int (vect_factor);
	  segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
	  segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
	}
      access_size_a = vect_vfa_access_size (dr_info_a);
      access_size_b = vect_vfa_access_size (dr_info_b);
      align_a = vect_vfa_align (dr_info_a);
      align_b = vect_vfa_align (dr_info_b);

      /* Order the pair by base address, then by offset, so that equal
	 bases can be detected and pairs can be canonicalized below.  */
      comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_info_a->dr),
					DR_BASE_ADDRESS (dr_info_b->dr));
      if (comp_res == 0)
	comp_res = data_ref_compare_tree (DR_OFFSET (dr_info_a->dr),
					  DR_OFFSET (dr_info_b->dr));

      /* See whether the alias is known at compilation time.  */
      if (comp_res == 0
	  && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
	  && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
	  && poly_int_tree_p (segment_length_a)
	  && poly_int_tree_p (segment_length_b))
	{
	  int res = vect_compile_time_alias (dr_info_a, dr_info_b,
					     segment_length_a,
					     segment_length_b,
					     access_size_a,
					     access_size_b);
	  if (res >= 0 && dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "can tell at compile time that %T and %T",
			       DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
	      if (res == 0)
		dump_printf (MSG_NOTE, " do not alias\n");
	      else
		dump_printf (MSG_NOTE, " alias\n");
	    }

	  /* Known not to alias: no run-time check needed.  */
	  if (res == 0)
	    continue;

	  /* Known to alias: versioning cannot help.  */
	  if (res == 1)
	    return opt_result::failure_at (stmt_info_b->stmt,
					   "not vectorized:"
					   " compilation time alias: %G%G",
					   stmt_info_a->stmt,
					   stmt_info_b->stmt);
	}

      dr_with_seg_len_pair_t dr_with_seg_len_pair
	(dr_with_seg_len (dr_info_a->dr, segment_length_a,
			  access_size_a, align_a),
	 dr_with_seg_len (dr_info_b->dr, segment_length_b,
			  access_size_b, align_b));

      /* Canonicalize pairs by sorting the two DR members.  */
      if (comp_res > 0)
	std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);

      comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
    }

  prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);

  unsigned int count = (comp_alias_ddrs.length ()
			+ check_unequal_addrs.length ());

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "improved number of alias checks from %d to %d\n",
		     may_alias_ddrs.length (), count);
  if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
    return opt_result::failure_at
      (vect_location,
       "number of versioning for alias "
       "run-time tests exceeds %d "
       "(--param vect-max-version-for-alias-checks)\n",
       PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));

  return opt_result::success ();
}
3654
1619606c 3655/* Check whether we can use an internal function for a gather load
3656 or scatter store. READ_P is true for loads and false for stores.
3657 MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
3658 the type of the memory elements being loaded or stored. OFFSET_BITS
3659 is the number of bits in each scalar offset and OFFSET_SIGN is the
3660 sign of the offset. SCALE is the amount by which the offset should
3661 be multiplied *after* it has been converted to address width.
3662
3663 Return true if the function is supported, storing the function
3664 id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT. */
3665
1d2c127d 3666bool
1619606c 3667vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
3668 tree memory_type, unsigned int offset_bits,
3669 signop offset_sign, int scale,
3670 internal_fn *ifn_out, tree *element_type_out)
3671{
3672 unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
3673 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
3674 if (offset_bits > element_bits)
3675 /* Internal functions require the offset to be the same width as
3676 the vector elements. We can extend narrower offsets, but it isn't
3677 safe to truncate wider offsets. */
3678 return false;
3679
3680 if (element_bits != memory_bits)
3681 /* For now the vector elements must be the same width as the
3682 memory elements. */
3683 return false;
3684
3685 /* Work out which function we need. */
3686 internal_fn ifn;
3687 if (read_p)
3688 ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
3689 else
0bf8b382 3690 ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
1619606c 3691
3692 /* Test whether the target supports this combination. */
3693 if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
3694 offset_sign, scale))
3695 return false;
3696
3697 *ifn_out = ifn;
3698 *element_type_out = TREE_TYPE (vectype);
3699 return true;
3700}
3701
e068828a 3702/* STMT_INFO is a call to an internal gather load or scatter store function.
1619606c 3703 Describe the operation in INFO. */
3704
3705static void
e068828a 3706vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
3707 gather_scatter_info *info)
1619606c 3708{
e068828a 3709 gcall *call = as_a <gcall *> (stmt_info->stmt);
1619606c 3710 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3711 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3712
3713 info->ifn = gimple_call_internal_fn (call);
3714 info->decl = NULL_TREE;
3715 info->base = gimple_call_arg (call, 0);
3716 info->offset = gimple_call_arg (call, 1);
3717 info->offset_dt = vect_unknown_def_type;
3718 info->offset_vectype = NULL_TREE;
3719 info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
3720 info->element_type = TREE_TYPE (vectype);
3721 info->memory_type = TREE_TYPE (DR_REF (dr));
3722}
3723
/* Return true if a non-affine read or write in STMT_INFO is suitable for a
   gather load or scatter store.  Describe the operation in *INFO if so.  */

bool
vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
			   gather_scatter_info *info)
{
  HOST_WIDE_INT scale = 1;
  poly_int64 pbitpos, pbitsize;
  class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree offtype = NULL_TREE;
  tree decl = NULL_TREE, base, off;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree memory_type = TREE_TYPE (DR_REF (dr));
  machine_mode pmode;
  int punsignedp, reversep, pvolatilep = 0;
  internal_fn ifn;
  tree element_type;
  bool masked_p = false;

  /* See whether this is already a call to a gather/scatter internal function.
     If not, see whether it's a masked load or store.  */
  gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
  if (call && gimple_call_internal_p (call))
    {
      ifn = gimple_call_internal_fn (call);
      if (internal_gather_scatter_fn_p (ifn))
	{
	  vect_describe_gather_scatter_call (stmt_info, info);
	  return true;
	}
      masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
    }

  /* True if we should aim to use internal functions rather than
     built-in functions.  */
  bool use_ifn_p = (DR_IS_READ (dr)
		    ? supports_vec_gather_load_p ()
		    : supports_vec_scatter_store_p ());

  base = DR_REF (dr);
  /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
     see if we can use the def stmt of the address.  */
  if (masked_p
      && TREE_CODE (base) == MEM_REF
      && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
      && integer_zerop (TREE_OPERAND (base, 1))
      && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
      if (is_gimple_assign (def_stmt)
	  && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
	base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
    }

  /* The gather and scatter builtins need address of the form
     loop_invariant + vector * {1, 2, 4, 8}
     or
     loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
     Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
     of loop invariants/SSA_NAMEs defined in the loop, with casts,
     multiplications and additions in it.  To get a vector, we need
     a single SSA_NAME that will be defined in the loop and will
     contain everything that is not loop invariant and that can be
     vectorized.  The following code attempts to find such a preexisting
     SSA_NAME OFF and put the loop invariants into a tree BASE
     that can be gimplified before the loop.  */
  base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
			      &punsignedp, &reversep, &pvolatilep);
  /* Reverse storage order is not supported.  */
  if (reversep)
    return false;

  poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);

  if (TREE_CODE (base) == MEM_REF)
    {
      /* Fold any constant MEM_REF offset into OFF.  */
      if (!integer_zerop (TREE_OPERAND (base, 1)))
	{
	  if (off == NULL_TREE)
	    off = wide_int_to_tree (sizetype, mem_ref_offset (base));
	  else
	    off = size_binop (PLUS_EXPR, off,
			      fold_convert (sizetype, TREE_OPERAND (base, 1)));
	}
      base = TREE_OPERAND (base, 0);
    }
  else
    base = build_fold_addr_expr (base);

  if (off == NULL_TREE)
    off = size_zero_node;

  /* If base is not loop invariant, either off is 0, then we start with just
     the constant offset in the loop invariant BASE and continue with base
     as OFF, otherwise give up.
     We could handle that case by gimplifying the addition of base + off
     into some SSA_NAME and use that as off, but for now punt.  */
  if (!expr_invariant_in_loop_p (loop, base))
    {
      if (!integer_zerop (off))
	return false;
      off = base;
      base = size_int (pbytepos);
    }
  /* Otherwise put base + constant offset into the loop invariant BASE
     and continue with OFF.  */
  else
    {
      base = fold_convert (sizetype, base);
      base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
    }

  /* OFF at this point may be either a SSA_NAME or some tree expression
     from get_inner_reference.  Try to peel off loop invariants from it
     into BASE as long as possible.  */
  STRIP_NOPS (off);
  while (offtype == NULL_TREE)
    {
      enum tree_code code;
      tree op0, op1, add = NULL_TREE;

      if (TREE_CODE (off) == SSA_NAME)
	{
	  gimple *def_stmt = SSA_NAME_DEF_STMT (off);

	  if (expr_invariant_in_loop_p (loop, off))
	    return false;

	  if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
	    break;

	  op0 = gimple_assign_rhs1 (def_stmt);
	  code = gimple_assign_rhs_code (def_stmt);
	  op1 = gimple_assign_rhs2 (def_stmt);
	}
      else
	{
	  if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
	    return false;
	  code = TREE_CODE (off);
	  extract_ops_from_tree (off, &code, &op0, &op1);
	}
      switch (code)
	{
	case POINTER_PLUS_EXPR:
	case PLUS_EXPR:
	  /* Peel a loop-invariant addend into BASE (scaled, since BASE
	     is added after the offset has been multiplied by SCALE).  */
	  if (expr_invariant_in_loop_p (loop, op0))
	    {
	      add = op0;
	      off = op1;
	    do_add:
	      add = fold_convert (sizetype, add);
	      if (scale != 1)
		add = size_binop (MULT_EXPR, add, size_int (scale));
	      base = size_binop (PLUS_EXPR, base, add);
	      continue;
	    }
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = op1;
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MINUS_EXPR:
	  /* x - inv becomes x + (-inv), folded into BASE via do_add.  */
	  if (expr_invariant_in_loop_p (loop, op1))
	    {
	      add = fold_convert (sizetype, op1);
	      add = size_binop (MINUS_EXPR, size_zero_node, add);
	      off = op0;
	      goto do_add;
	    }
	  break;
	case MULT_EXPR:
	  if (scale == 1 && tree_fits_shwi_p (op1))
	    {
	      int new_scale = tree_to_shwi (op1);
	      /* Only treat this as a scaling operation if the target
		 supports it.  */
	      if (use_ifn_p
		  && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
						vectype, memory_type, 1,
						TYPE_SIGN (TREE_TYPE (op0)),
						new_scale, &ifn,
						&element_type))
		break;
	      scale = new_scale;
	      off = op0;
	      continue;
	    }
	  break;
	case SSA_NAME:
	  off = op0;
	  continue;
	CASE_CONVERT:
	  if (!POINTER_TYPE_P (TREE_TYPE (op0))
	      && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
	    break;
	  /* A same-precision conversion can simply be looked through.  */
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      == TYPE_PRECISION (TREE_TYPE (off)))
	    {
	      off = op0;
	      continue;
	    }

	  /* The internal functions need the offset to be the same width
	     as the elements of VECTYPE.  Don't include operations that
	     cast the offset from that width to a different width.  */
	  if (use_ifn_p
	      && (int_size_in_bytes (TREE_TYPE (vectype))
		  == int_size_in_bytes (TREE_TYPE (off))))
	    break;

	  /* Remember the widest type seen as OFFTYPE, then keep peeling
	     the narrower operand.  */
	  if (TYPE_PRECISION (TREE_TYPE (op0))
	      < TYPE_PRECISION (TREE_TYPE (off)))
	    {
	      off = op0;
	      offtype = TREE_TYPE (off);
	      STRIP_NOPS (off);
	      continue;
	    }
	  break;
	default:
	  break;
	}
      break;
    }

  /* If at the end OFF still isn't a SSA_NAME or isn't
     defined in the loop, punt.  */
  if (TREE_CODE (off) != SSA_NAME
      || expr_invariant_in_loop_p (loop, off))
    return false;

  if (offtype == NULL_TREE)
    offtype = TREE_TYPE (off);

  if (use_ifn_p)
    {
      if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
				     memory_type, TYPE_PRECISION (offtype),
				     TYPE_SIGN (offtype), scale, &ifn,
				     &element_type))
	return false;
    }
  else
    {
      /* Fall back to a target builtin, if any.  */
      if (DR_IS_READ (dr))
	{
	  if (targetm.vectorize.builtin_gather)
	    decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
	}
      else
	{
	  if (targetm.vectorize.builtin_scatter)
	    decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
	}

      if (!decl)
	return false;

      ifn = IFN_LAST;
      element_type = TREE_TYPE (vectype);
    }

  info->ifn = ifn;
  info->decl = decl;
  info->base = base;
  info->offset = off;
  info->offset_dt = vect_unknown_def_type;
  info->offset_vectype = NULL_TREE;
  info->scale = scale;
  info->element_type = element_type;
  info->memory_type = memory_type;
  return true;
}
4001
/* Find the data references in STMT, analyze them with respect to LOOP and
   append them to DATAREFS.  Return a failure result if datarefs in this
   stmt cannot be handled.  */

opt_result
vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
			       vec<data_reference_p> *datarefs)
{
  /* We can ignore clobbers for dataref analysis - they are removed during
     loop vectorization and BB vectorization checks dependences with a
     stmt walk.  */
  if (gimple_clobber_p (stmt))
    return opt_result::success ();

  if (gimple_has_volatile_ops (stmt))
    return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
				   stmt);

  if (stmt_can_throw_internal (cfun, stmt))
    return opt_result::failure_at (stmt,
				   "not vectorized:"
				   " statement can throw an exception: %G",
				   stmt);

  auto_vec<data_reference_p, 2> refs;
  opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
  if (!res)
    return res;

  if (refs.is_empty ())
    return opt_result::success ();

  /* We only handle a single data reference per statement.  */
  if (refs.length () > 1)
    return opt_result::failure_at (stmt,
				   "not vectorized:"
				   " more than one data ref in stmt: %G", stmt);

  /* Calls are only allowed if they are IFN_MASK_LOAD/IFN_MASK_STORE.  */
  if (gcall *call = dyn_cast <gcall *> (stmt))
    if (!gimple_call_internal_p (call)
	|| (gimple_call_internal_fn (call) != IFN_MASK_LOAD
	    && gimple_call_internal_fn (call) != IFN_MASK_STORE))
      return opt_result::failure_at (stmt,
				     "not vectorized: dr in a call %G", stmt);

  data_reference_p dr = refs.pop ();
  if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
      && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
    return opt_result::failure_at (stmt,
				   "not vectorized:"
				   " statement is bitfield access %G", stmt);

  if (DR_BASE_ADDRESS (dr)
      && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
    return opt_result::failure_at (stmt,
				   "not vectorized:"
				   " base addr of dr is a constant\n");

  /* Check whether this may be a SIMD lane access and adjust the
     DR to make it easier for us to handle it.  */
  if (loop
      && loop->simduid
      && (!DR_BASE_ADDRESS (dr)
	  || !DR_OFFSET (dr)
	  || !DR_INIT (dr)
	  || !DR_STEP (dr)))
    {
      /* Re-analyze the ref against the innermost loop: a SIMD lane
	 access looks invariant there (step 0).  */
      struct data_reference *newdr
	= create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
			   DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
      if (DR_BASE_ADDRESS (newdr)
	  && DR_OFFSET (newdr)
	  && DR_INIT (newdr)
	  && DR_STEP (newdr)
	  && TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
	  && integer_zerop (DR_STEP (newdr)))
	{
	  tree base_address = DR_BASE_ADDRESS (newdr);
	  tree off = DR_OFFSET (newdr);
	  tree step = ssize_int (1);
	  if (integer_zerop (off)
	      && TREE_CODE (base_address) == POINTER_PLUS_EXPR)
	    {
	      off = TREE_OPERAND (base_address, 1);
	      base_address = TREE_OPERAND (base_address, 0);
	    }
	  STRIP_NOPS (off);
	  /* Strip a constant multiplier off the offset; it becomes
	     the step of the SIMD lane access.  */
	  if (TREE_CODE (off) == MULT_EXPR
	      && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
	    {
	      step = TREE_OPERAND (off, 1);
	      off = TREE_OPERAND (off, 0);
	      STRIP_NOPS (off);
	    }
	  if (CONVERT_EXPR_P (off)
	      && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
		  < TYPE_PRECISION (TREE_TYPE (off))))
	    off = TREE_OPERAND (off, 0);
	  if (TREE_CODE (off) == SSA_NAME)
	    {
	      gimple *def = SSA_NAME_DEF_STMT (off);
	      /* Look through widening conversion.  */
	      if (is_gimple_assign (def)
		  && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def)))
		{
		  tree rhs1 = gimple_assign_rhs1 (def);
		  if (TREE_CODE (rhs1) == SSA_NAME
		      && INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
		      && (TYPE_PRECISION (TREE_TYPE (off))
			  > TYPE_PRECISION (TREE_TYPE (rhs1))))
		    def = SSA_NAME_DEF_STMT (rhs1);
		}
	      /* The lane index must come from .GOMP_SIMD_LANE for this
		 loop's simduid.  */
	      if (is_gimple_call (def)
		  && gimple_call_internal_p (def)
		  && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
		{
		  tree arg = gimple_call_arg (def, 0);
		  tree reft = TREE_TYPE (DR_REF (newdr));
		  gcc_assert (TREE_CODE (arg) == SSA_NAME);
		  arg = SSA_NAME_VAR (arg);
		  if (arg == loop->simduid
		      /* For now.  */
		      && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
		    {
		      DR_BASE_ADDRESS (newdr) = base_address;
		      DR_OFFSET (newdr) = ssize_int (0);
		      DR_STEP (newdr) = step;
		      DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
		      DR_STEP_ALIGNMENT (newdr) = highest_pow2_factor (step);
		      /* Mark as simd-lane access.  */
		      tree arg2 = gimple_call_arg (def, 1);
		      newdr->aux = (void *) (-1 - tree_to_uhwi (arg2));
		      free_data_ref (dr);
		      datarefs->safe_push (newdr);
		      return opt_result::success ();
		    }
		}
	    }
	}
      free_data_ref (newdr);
    }

  datarefs->safe_push (dr);
  return opt_result::success ();
}
4146
fb85abff 4147/* Function vect_analyze_data_refs.
4148
37545e54 4149 Find all the data references in the loop or basic block.
fb85abff 4150
4151 The general structure of the analysis of data refs in the vectorizer is as
4152 follows:
48e1416a 4153 1- vect_analyze_data_refs(loop/bb): call
37545e54 4154 compute_data_dependences_for_loop/bb to find and analyze all data-refs
4155 in the loop/bb and their dependences.
fb85abff 4156 2- vect_analyze_dependences(): apply dependence testing using ddrs.
4157 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4158 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4159
4160*/
4161
opt_result
vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal)
{
  class loop *loop = NULL;
  unsigned int i;
  struct data_reference *dr;
  tree scalar_type;

  DUMP_VECT_SCOPE ("vect_analyze_data_refs");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Go through the data-refs, check that the analysis succeeded.  Update
     pointer from stmt_vec_info struct to DR and vectype.  */

  vec<data_reference_p> datarefs = vinfo->shared->datarefs;
  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
      poly_uint64 vf;

      gcc_assert (DR_REF (dr));
      stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
      /* Each stmt is expected to carry at most one data reference;
	 link the stmt and the DR both ways through dr_aux.  */
      gcc_assert (!stmt_info->dr_aux.dr);
      stmt_info->dr_aux.dr = dr;
      stmt_info->dr_aux.stmt = stmt_info;

      /* Check that analysis of the data-ref succeeded.  */
      if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
	  || !DR_STEP (dr))
	{
	  /* Classic DR analysis failed; the access may still be handled
	     as a gather (read) or scatter (write) if the target has
	     builtins or internal-fn support for them.  */
	  bool maybe_gather
	    = DR_IS_READ (dr)
	      && !TREE_THIS_VOLATILE (DR_REF (dr))
	      && (targetm.vectorize.builtin_gather != NULL
		  || supports_vec_gather_load_p ());
	  bool maybe_scatter
	    = DR_IS_WRITE (dr)
	      && !TREE_THIS_VOLATILE (DR_REF (dr))
	      && (targetm.vectorize.builtin_scatter != NULL
		  || supports_vec_scatter_store_p ());

	  /* If the target supports vector gather loads or scatter stores,
	     check whether they can be used.  Only done for loop
	     vectorization of non-nested accesses.  */
	  if (is_a <loop_vec_info> (vinfo)
	      && !nested_in_vect_loop_p (loop, stmt_info))
	    {
	      if (maybe_gather || maybe_scatter)
		{
		  if (maybe_gather)
		    gatherscatter = GATHER;
		  else
		    gatherscatter = SCATTER;
		}
	    }

	  if (gatherscatter == SG_NONE)
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "not vectorized: data ref analysis "
				 "failed %G", stmt_info->stmt);
	      if (is_a <bb_vec_info> (vinfo))
		{
		  /* In BB vectorization the ref can still participate
		     in dependence analysis, we just can't vectorize it.  */
		  STMT_VINFO_VECTORIZABLE (stmt_info) = false;
		  continue;
		}
	      return opt_result::failure_at (stmt_info->stmt,
					     "not vectorized:"
					     " data ref analysis failed: %G",
					     stmt_info->stmt);
	    }
	}

      /* See if this was detected as SIMD lane access.  The magic aux
	 values -1..-4 are set earlier (by the ifcvt/simd handling when
	 the DR was recognized); their negation encodes the access kind
	 stored in STMT_VINFO_SIMD_LANE_ACCESS_P.  */
      if (dr->aux == (void *)-1
	  || dr->aux == (void *)-2
	  || dr->aux == (void *)-3
	  || dr->aux == (void *)-4)
	{
	  if (nested_in_vect_loop_p (loop, stmt_info))
	    return opt_result::failure_at (stmt_info->stmt,
					   "not vectorized:"
					   " data ref analysis failed: %G",
					   stmt_info->stmt);
	  STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info)
	    = -(uintptr_t) dr->aux;
	}

      /* Non-addressable (DECL_NONALIASED) bases cannot have their
	 address taken, which vectorized memory accesses require.  */
      tree base = get_base_address (DR_REF (dr));
      if (base && VAR_P (base) && DECL_NONALIASED (base))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "not vectorized: base object not addressable "
			     "for stmt: %G", stmt_info->stmt);
	  if (is_a <bb_vec_info> (vinfo))
	    {
	      /* In BB vectorization the ref can still participate
		 in dependence analysis, we just can't vectorize it.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  return opt_result::failure_at (stmt_info->stmt,
					 "not vectorized: base object not"
					 " addressable for stmt: %G",
					 stmt_info->stmt);
	}

      /* A non-constant step means a strided access; these are supported
	 in the outermost vectorized loop but not in a nested loop.  */
      if (is_a <loop_vec_info> (vinfo)
	  && DR_STEP (dr)
	  && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
	{
	  if (nested_in_vect_loop_p (loop, stmt_info))
	    return opt_result::failure_at (stmt_info->stmt,
					   "not vectorized:"
					   "not suitable for strided load %G",
					   stmt_info->stmt);
	  STMT_VINFO_STRIDED_P (stmt_info) = true;
	}

      /* Update DR field in stmt_vec_info struct.  */

      /* If the dataref is in an inner-loop of the loop that is considered for
	 for vectorization, we also want to analyze the access relative to
	 the outer-loop (DR contains information only relative to the
	 inner-most enclosing loop).  We do that by building a reference to the
	 first location accessed by the inner-loop, and analyze it relative to
	 the outer-loop.  */
      if (loop && nested_in_vect_loop_p (loop, stmt_info))
	{
	  /* Build a reference to the first location accessed by the
	     inner loop: *(BASE + INIT + OFFSET).  By construction,
	     this address must be invariant in the inner loop, so we
	     can consider it as being used in the outer loop.  */
	  tree base = unshare_expr (DR_BASE_ADDRESS (dr));
	  tree offset = unshare_expr (DR_OFFSET (dr));
	  tree init = unshare_expr (DR_INIT (dr));
	  tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
					  init, offset);
	  tree init_addr = fold_build_pointer_plus (base, init_offset);
	  tree init_ref = build_fold_indirect_ref (init_addr);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "analyze in outer loop: %T\n", init_ref);

	  opt_result res
	    = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
				    init_ref, loop, stmt_info->stmt);
	  if (!res)
	    /* dr_analyze_innermost already explained the failure.  */
	    return res;

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "\touter base_address: %T\n"
			     "\touter offset from base address: %T\n"
			     "\touter constant offset from base address: %T\n"
			     "\touter step: %T\n"
			     "\touter base alignment: %d\n\n"
			     "\touter base misalignment: %d\n"
			     "\touter offset alignment: %d\n"
			     "\touter step alignment: %d\n",
			     STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
			     STMT_VINFO_DR_OFFSET (stmt_info),
			     STMT_VINFO_DR_INIT (stmt_info),
			     STMT_VINFO_DR_STEP (stmt_info),
			     STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
			     STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
			     STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
			     STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
	}

      /* Set vectype for STMT.  */
      scalar_type = TREE_TYPE (DR_REF (dr));
      STMT_VINFO_VECTYPE (stmt_info)
	= get_vectype_for_scalar_type (scalar_type);
      if (!STMT_VINFO_VECTYPE (stmt_info))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not vectorized: no vectype for stmt: %G",
			       stmt_info->stmt);
	      dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }

	  if (is_a <bb_vec_info> (vinfo))
	    {
	      /* No vector type is fine, the ref can still participate
		 in dependence analysis, we just can't vectorize it.  */
	      STMT_VINFO_VECTORIZABLE (stmt_info) = false;
	      continue;
	    }
	  /* Missing vectype is not a hard failure for the whole function;
	     clear *FATAL so the caller may retry with other parameters.  */
	  if (fatal)
	    *fatal = false;
	  return opt_result::failure_at (stmt_info->stmt,
					 "not vectorized:"
					 " no vectype for stmt: %G"
					 " scalar_type: %T\n",
					 stmt_info->stmt, scalar_type);
	}
      else
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "got vectype for stmt: %G%T\n",
			     stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info));
	}

      /* Adjust the minimal vectorization factor according to the
	 vector type.  */
      vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
      *min_vf = upper_bound (*min_vf, vf);

      /* Validate the gather/scatter classification made above: the
	 target must accept the offset type as well.  */
      if (gatherscatter != SG_NONE)
	{
	  gather_scatter_info gs_info;
	  if (!vect_check_gather_scatter (stmt_info,
					  as_a <loop_vec_info> (vinfo),
					  &gs_info)
	      || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
	    {
	      if (fatal)
		*fatal = false;
	      return opt_result::failure_at
		(stmt_info->stmt,
		 (gatherscatter == GATHER)
		 ? "not vectorized: not suitable for gather load %G"
		 : "not vectorized: not suitable for scatter store %G",
		 stmt_info->stmt);
	    }
	  STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
	}
    }

  /* We used to stop processing and prune the list here.  Verify we no
     longer need to.  */
  gcc_assert (i == datarefs.length ());

  return opt_result::success ();
}
4411
4412
4413/* Function vect_get_new_vect_var.
4414
282bf14c 4415 Returns a name for a new variable. The current naming scheme appends the
48e1416a 4416 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
4417 the name of vectorizer generated variables, and appends that to NAME if
fb85abff 4418 provided. */
4419
4420tree
4421vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
4422{
4423 const char *prefix;
4424 tree new_vect_var;
4425
4426 switch (var_kind)
4427 {
4428 case vect_simple_var:
0bf5f81b 4429 prefix = "vect";
fb85abff 4430 break;
4431 case vect_scalar_var:
0bf5f81b 4432 prefix = "stmp";
fb85abff 4433 break;
dab48979 4434 case vect_mask_var:
4435 prefix = "mask";
4436 break;
fb85abff 4437 case vect_pointer_var:
0bf5f81b 4438 prefix = "vectp";
fb85abff 4439 break;
4440 default:
4441 gcc_unreachable ();
4442 }
4443
4444 if (name)
4445 {
0bf5f81b 4446 char* tmp = concat (prefix, "_", name, NULL);
35244493 4447 new_vect_var = create_tmp_reg (type, tmp);
fb85abff 4448 free (tmp);
4449 }
4450 else
35244493 4451 new_vect_var = create_tmp_reg (type, prefix);
fb85abff 4452
4453 return new_vect_var;
4454}
4455
23ffec42 4456/* Like vect_get_new_vect_var but return an SSA name. */
4457
4458tree
4459vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
4460{
4461 const char *prefix;
4462 tree new_vect_var;
4463
4464 switch (var_kind)
4465 {
4466 case vect_simple_var:
4467 prefix = "vect";
4468 break;
4469 case vect_scalar_var:
4470 prefix = "stmp";
4471 break;
4472 case vect_pointer_var:
4473 prefix = "vectp";
4474 break;
4475 default:
4476 gcc_unreachable ();
4477 }
4478
4479 if (name)
4480 {
4481 char* tmp = concat (prefix, "_", name, NULL);
4482 new_vect_var = make_temp_ssa_name (type, NULL, tmp);
4483 free (tmp);
4484 }
4485 else
4486 new_vect_var = make_temp_ssa_name (type, NULL, prefix);
4487
4488 return new_vect_var;
4489}
4490
abc9513d 4491/* Duplicate ptr info and set alignment/misaligment on NAME from DR_INFO. */
4a2edd22 4492
4493static void
abc9513d 4494vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
4a2edd22 4495{
abc9513d 4496 duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
4497 int misalign = DR_MISALIGNMENT (dr_info);
df8e9f7a 4498 if (misalign == DR_MISALIGNMENT_UNKNOWN)
4a2edd22 4499 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
4500 else
aec313e5 4501 set_ptr_info_alignment (SSA_NAME_PTR_INFO (name),
e092c20e 4502 known_alignment (DR_TARGET_ALIGNMENT (dr_info)),
4503 misalign);
4a2edd22 4504}
fb85abff 4505
4506/* Function vect_create_addr_base_for_vector_ref.
4507
4508 Create an expression that computes the address of the first memory location
4509 that will be accessed for a data reference.
4510
4511 Input:
ecc42a77 4512 STMT_INFO: The statement containing the data reference.
fb85abff 4513 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
 4514 OFFSET: Optional. If supplied, it is added to the initial address.
4515 LOOP: Specify relative to which loop-nest should the address be computed.
4516 For example, when the dataref is in an inner-loop nested in an
4517 outer-loop that is now being vectorized, LOOP can be either the
282bf14c 4518 outer-loop, or the inner-loop. The first memory location accessed
fb85abff 4519 by the following dataref ('in' points to short):
4520
4521 for (i=0; i<N; i++)
4522 for (j=0; j<M; j++)
4523 s += in[i+j]
4524
4525 is as follows:
4526 if LOOP=i_loop: &in (relative to i_loop)
4527 if LOOP=j_loop: &in+i*2B (relative to j_loop)
1ec61bbd 4528 BYTE_OFFSET: Optional, defaulted to NULL. If supplied, it is added to the
4529 initial address. Unlike OFFSET, which is number of elements to
4530 be added, BYTE_OFFSET is measured in bytes.
fb85abff 4531
4532 Output:
48e1416a 4533 1. Return an SSA_NAME whose value is the address of the memory location of
fb85abff 4534 the first vector of the data reference.
4535 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
4536 these statement(s) which define the returned SSA_NAME.
4537
4538 FORNOW: We are only handling array accesses with step 1. */
4539
tree
vect_create_addr_base_for_vector_ref (stmt_vec_info stmt_info,
				      gimple_seq *new_stmt_list,
				      tree offset,
				      tree byte_offset)
{
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  const char *base_name;
  tree addr_base;
  tree dest;
  gimple_seq seq = NULL;
  tree vect_ptr_type;
  /* Element size; OFFSET is counted in elements and scaled by this.  */
  tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  innermost_loop_behavior *drb = vect_dr_behavior (dr_info);

  tree data_ref_base = unshare_expr (drb->base_address);
  tree base_offset = unshare_expr (drb->offset);
  tree init = unshare_expr (drb->init);

  if (loop_vinfo)
    base_name = get_name (data_ref_base);
  else
    {
      /* For BB vectorization the address is taken directly from the
	 reference below, so no base/init adjustment applies.  */
      base_offset = ssize_int (0);
      init = ssize_int (0);
      base_name = get_name (DR_REF (dr));
    }

  /* Create base_offset */
  base_offset = size_binop (PLUS_EXPR,
			    fold_convert (sizetype, base_offset),
			    fold_convert (sizetype, init));

  if (offset)
    {
      /* OFFSET is in elements; convert to bytes using the element size.  */
      offset = fold_build2 (MULT_EXPR, sizetype,
			    fold_convert (sizetype, offset), step);
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
				 base_offset, offset);
    }
  if (byte_offset)
    {
      /* BYTE_OFFSET is already in bytes; add it directly.  */
      byte_offset = fold_convert (sizetype, byte_offset);
      base_offset = fold_build2 (PLUS_EXPR, sizetype,
				 base_offset, byte_offset);
    }

  /* base + base_offset */
  if (loop_vinfo)
    addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
  else
    {
      addr_base = build1 (ADDR_EXPR,
			  build_pointer_type (TREE_TYPE (DR_REF (dr))),
			  unshare_expr (DR_REF (dr)));
    }

  /* Gimplify the address into DEST, collecting any statements needed
     to compute it into SEQ, which is appended to NEW_STMT_LIST for the
     caller to insert.  */
  vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
  dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
  addr_base = force_gimple_operand (addr_base, &seq, true, dest);
  gimple_seq_add_seq (new_stmt_list, seq);

  if (DR_PTR_INFO (dr)
      && TREE_CODE (addr_base) == SSA_NAME
      && !SSA_NAME_PTR_INFO (addr_base))
    {
      vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
      /* Any extra offset invalidates the alignment copied from the DR.  */
      if (offset || byte_offset)
	mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);

  return addr_base;
}
4618
4619
4620/* Function vect_create_data_ref_ptr.
4621
bd5ba09f 4622 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
ecc42a77 4623 location accessed in the loop by STMT_INFO, along with the def-use update
bd5ba09f 4624 chain to appropriately advance the pointer through the loop iterations.
4625 Also set aliasing information for the pointer. This pointer is used by
4626 the callers to this function to create a memory reference expression for
4627 vector load/store access.
fb85abff 4628
4629 Input:
ecc42a77 4630 1. STMT_INFO: a stmt that references memory. Expected to be of the form
fb85abff 4631 GIMPLE_ASSIGN <name, data-ref> or
4632 GIMPLE_ASSIGN <data-ref, name>.
bd5ba09f 4633 2. AGGR_TYPE: the type of the reference, which should be either a vector
4634 or an array.
4635 3. AT_LOOP: the loop where the vector memref is to be created.
4636 4. OFFSET (optional): an offset to be added to the initial address accessed
ecc42a77 4637 by the data-ref in STMT_INFO.
bd5ba09f 4638 5. BSI: location where the new stmts are to be placed if there is no loop
4639 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
fb85abff 4640 pointing to the initial address.
1ec61bbd 4641 7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
ecc42a77 4642 to the initial address accessed by the data-ref in STMT_INFO. This is
1ec61bbd 4643 similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
4644 in bytes.
1f9a3b5c 4645 8. IV_STEP (optional, defaults to NULL): the amount that should be added
4646 to the IV during each iteration of the loop. NULL says to move
4647 by one copy of AGGR_TYPE up or down, depending on the step of the
4648 data reference.
fb85abff 4649
4650 Output:
4651 1. Declare a new ptr to vector_type, and have it point to the base of the
4652 data reference (initial addressed accessed by the data reference).
4653 For example, for vector of type V8HI, the following code is generated:
4654
bd5ba09f 4655 v8hi *ap;
4656 ap = (v8hi *)initial_address;
fb85abff 4657
4658 if OFFSET is not supplied:
4659 initial_address = &a[init];
4660 if OFFSET is supplied:
4661 initial_address = &a[init + OFFSET];
1ec61bbd 4662 if BYTE_OFFSET is supplied:
4663 initial_address = &a[init] + BYTE_OFFSET;
fb85abff 4664
4665 Return the initial_address in INITIAL_ADDRESS.
4666
4667 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
48e1416a 4668 update the pointer in each iteration of the loop.
fb85abff 4669
4670 Return the increment stmt that updates the pointer in PTR_INCR.
4671
3c8b7bc7 4672 3. Return the pointer. */
fb85abff 4673
tree
vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
			  class loop *at_loop, tree offset,
			  tree *initial_address, gimple_stmt_iterator *gsi,
			  gimple **ptr_incr, bool only_init,
			  tree byte_offset, tree iv_step)
{
  const char *base_name;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *loop = NULL;
  bool nested_in_vect_loop = false;
  class loop *containing_loop = NULL;
  tree aggr_ptr_type;
  tree aggr_ptr;
  tree new_temp;
  gimple_seq new_stmt_list = NULL;
  edge pe = NULL;
  basic_block new_bb;
  tree aggr_ptr_init;
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  tree aptr;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree indx_before_incr, indx_after_incr;
  gimple *incr;
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);

  /* Without an explicit IV_STEP the step is derived from AGGR_TYPE's
     size, so the type must be an array or vector.  */
  gcc_assert (iv_step != NULL_TREE
	      || TREE_CODE (aggr_type) == ARRAY_TYPE
	      || TREE_CODE (aggr_type) == VECTOR_TYPE);

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
      containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
      pe = loop_preheader_edge (loop);
    }
  else
    {
      /* BB vectorization: no loop, hence no pointer update cycle.  */
      gcc_assert (bb_vinfo);
      only_init = true;
      *ptr_incr = NULL;
    }

  /* Create an expression for the first address accessed by this load
     in LOOP.  */
  base_name = get_name (DR_BASE_ADDRESS (dr));

  if (dump_enabled_p ())
    {
      tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
      dump_printf_loc (MSG_NOTE, vect_location,
		       "create %s-pointer variable to type: %T",
		       get_tree_code_name (TREE_CODE (aggr_type)),
		       aggr_type);
      if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
	dump_printf (MSG_NOTE, "  vectorizing an array ref: ");
      else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
	dump_printf (MSG_NOTE, "  vectorizing a vector ref: ");
      else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
	dump_printf (MSG_NOTE, "  vectorizing a record based array ref: ");
      else
	dump_printf (MSG_NOTE, "  vectorizing a pointer ref: ");
      dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
    }

  /* (1) Create the new aggregate-pointer variable.
     Vector and array types inherit the alias set of their component
     type by default so we need to use a ref-all pointer if the data
     reference does not conflict with the created aggregated data
     reference because it is not addressable.  */
  bool need_ref_all = false;
  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
			      get_alias_set (DR_REF (dr))))
    need_ref_all = true;
  /* Likewise for any of the data references in the stmt group.  */
  else if (DR_GROUP_SIZE (stmt_info) > 1)
    {
      stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
      do
	{
	  struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
	  if (!alias_sets_conflict_p (get_alias_set (aggr_type),
				      get_alias_set (DR_REF (sdr))))
	    {
	      need_ref_all = true;
	      break;
	    }
	  sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
	}
      while (sinfo);
    }
  aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
					       need_ref_all);
  aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);


  /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
     vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
     def-use update cycles for the pointer: one relative to the outer-loop
     (LOOP), which is what steps (3) and (4) below do.  The other is relative
     to the inner-loop (which is the inner-most loop containing the dataref),
     and this is done be step (5) below.

     When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
     inner-most loop, and so steps (3),(4) work the same, and step (5) is
     redundant.  Steps (3),(4) create the following:

	vp0 = &base_addr;
	LOOP:	vp1 = phi(vp0,vp2)
		...
		...
		vp2 = vp1 + step
		goto LOOP

     If there is an inner-loop nested in loop, then step (5) will also be
     applied, and an additional update in the inner-loop will be created:

	vp0 = &base_addr;
	LOOP:   vp1 = phi(vp0,vp2)
		...
        inner:     vp3 = phi(vp1,vp4)
	           vp4 = vp3 + inner_step
	           if () goto inner
		...
		vp2 = vp1 + step
		if () goto LOOP   */

  /* (2) Calculate the initial address of the aggregate-pointer, and set
     the aggregate-pointer to point to it before the loop.  */

  /* Create: (&(base[init_val+offset]+byte_offset) in the loop preheader.  */

  new_temp = vect_create_addr_base_for_vector_ref (stmt_info, &new_stmt_list,
						   offset, byte_offset);
  if (new_stmt_list)
    {
      if (pe)
	{
	  /* Loop case: address computation goes on the preheader edge,
	     which must not require splitting (no new BB).  */
	  new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
	  gcc_assert (!new_bb);
	}
      else
	gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
    }

  *initial_address = new_temp;
  aggr_ptr_init = new_temp;

  /* (3) Handle the updating of the aggregate-pointer inside the loop.
     This is needed when ONLY_INIT is false, and also when AT_LOOP is the
     inner-loop nested in LOOP (during outer-loop vectorization).  */

  /* No update in loop is required.  */
  if (only_init && (!loop_vinfo || at_loop == loop))
    aptr = aggr_ptr_init;
  else
    {
      /* Accesses to invariant addresses should be handled specially
	 by the caller.  */
      tree step = vect_dr_behavior (dr_info)->step;
      gcc_assert (!integer_zerop (step));

      if (iv_step == NULL_TREE)
	{
	  /* The step of the aggregate pointer is the type size,
	     negated for downward accesses.  */
	  iv_step = TYPE_SIZE_UNIT (aggr_type);
	  if (tree_int_cst_sgn (step) == -1)
	    iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
	}

      standard_iv_increment_position (loop, &incr_gsi, &insert_after);

      create_iv (aggr_ptr_init,
		 fold_convert (aggr_ptr_type, iv_step),
		 aggr_ptr, loop, &incr_gsi, insert_after,
		 &indx_before_incr, &indx_after_incr);
      incr = gsi_stmt (incr_gsi);
      loop_vinfo->add_stmt (incr);

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
	{
	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
	}
      if (ptr_incr)
	*ptr_incr = incr;

      aptr = indx_before_incr;
    }

  if (!nested_in_vect_loop || only_init)
    return aptr;


  /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
     nested in LOOP, if exists.  */

  gcc_assert (nested_in_vect_loop);
  if (!only_init)
    {
      standard_iv_increment_position (containing_loop, &incr_gsi,
				      &insert_after);
      create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
		 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
		 &indx_after_incr);
      incr = gsi_stmt (incr_gsi);
      loop_vinfo->add_stmt (incr);

      /* Copy the points-to information if it exists. */
      if (DR_PTR_INFO (dr))
	{
	  vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
	  vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
	}
      if (ptr_incr)
	*ptr_incr = incr;

      return indx_before_incr;
    }
  else
    gcc_unreachable ();
}
4901
4902
4903/* Function bump_vector_ptr
4904
4905 Increment a pointer (to a vector type) by vector-size. If requested,
48e1416a 4906 i.e. if PTR-INCR is given, then also connect the new increment stmt
fb85abff 4907 to the existing def-use update-chain of the pointer, by modifying
4908 the PTR_INCR as illustrated below:
4909
4910 The pointer def-use update-chain before this function:
4911 DATAREF_PTR = phi (p_0, p_2)
4912 ....
48e1416a 4913 PTR_INCR: p_2 = DATAREF_PTR + step
fb85abff 4914
4915 The pointer def-use update-chain after this function:
4916 DATAREF_PTR = phi (p_0, p_2)
4917 ....
4918 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4919 ....
4920 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4921
4922 Input:
48e1416a 4923 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
fb85abff 4924 in the loop.
48e1416a 4925 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
fb85abff 4926 the loop. The increment amount across iterations is expected
48e1416a 4927 to be vector_size.
fb85abff 4928 BSI - location where the new update stmt is to be placed.
ecc42a77 4929 STMT_INFO - the original scalar memory-access stmt that is being vectorized.
fb85abff 4930 BUMP - optional. The offset by which to bump the pointer. If not given,
4931 the offset is assumed to be vector_size.
4932
4933 Output: Return NEW_DATAREF_PTR as illustrated above.
48e1416a 4934
fb85abff 4935*/
4936
tree
bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
		 stmt_vec_info stmt_info, tree bump)
{
  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  /* Default bump is one vector's worth of bytes.  */
  tree update = TYPE_SIZE_UNIT (vectype);
  gassign *incr_stmt;
  ssa_op_iter iter;
  use_operand_p use_p;
  tree new_dataref_ptr;

  if (bump)
    update = bump;

  /* Create NEW_DATAREF_PTR = DATAREF_PTR + UPDATE and emit it at GSI.  */
  if (TREE_CODE (dataref_ptr) == SSA_NAME)
    new_dataref_ptr = copy_ssa_name (dataref_ptr);
  else
    new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
  incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
				   dataref_ptr, update);
  vect_finish_stmt_generation (stmt_info, incr_stmt, gsi);

  /* Copy the points-to information if it exists. */
  if (DR_PTR_INFO (dr))
    {
      duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
      /* The bump may change the pointer's alignment, so drop it.  */
      mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
    }

  if (!ptr_incr)
    return new_dataref_ptr;

  /* Update the vector-pointer's cross-iteration increment: redirect
     PTR_INCR's use of the old pointer to the bumped one; any other use
     operand must be the step itself.  */
  FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
    {
      tree use = USE_FROM_PTR (use_p);

      if (use == dataref_ptr)
	SET_USE (use_p, new_dataref_ptr);
      else
	gcc_assert (operand_equal_p (use, update, 0));
    }

  return new_dataref_ptr;
}
4983
4984
1c4c7e32 4985/* Copy memory reference info such as base/clique from the SRC reference
4986 to the DEST MEM_REF. */
4987
4988void
4989vect_copy_ref_info (tree dest, tree src)
4990{
4991 if (TREE_CODE (dest) != MEM_REF)
4992 return;
4993
4994 tree src_base = src;
4995 while (handled_component_p (src_base))
4996 src_base = TREE_OPERAND (src_base, 0);
4997 if (TREE_CODE (src_base) != MEM_REF
4998 && TREE_CODE (src_base) != TARGET_MEM_REF)
4999 return;
5000
5001 MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
5002 MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
5003}
5004
5005
fb85abff 5006/* Function vect_create_destination_var.
5007
5008 Create a new temporary of type VECTYPE. */
5009
5010tree
5011vect_create_destination_var (tree scalar_dest, tree vectype)
5012{
5013 tree vec_dest;
0bf5f81b 5014 const char *name;
5015 char *new_name;
fb85abff 5016 tree type;
5017 enum vect_var_kind kind;
5018
dab48979 5019 kind = vectype
5020 ? VECTOR_BOOLEAN_TYPE_P (vectype)
5021 ? vect_mask_var
5022 : vect_simple_var
5023 : vect_scalar_var;
fb85abff 5024 type = vectype ? vectype : TREE_TYPE (scalar_dest);
5025
5026 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
5027
0bf5f81b 5028 name = get_name (scalar_dest);
5029 if (name)
b33b6e58 5030 new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
0bf5f81b 5031 else
b33b6e58 5032 new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
fb85abff 5033 vec_dest = vect_get_new_vect_var (type, kind, new_name);
0bf5f81b 5034 free (new_name);
fb85abff 5035
5036 return vec_dest;
5037}
5038
ee612634 5039/* Function vect_grouped_store_supported.
fb85abff 5040
42f6a6e8 5041 Returns TRUE if interleave high and interleave low permutations
5042 are supported, and FALSE otherwise. */
fb85abff 5043
5044bool
ee612634 5045vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
fb85abff 5046{
3754d046 5047 machine_mode mode = TYPE_MODE (vectype);
48e1416a 5048
d53391a8 5049 /* vect_permute_store_chain requires the group size to be equal to 3 or
5050 be a power of two. */
5051 if (count != 3 && exact_log2 (count) == -1)
481fc474 5052 {
6d8fb6cf 5053 if (dump_enabled_p ())
7bd765d4 5054 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
d53391a8 5055 "the size of the group of accesses"
5056 " is not a power of 2 or not eqaul to 3\n");
481fc474 5057 return false;
5058 }
5059
42f6a6e8 5060 /* Check that the permutation is supported. */
8bec2124 5061 if (VECTOR_MODE_P (mode))
5062 {
ba7efd65 5063 unsigned int i;
d53391a8 5064 if (count == 3)
8bec2124 5065 {
d53391a8 5066 unsigned int j0 = 0, j1 = 0, j2 = 0;
5067 unsigned int i, j;
5068
ba7efd65 5069 unsigned int nelt;
5070 if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
5071 {
5072 if (dump_enabled_p ())
5073 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5074 "cannot handle groups of 3 stores for"
5075 " variable-length vectors\n");
5076 return false;
5077 }
5078
c3fa7fe9 5079 vec_perm_builder sel (nelt, nelt, 1);
5080 sel.quick_grow (nelt);
1957c019 5081 vec_perm_indices indices;
d53391a8 5082 for (j = 0; j < 3; j++)
5083 {
5084 int nelt0 = ((3 - j) * nelt) % 3;
5085 int nelt1 = ((3 - j) * nelt + 1) % 3;
5086 int nelt2 = ((3 - j) * nelt + 2) % 3;
5087 for (i = 0; i < nelt; i++)
5088 {
5089 if (3 * i + nelt0 < nelt)
5090 sel[3 * i + nelt0] = j0++;
5091 if (3 * i + nelt1 < nelt)
5092 sel[3 * i + nelt1] = nelt + j1++;
5093 if (3 * i + nelt2 < nelt)
5094 sel[3 * i + nelt2] = 0;
5095 }
1957c019 5096 indices.new_vector (sel, 2, nelt);
5097 if (!can_vec_perm_const_p (mode, indices))
d53391a8 5098 {
5099 if (dump_enabled_p ())
5100 dump_printf (MSG_MISSED_OPTIMIZATION,
97f7d65e 5101 "permutation op not supported by target.\n");
d53391a8 5102 return false;
5103 }
5104
5105 for (i = 0; i < nelt; i++)
5106 {
5107 if (3 * i + nelt0 < nelt)
5108 sel[3 * i + nelt0] = 3 * i + nelt0;
5109 if (3 * i + nelt1 < nelt)
5110 sel[3 * i + nelt1] = 3 * i + nelt1;
5111 if (3 * i + nelt2 < nelt)
5112 sel[3 * i + nelt2] = nelt + j2++;
5113 }
1957c019 5114 indices.new_vector (sel, 2, nelt);
5115 if (!can_vec_perm_const_p (mode, indices))
d53391a8 5116 {
5117 if (dump_enabled_p ())
5118 dump_printf (MSG_MISSED_OPTIMIZATION,
97f7d65e 5119 "permutation op not supported by target.\n");
d53391a8 5120 return false;
5121 }
5122 }
5123 return true;
8bec2124 5124 }
d53391a8 5125 else
8bec2124 5126 {
d53391a8 5127 /* If length is not equal to 3 then only power of 2 is supported. */
ac29ece2 5128 gcc_assert (pow2p_hwi (count));
ba7efd65 5129 poly_uint64 nelt = GET_MODE_NUNITS (mode);
d53391a8 5130
c3fa7fe9 5131 /* The encoding has 2 interleaved stepped patterns. */
5132 vec_perm_builder sel (nelt, 2, 3);
5133 sel.quick_grow (6);
5134 for (i = 0; i < 3; i++)
d53391a8 5135 {
5136 sel[i * 2] = i;
5137 sel[i * 2 + 1] = i + nelt;
5138 }
1957c019 5139 vec_perm_indices indices (sel, 2, nelt);
5140 if (can_vec_perm_const_p (mode, indices))
282dc861 5141 {
c3fa7fe9 5142 for (i = 0; i < 6; i++)
ba7efd65 5143 sel[i] += exact_div (nelt, 2);
1957c019 5144 indices.new_vector (sel, 2, nelt);
5145 if (can_vec_perm_const_p (mode, indices))
282dc861 5146 return true;
5147 }
8bec2124 5148 }
5149 }
fb85abff 5150
6d8fb6cf 5151 if (dump_enabled_p ())
7bd765d4 5152 dump_printf (MSG_MISSED_OPTIMIZATION,
12554a62 5153 "permutation op not supported by target.\n");
6620d7d7 5154 return false;
fb85abff 5155}
5156
5157
2dd8e84c 5158/* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
5159 type VECTYPE. MASKED_P says whether the masked form is needed. */
94b7b4dd 5160
5161bool
2dd8e84c 5162vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
5163 bool masked_p)
94b7b4dd 5164{
2dd8e84c 5165 if (masked_p)
5166 return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
5167 vec_mask_store_lanes_optab,
5168 vectype, count);
5169 else
5170 return vect_lanes_optab_supported_p ("vec_store_lanes",
5171 vec_store_lanes_optab,
5172 vectype, count);
94b7b4dd 5173}
5174
5175
/* Function vect_permute_store_chain.

   Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
   a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
   the data correctly for the stores.  Return the final references for stores
   in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number to
   each element, the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0  8 16 24  1  9 17 25
   2nd vec:  2 10 18 26  3 11 19 27
   3rd vec:  4 12 20 28  5 13 21 30
   4th vec:  6 14 22 30  7 15 23 31

   i.e., we interleave the contents of the four vectors in their order.

   We use interleave_high/low instructions to create such output.  The input of
   each interleave_high/low operation is two vectors:
   1st vec    2nd vec
   0 1 2 3    4 5 6 7
   the even elements of the result vector are obtained left-to-right from the
   high/low elements of the first vector.  The odd elements of the result are
   obtained left-to-right from the high/low elements of the second vector.
   The output of interleave_high will be:   0 4 1 5
   and of interleave_low:                   2 6 3 7


   The permutation is done in log LENGTH stages.  In each stage interleave_high
   and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
   where the first argument is taken from the first half of DR_CHAIN and the
   second argument from it's second half.
   In our example,

   I1: interleave_high (1st vec, 3rd vec)
   I2: interleave_low (1st vec, 3rd vec)
   I3: interleave_high (2nd vec, 4th vec)
   I4: interleave_low (2nd vec, 4th vec)

   The output for the first stage is:

   I1:  0 16  1 17  2 18  3 19
   I2:  4 20  5 21  6 22  7 23
   I3:  8 24  9 25 10 26 11 27
   I4: 12 28 13 29 14 30 15 31

   The output of the second stage, i.e. the final result is:

   I1:  0  8 16 24  1  9 17 25
   I2:  2 10 18 26  3 11 19 27
   I3:  4 12 20 28  5 13 21 30
   I4:  6 14 22 30  7 15 23 31.  */

void
vect_permute_store_chain (vec<tree> dr_chain,
			  unsigned int length,
			  stmt_vec_info stmt_info,
			  gimple_stmt_iterator *gsi,
			  vec<tree> *result_chain)
{
  tree vect1, vect2, high, low;
  gimple *perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  tree perm_mask_low, perm_mask_high;
  tree data_ref;
  tree perm3_mask_low, perm3_mask_high;
  unsigned int i, j, n, log_length = exact_log2 (length);

  /* Initialize RESULT_CHAIN as a copy of DR_CHAIN; entries are
     overwritten below with the permuted SSA names.  */
  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
	  length * sizeof (tree));

  if (length == 3)
    {
      /* vect_grouped_store_supported ensures that this is constant.  */
      unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
      unsigned int j0 = 0, j1 = 0, j2 = 0;

      vec_perm_builder sel (nelt, nelt, 1);
      sel.quick_grow (nelt);
      vec_perm_indices indices;
      /* Each of the 3 output vectors is produced by two chained
	 VEC_PERM_EXPRs, using selectors built per output J.  */
      for (j = 0; j < 3; j++)
	{
	  int nelt0 = ((3 - j) * nelt) % 3;
	  int nelt1 = ((3 - j) * nelt + 1) % 3;
	  int nelt2 = ((3 - j) * nelt + 2) % 3;

	  for (i = 0; i < nelt; i++)
	    {
	      if (3 * i + nelt0 < nelt)
		sel[3 * i + nelt0] = j0++;
	      if (3 * i + nelt1 < nelt)
		sel[3 * i + nelt1] = nelt + j1++;
	      if (3 * i + nelt2 < nelt)
		sel[3 * i + nelt2] = 0;
	    }
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);

	  for (i = 0; i < nelt; i++)
	    {
	      if (3 * i + nelt0 < nelt)
		sel[3 * i + nelt0] = 3 * i + nelt0;
	      if (3 * i + nelt1 < nelt)
		sel[3 * i + nelt1] = 3 * i + nelt1;
	      if (3 * i + nelt2 < nelt)
		sel[3 * i + nelt2] = nelt + j2++;
	    }
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);

	  vect1 = dr_chain[0];
	  vect2 = dr_chain[1];

	  /* Create interleaving stmt:
	     low = VEC_PERM_EXPR <vect1, vect2,
				  {j, nelt, *, j + 1, nelt + j + 1, *,
				   j + 2, nelt + j + 2, *, ...}>  */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
					   vect2, perm3_mask_low);
	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);

	  vect1 = data_ref;
	  vect2 = dr_chain[2];
	  /* Create interleaving stmt:
	     low = VEC_PERM_EXPR <vect1, vect2,
				  {0, 1, nelt + j, 3, 4, nelt + j + 1,
				   6, 7, nelt + j + 2, ...}>  */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
					   vect2, perm3_mask_high);
	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	  (*result_chain)[j] = data_ref;
	}
    }
  else
    {
      /* If length is not equal to 3 then only power of 2 is supported.  */
      gcc_assert (pow2p_hwi (length));

      /* The encoding has 2 interleaved stepped patterns.  */
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
      vec_perm_builder sel (nelt, 2, 3);
      sel.quick_grow (6);
      for (i = 0; i < 3; i++)
	{
	  sel[i * 2] = i;
	  sel[i * 2 + 1] = i + nelt;
	}
      vec_perm_indices indices (sel, 2, nelt);
      perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);

      for (i = 0; i < 6; i++)
	sel[i] += exact_div (nelt, 2);
      indices.new_vector (sel, 2, nelt);
      perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);

      /* log2(LENGTH) stages; each stage interleaves vector J of the first
	 half of DR_CHAIN with vector J of the second half.  */
      for (i = 0, n = log_length; i < n; i++)
	{
	  for (j = 0; j < length/2; j++)
	    {
	      vect1 = dr_chain[j];
	      vect2 = dr_chain[j+length/2];

	      /* Create interleaving stmt:
		 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
						      ...}>  */
	      high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
	      perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
					       vect2, perm_mask_high);
	      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	      (*result_chain)[2*j] = high;

	      /* Create interleaving stmt:
		 low = VEC_PERM_EXPR <vect1, vect2,
				      {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
				       ...}>  */
	      low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
	      perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
					       vect2, perm_mask_low);
	      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	      (*result_chain)[2*j+1] = low;
	    }
	  /* Feed this stage's output back in as the next stage's input.  */
	  memcpy (dr_chain.address (), result_chain->address (),
		  length * sizeof (tree));
	}
    }
}
5373
/* Function vect_setup_realignment

   This function is called when vectorizing an unaligned load using
   the dr_explicit_realign[_optimized] scheme.
   This function generates the following code at the loop prolog:

      p = initial_addr;
   x  msq_init = *(floor(p));   # prolog load
      realignment_token = call target_builtin;
    loop:
   x  msq = phi (msq_init, ---)

   The stmts marked with x are generated only for the case of
   dr_explicit_realign_optimized.

   The code above sets up a new (vector) pointer, pointing to the first
   location accessed by STMT_INFO, and a "floor-aligned" load using that
   pointer.  It also generates code to compute the "realignment-token"
   (if the relevant target hook was defined), and creates a phi-node at the
   loop-header bb whose arguments are the result of the prolog-load (created
   by this function) and the result of a load that takes place in the loop
   (to be created by the caller to this function).

   For the case of dr_explicit_realign_optimized:
   The caller to this function uses the phi-result (msq) to create the
   realignment code inside the loop, and sets up the missing phi argument,
   as follows:
    loop:
      msq = phi (msq_init, lsq)
      lsq = *(floor(p'));        # load in loop
      result = realign_load (msq, lsq, realignment_token);

   For the case of dr_explicit_realign:
    loop:
      msq = *(floor(p));	# load in loop
      p' = p + (VS-1);
      lsq = *(floor(p'));	# load in loop
      result = realign_load (msq, lsq, realignment_token);

   Input:
   STMT_INFO - (scalar) load stmt to be vectorized.  This load accesses
	       a memory location that may be unaligned.
   BSI - place where new code is to be inserted.
   ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
			      is used.

   Output:
   REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
                       target hook, if defined.
   Return value - the result of the loop-header phi node.  */

tree
vect_setup_realignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
                        tree *realignment_token,
			enum dr_alignment_support alignment_support_scheme,
			tree init_addr,
			class loop **at_loop)
{
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
  struct data_reference *dr = dr_info->dr;
  class loop *loop = NULL;
  edge pe = NULL;
  tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
  tree vec_dest;
  gimple *inc;
  tree ptr;
  tree data_ref;
  basic_block new_bb;
  tree msq_init = NULL_TREE;
  tree new_temp;
  gphi *phi_stmt;
  tree msq = NULL_TREE;
  gimple_seq stmts = NULL;
  bool compute_in_loop = false;
  bool nested_in_vect_loop = false;
  class loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
  class loop *loop_for_initial_load = NULL;

  if (loop_vinfo)
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
    }

  gcc_assert (alignment_support_scheme == dr_explicit_realign
	      || alignment_support_scheme == dr_explicit_realign_optimized);

  /* We need to generate three things:
     1. the misalignment computation
     2. the extra vector load (for the optimized realignment scheme).
     3. the phi node for the two vectors from which the realignment is
      done (for the optimized realignment scheme).  */

  /* 1. Determine where to generate the misalignment computation.

     If INIT_ADDR is NULL_TREE, this indicates that the misalignment
     calculation will be generated by this function, outside the loop (in the
     preheader).  Otherwise, INIT_ADDR had already been computed for us by the
     caller, inside the loop.

     Background: If the misalignment remains fixed throughout the iterations of
     the loop, then both realignment schemes are applicable, and also the
     misalignment computation can be done outside LOOP.  This is because we are
     vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
     are a multiple of VS (the Vector Size), and therefore the misalignment in
     different vectorized LOOP iterations is always the same.
     The problem arises only if the memory access is in an inner-loop nested
     inside LOOP, which is now being vectorized using outer-loop vectorization.
     This is the only case when the misalignment of the memory access may not
     remain fixed throughout the iterations of the inner-loop (as explained in
     detail in vect_supportable_dr_alignment).  In this case, not only is the
     optimized realignment scheme not applicable, but also the misalignment
     computation (and generation of the realignment token that is passed to
     REALIGN_LOAD) have to be done inside the loop.

     In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
     or not, which in turn determines if the misalignment is computed inside
     the inner-loop, or outside LOOP.  */

  if (init_addr != NULL_TREE || !loop_vinfo)
    {
      compute_in_loop = true;
      gcc_assert (alignment_support_scheme == dr_explicit_realign);
    }


  /* 2. Determine where to generate the extra vector load.

     For the optimized realignment scheme, instead of generating two vector
     loads in each iteration, we generate a single extra vector load in the
     preheader of the loop, and in each iteration reuse the result of the
     vector load from the previous iteration.  In case the memory access is in
     an inner-loop nested inside LOOP, which is now being vectorized using
     outer-loop vectorization, we need to determine whether this initial vector
     load should be generated at the preheader of the inner-loop, or can be
     generated at the preheader of LOOP.  If the memory access has no evolution
     in LOOP, it can be generated in the preheader of LOOP.  Otherwise, it has
     to be generated inside LOOP (in the preheader of the inner-loop).  */

  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      bool invariant_in_outerloop =
        (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
      loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
    }
  else
    loop_for_initial_load = loop;
  if (at_loop)
    *at_loop = loop_for_initial_load;

  if (loop_for_initial_load)
    pe = loop_preheader_edge (loop_for_initial_load);

  /* 3. For the case of the optimized realignment, create the first vector
      load at the loop preheader.  */

  if (alignment_support_scheme == dr_explicit_realign_optimized)
    {
      /* Create msq_init = *(floor(p1)) in the loop preheader  */
      gassign *new_stmt;

      gcc_assert (!compute_in_loop);
      vec_dest = vect_create_destination_var (scalar_dest, vectype);
      ptr = vect_create_data_ref_ptr (stmt_info, vectype,
				      loop_for_initial_load, NULL_TREE,
				      &init_addr, NULL, &inc, true);
      if (TREE_CODE (ptr) == SSA_NAME)
	new_temp = copy_ssa_name (ptr);
      else
	new_temp = make_ssa_name (TREE_TYPE (ptr));
      poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
      tree type = TREE_TYPE (ptr);
      /* Floor-align the pointer: new_temp = ptr & (0 - align).  */
      new_stmt = gimple_build_assign
		   (new_temp, BIT_AND_EXPR, ptr,
		    fold_build2 (MINUS_EXPR, type,
				 build_int_cst (type, 0),
				 build_int_cst (type, align)));
      new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
      gcc_assert (!new_bb);
      data_ref
	= build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
		  build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
      vect_copy_ref_info (data_ref, DR_REF (dr));
      new_stmt = gimple_build_assign (vec_dest, data_ref);
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_assign_set_lhs (new_stmt, new_temp);
      if (pe)
        {
          new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
          gcc_assert (!new_bb);
        }
      else
         gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);

      msq_init = gimple_assign_lhs (new_stmt);
    }

  /* 4. Create realignment token using a target builtin, if available.
      It is done either inside the containing loop, or before LOOP (as
      determined above).  */

  if (targetm.vectorize.builtin_mask_for_load)
    {
      gcall *new_stmt;
      tree builtin_decl;

      /* Compute INIT_ADDR - the initial addressed accessed by this memref.  */
      if (!init_addr)
	{
	  /* Generate the INIT_ADDR computation outside LOOP.  */
	  init_addr = vect_create_addr_base_for_vector_ref (stmt_info, &stmts,
							    NULL_TREE);
	  if (loop)
	    {
	      pe = loop_preheader_edge (loop);
	      new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
	      gcc_assert (!new_bb);
	    }
	  else
	     gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
	}

      builtin_decl = targetm.vectorize.builtin_mask_for_load ();
      new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
      vec_dest =
	vect_create_destination_var (scalar_dest,
				     gimple_call_return_type (new_stmt));
      new_temp = make_ssa_name (vec_dest, new_stmt);
      gimple_call_set_lhs (new_stmt, new_temp);

      if (compute_in_loop)
	gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
      else
	{
	  /* Generate the misalignment computation outside LOOP.  */
	  pe = loop_preheader_edge (loop);
	  new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
	  gcc_assert (!new_bb);
	}

      *realignment_token = gimple_call_lhs (new_stmt);

      /* The result of the CALL_EXPR to this builtin is determined from
         the value of the parameter and no global variables are touched
         which makes the builtin a "const" function.  Requiring the
         builtin to have the "const" attribute makes it unnecessary
         to call mark_call_clobbered.  */
      gcc_assert (TREE_READONLY (builtin_decl));
    }

  /* For dr_explicit_realign the caller generates both loads in the loop,
     so no prolog load / phi is needed.  */
  if (alignment_support_scheme == dr_explicit_realign)
    return msq;

  gcc_assert (!compute_in_loop);
  gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);


  /* 5. Create msq = phi <msq_init, lsq> in loop  */

  pe = loop_preheader_edge (containing_loop);
  vec_dest = vect_create_destination_var (scalar_dest, vectype);
  msq = make_ssa_name (vec_dest);
  phi_stmt = create_phi_node (msq, containing_loop->header);
  add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);

  return msq;
}
5644
5645
/* Function vect_grouped_load_supported.

   COUNT is the size of the load group (the number of statements plus the
   number of gaps).  SINGLE_ELEMENT_P is true if there is actually
   only one statement, with a gap of COUNT - 1.

   Returns true if a suitable permute exists.  */

bool
vect_grouped_load_supported (tree vectype, bool single_element_p,
			     unsigned HOST_WIDE_INT count)
{
  machine_mode mode = TYPE_MODE (vectype);

  /* If this is single-element interleaving with an element distance
     that leaves unused vector loads around punt - we at least create
     very sub-optimal code in that case (and blow up memory,
     see PR65518).  */
  if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "single-element interleaving not supported "
			 "for not adjacent vector loads\n");
      return false;
    }

  /* vect_permute_load_chain requires the group size to be equal to 3 or
     be a power of two.  */
  if (count != 3 && exact_log2 (count) == -1)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "the size of the group of accesses"
			 " is not a power of 2 or not equal to 3\n");
      return false;
    }

  /* Check that the permutation is supported.  */
  if (VECTOR_MODE_P (mode))
    {
      unsigned int i, j;
      if (count == 3)
	{
	  unsigned int nelt;
	  if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
	    {
	      if (dump_enabled_p ())
		dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				 "cannot handle groups of 3 loads for"
				 " variable-length vectors\n");
	      return false;
	    }

	  vec_perm_builder sel (nelt, nelt, 1);
	  sel.quick_grow (nelt);
	  vec_perm_indices indices;
	  unsigned int k;
	  /* Each of the 3 outputs needs two permutes (mirroring
	     vect_permute_load_chain); check both for every K.  */
	  for (k = 0; k < 3; k++)
	    {
	      for (i = 0; i < nelt; i++)
		if (3 * i + k < 2 * nelt)
		  sel[i] = 3 * i + k;
		else
		  sel[i] = 0;
	      indices.new_vector (sel, 2, nelt);
	      if (!can_vec_perm_const_p (mode, indices))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "shuffle of 3 loads is not supported by"
				     " target\n");
		  return false;
		}
	      for (i = 0, j = 0; i < nelt; i++)
		if (3 * i + k < 2 * nelt)
		  sel[i] = i;
		else
		  sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
	      indices.new_vector (sel, 2, nelt);
	      if (!can_vec_perm_const_p (mode, indices))
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "shuffle of 3 loads is not supported by"
				     " target\n");
		  return false;
		}
	    }
	  return true;
	}
      else
	{
	  /* If length is not equal to 3 then only power of 2 is supported.  */
	  gcc_assert (pow2p_hwi (count));
	  poly_uint64 nelt = GET_MODE_NUNITS (mode);

	  /* The encoding has a single stepped pattern.  */
	  vec_perm_builder sel (nelt, 1, 3);
	  sel.quick_grow (3);
	  /* Check extract-even, then extract-odd.  */
	  for (i = 0; i < 3; i++)
	    sel[i] = i * 2;
	  vec_perm_indices indices (sel, 2, nelt);
	  if (can_vec_perm_const_p (mode, indices))
	    {
	      for (i = 0; i < 3; i++)
		sel[i] = i * 2 + 1;
	      indices.new_vector (sel, 2, nelt);
	      if (can_vec_perm_const_p (mode, indices))
		return true;
	    }
	}
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
		     "extract even/odd not supported by target\n");
  return false;
}
5765
2dd8e84c 5766/* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
5767 type VECTYPE. MASKED_P says whether the masked form is needed. */
94b7b4dd 5768
5769bool
2dd8e84c 5770vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
5771 bool masked_p)
94b7b4dd 5772{
2dd8e84c 5773 if (masked_p)
5774 return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
5775 vec_mask_load_lanes_optab,
5776 vectype, count);
5777 else
5778 return vect_lanes_optab_supported_p ("vec_load_lanes",
5779 vec_load_lanes_optab,
5780 vectype, count);
94b7b4dd 5781}
fb85abff 5782
5783/* Function vect_permute_load_chain.
5784
5785 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
1e1bca71 5786 a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
5787 the input data correctly. Return the final references for loads in
5788 RESULT_CHAIN.
fb85abff 5789
5790 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5791 The input is 4 vectors each containing 8 elements. We assign a number to each
5792 element, the input sequence is:
5793
5794 1st vec: 0 1 2 3 4 5 6 7
5795 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 5796 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 5797 4th vec: 24 25 26 27 28 29 30 31
5798
5799 The output sequence should be:
5800
5801 1st vec: 0 4 8 12 16 20 24 28
5802 2nd vec: 1 5 9 13 17 21 25 29
48e1416a 5803 3rd vec: 2 6 10 14 18 22 26 30
fb85abff 5804 4th vec: 3 7 11 15 19 23 27 31
5805
5806 i.e., the first output vector should contain the first elements of each
5807 interleaving group, etc.
5808
282bf14c 5809 We use extract_even/odd instructions to create such output. The input of
5810 each extract_even/odd operation is two vectors
48e1416a 5811 1st vec 2nd vec
5812 0 1 2 3 4 5 6 7
fb85abff 5813
282bf14c 5814 and the output is the vector of extracted even/odd elements. The output of
fb85abff 5815 extract_even will be: 0 2 4 6
5816 and of extract_odd: 1 3 5 7
5817
48e1416a 5818
282bf14c 5819 The permutation is done in log LENGTH stages. In each stage extract_even
5820 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
5821 their order. In our example,
fb85abff 5822
5823 E1: extract_even (1st vec, 2nd vec)
5824 E2: extract_odd (1st vec, 2nd vec)
5825 E3: extract_even (3rd vec, 4th vec)
5826 E4: extract_odd (3rd vec, 4th vec)
5827
5828 The output for the first stage will be:
5829
5830 E1: 0 2 4 6 8 10 12 14
5831 E2: 1 3 5 7 9 11 13 15
48e1416a 5832 E3: 16 18 20 22 24 26 28 30
fb85abff 5833 E4: 17 19 21 23 25 27 29 31
5834
5835 In order to proceed and create the correct sequence for the next stage (or
48e1416a 5836 for the correct output, if the second stage is the last one, as in our
5837 example), we first put the output of extract_even operation and then the
fb85abff 5838 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5839 The input for the second stage is:
5840
5841 1st vec (E1): 0 2 4 6 8 10 12 14
48e1416a 5842 2nd vec (E3): 16 18 20 22 24 26 28 30
5843 3rd vec (E2): 1 3 5 7 9 11 13 15
fb85abff 5844 4th vec (E4): 17 19 21 23 25 27 29 31
5845
5846 The output of the second stage:
5847
5848 E1: 0 4 8 12 16 20 24 28
5849 E2: 2 6 10 14 18 22 26 30
5850 E3: 1 5 9 13 17 21 25 29
5851 E4: 3 7 11 15 19 23 27 31
5852
5853 And RESULT_CHAIN after reordering:
5854
5855 1st vec (E1): 0 4 8 12 16 20 24 28
5856 2nd vec (E3): 1 5 9 13 17 21 25 29
48e1416a 5857 3rd vec (E2): 2 6 10 14 18 22 26 30
fb85abff 5858 4th vec (E4): 3 7 11 15 19 23 27 31. */
5859
static void
vect_permute_load_chain (vec<tree> dr_chain,
			 unsigned int length,
			 stmt_vec_info stmt_info,
			 gimple_stmt_iterator *gsi,
			 vec<tree> *result_chain)
{
  tree data_ref, first_vect, second_vect;
  tree perm_mask_even, perm_mask_odd;
  tree perm3_mask_low, perm3_mask_high;
  gimple *perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int i, j, log_length = exact_log2 (length);

  /* Seed RESULT_CHAIN with the input vectors; the power-of-two path below
     repeatedly permutes RESULT_CHAIN back into DR_CHAIN in-place.  */
  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
	  length * sizeof (tree));

  if (length == 3)
    {
      /* vect_grouped_load_supported ensures that this is constant.  */
      unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
      unsigned int k;

      vec_perm_builder sel (nelt, nelt, 1);
      sel.quick_grow (nelt);
      vec_perm_indices indices;
      /* For each of the three output vectors K, gather elements
	 K, 3+K, 6+K, ... in two permute steps over the three inputs.  */
      for (k = 0; k < 3; k++)
	{
	  /* First mask: pick the strided elements that live in the first
	     two input vectors; remaining lanes are don't-care (0).  */
	  for (i = 0; i < nelt; i++)
	    if (3 * i + k < 2 * nelt)
	      sel[i] = 3 * i + k;
	    else
	      sel[i] = 0;
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);

	  /* Second mask: keep the lanes already gathered and fill the
	     tail with the strided elements from the third input.  */
	  for (i = 0, j = 0; i < nelt; i++)
	    if (3 * i + k < 2 * nelt)
	      sel[i] = i;
	    else
	      sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
	  indices.new_vector (sel, 2, nelt);
	  perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);

	  first_vect = dr_chain[0];
	  second_vect = dr_chain[1];

	  /* Create interleaving stmt (low part of):
	     low = VEC_PERM_EXPR <first_vect, second_vect, {k, 3 + k, 6 + k,
							    ...}>  */
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
					   second_vect, perm3_mask_low);
	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);

	  /* Create interleaving stmt (high part): merge the partial result
	     with the strided elements taken from the third input vector.  */
	  first_vect = data_ref;
	  second_vect = dr_chain[2];
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
					   second_vect, perm3_mask_high);
	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	  (*result_chain)[k] = data_ref;
	}
    }
  else
    {
      /* If length is not equal to 3 then only power of 2 is supported.  */
      gcc_assert (pow2p_hwi (length));

      /* The encoding has a single stepped pattern.  */
      poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
      vec_perm_builder sel (nelt, 1, 3);
      sel.quick_grow (3);
      /* Even-lane extraction mask {0, 2, 4, ...}.  */
      for (i = 0; i < 3; ++i)
	sel[i] = i * 2;
      vec_perm_indices indices (sel, 2, nelt);
      perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);

      /* Odd-lane extraction mask {1, 3, 5, ...}.  */
      for (i = 0; i < 3; ++i)
	sel[i] = i * 2 + 1;
      indices.new_vector (sel, 2, nelt);
      perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);

      /* log2(LENGTH) rounds of even/odd extraction; after each round the
	 partially deinterleaved vectors are copied back into DR_CHAIN so
	 the next round operates on them.  */
      for (i = 0; i < log_length; i++)
	{
	  for (j = 0; j < length; j += 2)
	    {
	      first_vect = dr_chain[j];
	      second_vect = dr_chain[j+1];

	      /* data_ref = permute_even (first_data_ref, second_data_ref);  */
	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       first_vect, second_vect,
					       perm_mask_even);
	      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	      (*result_chain)[j/2] = data_ref;

	      /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       first_vect, second_vect,
					       perm_mask_odd);
	      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	      (*result_chain)[j/2+length/2] = data_ref;
	    }
	  memcpy (dr_chain.address (), result_chain->address (),
		  length * sizeof (tree));
	}
    }
}
fb85abff 5975
926f7a02 5976/* Function vect_shift_permute_load_chain.
5977
5978 Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
5979 sequence of stmts to reorder the input data accordingly.
5980 Return the final references for loads in RESULT_CHAIN.
 5981 Return true if succeeded, false otherwise.
5982
5983 E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
5984 The input is 3 vectors each containing 8 elements. We assign a
5985 number to each element, the input sequence is:
5986
5987 1st vec: 0 1 2 3 4 5 6 7
5988 2nd vec: 8 9 10 11 12 13 14 15
5989 3rd vec: 16 17 18 19 20 21 22 23
5990
5991 The output sequence should be:
5992
5993 1st vec: 0 3 6 9 12 15 18 21
5994 2nd vec: 1 4 7 10 13 16 19 22
5995 3rd vec: 2 5 8 11 14 17 20 23
5996
5997 We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
5998
5999 First we shuffle all 3 vectors to get correct elements order:
6000
6001 1st vec: ( 0 3 6) ( 1 4 7) ( 2 5)
6002 2nd vec: ( 8 11 14) ( 9 12 15) (10 13)
6003 3rd vec: (16 19 22) (17 20 23) (18 21)
6004
6005 Next we unite and shift vector 3 times:
6006
6007 1st step:
6008 shift right by 6 the concatenation of:
6009 "1st vec" and "2nd vec"
6010 ( 0 3 6) ( 1 4 7) |( 2 5) _ ( 8 11 14) ( 9 12 15)| (10 13)
6011 "2nd vec" and "3rd vec"
6012 ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
6013 "3rd vec" and "1st vec"
6014 (16 19 22) (17 20 23) |(18 21) _ ( 0 3 6) ( 1 4 7)| ( 2 5)
6015 | New vectors |
6016
6017 So that now new vectors are:
6018
6019 1st vec: ( 2 5) ( 8 11 14) ( 9 12 15)
6020 2nd vec: (10 13) (16 19 22) (17 20 23)
6021 3rd vec: (18 21) ( 0 3 6) ( 1 4 7)
6022
6023 2nd step:
6024 shift right by 5 the concatenation of:
6025 "1st vec" and "3rd vec"
6026 ( 2 5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0 3 6)| ( 1 4 7)
6027 "2nd vec" and "1st vec"
6028 (10 13) (16 19 22) |(17 20 23) _ ( 2 5) ( 8 11 14)| ( 9 12 15)
6029 "3rd vec" and "2nd vec"
6030 (18 21) ( 0 3 6) |( 1 4 7) _ (10 13) (16 19 22)| (17 20 23)
6031 | New vectors |
6032
6033 So that now new vectors are:
6034
6035 1st vec: ( 9 12 15) (18 21) ( 0 3 6)
6036 2nd vec: (17 20 23) ( 2 5) ( 8 11 14)
6037 3rd vec: ( 1 4 7) (10 13) (16 19 22) READY
6038
6039 3rd step:
6040 shift right by 5 the concatenation of:
6041 "1st vec" and "1st vec"
6042 ( 9 12 15) (18 21) |( 0 3 6) _ ( 9 12 15) (18 21)| ( 0 3 6)
6043 shift right by 3 the concatenation of:
6044 "2nd vec" and "2nd vec"
6045 (17 20 23) |( 2 5) ( 8 11 14) _ (17 20 23)| ( 2 5) ( 8 11 14)
6046 | New vectors |
6047
6048 So that now all vectors are READY:
6049 1st vec: ( 0 3 6) ( 9 12 15) (18 21)
6050 2nd vec: ( 2 5) ( 8 11 14) (17 20 23)
6051 3rd vec: ( 1 4 7) (10 13) (16 19 22)
6052
6053 This algorithm is faster than one in vect_permute_load_chain if:
 6054 1. "shift of a concatenation" is faster than general permutation.
6055 This is usually so.
6056 2. The TARGET machine can't execute vector instructions in parallel.
6057 This is because each step of the algorithm depends on previous.
6058 The algorithm in vect_permute_load_chain is much more parallel.
6059
6060 The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
6061*/
6062
static bool
vect_shift_permute_load_chain (vec<tree> dr_chain,
			       unsigned int length,
			       stmt_vec_info stmt_info,
			       gimple_stmt_iterator *gsi,
			       vec<tree> *result_chain)
{
  tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
  tree perm2_mask1, perm2_mask2, perm3_mask;
  tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
  gimple *perm_stmt;

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  unsigned int i;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  unsigned HOST_WIDE_INT nelt, vf;
  if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
      || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
    /* Not supported for variable-length vectors.  */
    return false;

  vec_perm_builder sel (nelt, nelt, 1);
  sel.quick_grow (nelt);

  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
	  length * sizeof (tree));

  /* Power-of-two group size: deinterleave pairwise using two intra-vector
     shuffles plus a shift and a select per pair, log2(LENGTH) rounds.  */
  if (pow2p_hwi (length) && vf > 4)
    {
      unsigned int j, log_length = exact_log2 (length);
      /* Even lanes first, odd lanes in the upper half.  */
      for (i = 0; i < nelt / 2; ++i)
	sel[i] = i * 2;
      for (i = 0; i < nelt / 2; ++i)
	sel[nelt / 2 + i] = i * 2 + 1;
      vec_perm_indices indices (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "shuffle of 2 fields structure is not \
			      supported by target\n");
	  return false;
	}
      perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);

      /* Mirror image of the previous mask: odd lanes first.  */
      for (i = 0; i < nelt / 2; ++i)
	sel[i] = i * 2 + 1;
      for (i = 0; i < nelt / 2; ++i)
	sel[nelt / 2 + i] = i * 2;
      indices.new_vector (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "shuffle of 2 fields structure is not \
			      supported by target\n");
	  return false;
	}
      perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);

      /* Generating permutation constant to shift all elements.
	 For vector length 8 it is {4 5 6 7 8 9 10 11}.  */
      for (i = 0; i < nelt; i++)
	sel[i] = nelt / 2 + i;
      indices.new_vector (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "shift permutation is not supported by target\n");
	  return false;
	}
      shift1_mask = vect_gen_perm_mask_checked (vectype, indices);

      /* Generating permutation constant to select vector from 2.
	 For vector length 8 it is {0 1 2 3 12 13 14 15}.  */
      for (i = 0; i < nelt / 2; i++)
	sel[i] = i;
      for (i = nelt / 2; i < nelt; i++)
	sel[i] = nelt + i;
      indices.new_vector (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "select is not supported by target\n");
	  return false;
	}
      select_mask = vect_gen_perm_mask_checked (vectype, indices);

      for (i = 0; i < log_length; i++)
	{
	  for (j = 0; j < length; j += 2)
	    {
	      first_vect = dr_chain[j];
	      second_vect = dr_chain[j + 1];

	      /* Group each input's even and odd lanes into halves.  */
	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       first_vect, first_vect,
					       perm2_mask1);
	      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	      vect[0] = data_ref;

	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       second_vect, second_vect,
					       perm2_mask2);
	      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	      vect[1] = data_ref;

	      /* Shift of the concatenation yields the odd-lane vector.  */
	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       vect[0], vect[1], shift1_mask);
	      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	      (*result_chain)[j/2 + length/2] = data_ref;

	      /* Select yields the even-lane vector.  */
	      data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
	      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					       vect[0], vect[1], select_mask);
	      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	      (*result_chain)[j/2] = data_ref;
	    }
	  /* Feed this round's outputs into the next round.  */
	  memcpy (dr_chain.address (), result_chain->address (),
		  length * sizeof (tree));
	}
      return true;
    }
  /* Group size 3: one intra-vector shuffle per input followed by a
     sequence of shifts, as described in the block comment above.  */
  if (length == 3 && vf > 2)
    {
      unsigned int k = 0, l = 0;

      /* Generating permutation constant to get all elements in right order.
	 For vector length 8 it is {0 3 6 1 4 7 2 5}.  */
      for (i = 0; i < nelt; i++)
	{
	  if (3 * k + (l % 3) >= nelt)
	    {
	      k = 0;
	      l += (3 - (nelt % 3));
	    }
	  sel[i] = 3 * k + (l % 3);
	  k++;
	}
      vec_perm_indices indices (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "shuffle of 3 fields structure is not \
			      supported by target\n");
	  return false;
	}
      perm3_mask = vect_gen_perm_mask_checked (vectype, indices);

      /* Generating permutation constant to shift all elements.
	 For vector length 8 it is {6 7 8 9 10 11 12 13}.  */
      for (i = 0; i < nelt; i++)
	sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
      indices.new_vector (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "shift permutation is not supported by target\n");
	  return false;
	}
      shift1_mask = vect_gen_perm_mask_checked (vectype, indices);

      /* Generating permutation constant to shift all elements.
	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
      for (i = 0; i < nelt; i++)
	sel[i] = 2 * (nelt / 3) + 1 + i;
      indices.new_vector (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "shift permutation is not supported by target\n");
	  return false;
	}
      shift2_mask = vect_gen_perm_mask_checked (vectype, indices);

      /* Generating permutation constant to shift all elements.
	 For vector length 8 it is {3 4 5 6 7 8 9 10}.  */
      for (i = 0; i < nelt; i++)
	sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
      indices.new_vector (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "shift permutation is not supported by target\n");
	  return false;
	}
      shift3_mask = vect_gen_perm_mask_checked (vectype, indices);

      /* Generating permutation constant to shift all elements.
	 For vector length 8 it is {5 6 7 8 9 10 11 12}.  */
      for (i = 0; i < nelt; i++)
	sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
      indices.new_vector (sel, 2, nelt);
      if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			     "shift permutation is not supported by target\n");
	  return false;
	}
      shift4_mask = vect_gen_perm_mask_checked (vectype, indices);

      /* Step 1: reorder the elements within each input vector.  */
      for (k = 0; k < 3; k++)
	{
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					   dr_chain[k], dr_chain[k],
					   perm3_mask);
	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	  vect[k] = data_ref;
	}

      /* Step 2: first round of shifts over concatenated neighbours.  */
      for (k = 0; k < 3; k++)
	{
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					   vect[k % 3], vect[(k + 1) % 3],
					   shift1_mask);
	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	  vect_shift[k] = data_ref;
	}

      /* Step 3: second round of shifts, pairing in reverse order.  */
      for (k = 0; k < 3; k++)
	{
	  data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
	  perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
					   vect_shift[(4 - k) % 3],
					   vect_shift[(3 - k) % 3],
					   shift2_mask);
	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
	  vect[k] = data_ref;
	}

      (*result_chain)[3 - (nelt % 3)] = vect[2];

      /* Final rotations of the two remaining vectors with themselves.  */
      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
				       vect[0], shift3_mask);
      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
      (*result_chain)[nelt % 3] = data_ref;

      data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
      perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
				       vect[1], shift4_mask);
      vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
      (*result_chain)[0] = data_ref;
      return true;
    }
  return false;
}
6324
ee612634 6325/* Function vect_transform_grouped_load.
fb85abff 6326
6327 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
6328 to perform their permutation and ascribe the result vectorized statements to
6329 the scalar statements.
6330*/
6331
481fc474 6332void
ecc42a77 6333vect_transform_grouped_load (stmt_vec_info stmt_info, vec<tree> dr_chain,
6334 int size, gimple_stmt_iterator *gsi)
fb85abff 6335{
3754d046 6336 machine_mode mode;
1e094109 6337 vec<tree> result_chain = vNULL;
fb85abff 6338
48e1416a 6339 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
6340 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
fb85abff 6341 vectors, that are ready for vector computation. */
f1f41a6c 6342 result_chain.create (size);
926f7a02 6343
6344 /* If reassociation width for vector type is 2 or greater target machine can
6345 execute 2 or more vector instructions in parallel. Otherwise try to
6346 get chain for loads group using vect_shift_permute_load_chain. */
1c2fef9a 6347 mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
926f7a02 6348 if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
ac29ece2 6349 || pow2p_hwi (size)
a73182ff 6350 || !vect_shift_permute_load_chain (dr_chain, size, stmt_info,
926f7a02 6351 gsi, &result_chain))
a73182ff 6352 vect_permute_load_chain (dr_chain, size, stmt_info, gsi, &result_chain);
6353 vect_record_grouped_load_vectors (stmt_info, result_chain);
f1f41a6c 6354 result_chain.release ();
94b7b4dd 6355}
6356
ee612634 6357/* RESULT_CHAIN contains the output of a group of grouped loads that were
ecc42a77 6358 generated as part of the vectorization of STMT_INFO. Assign the statement
94b7b4dd 6359 for each vector to the associated scalar statement. */
6360
void
vect_record_grouped_load_vectors (stmt_vec_info stmt_info,
				  vec<tree> result_chain)
{
  vec_info *vinfo = stmt_info->vinfo;
  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  stmt_vec_info next_stmt_info = first_stmt_info;
  gap_count = 1;
  FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
    {
      if (!next_stmt_info)
	break;

      /* Skip the gaps.  Loads created for the gaps will be removed by dead
	 code elimination pass later.  No need to check for the first stmt in
	 the group, since it always exists.
	 DR_GROUP_GAP is the number of steps in elements from the previous
	 access (if there is no gap DR_GROUP_GAP is 1).  We skip loads that
	 correspond to the gaps.  */
      if (next_stmt_info != first_stmt_info
	  && gap_count < DR_GROUP_GAP (next_stmt_info))
	{
	  gap_count++;
	  continue;
	}

      /* ??? The following needs cleanup after the removal of
	 DR_GROUP_SAME_DR_STMT.  */
      if (next_stmt_info)
	{
	  stmt_vec_info new_stmt_info = vinfo->lookup_def (tmp_data_ref);
	  /* We assume that if VEC_STMT is not NULL, this is a case of multiple
	     copies, and we put the new vector statement in the first available
	     RELATED_STMT.  */
	  if (!STMT_VINFO_VEC_STMT (next_stmt_info))
	    STMT_VINFO_VEC_STMT (next_stmt_info) = new_stmt_info;
	  else
	    {
	      /* Walk to the end of the RELATED_STMT chain and append.  */
	      stmt_vec_info prev_stmt_info
		= STMT_VINFO_VEC_STMT (next_stmt_info);
	      stmt_vec_info rel_stmt_info
		= STMT_VINFO_RELATED_STMT (prev_stmt_info);
	      while (rel_stmt_info)
		{
		  prev_stmt_info = rel_stmt_info;
		  rel_stmt_info = STMT_VINFO_RELATED_STMT (rel_stmt_info);
		}

	      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
	    }

	  next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
	  /* Reset the gap counter for the next group member.  */
	  gap_count = 1;
	}
    }
}
6423
 6424/* Function vect_can_force_dr_alignment_p.
6425
6426 Returns whether the alignment of a DECL can be forced to be aligned
6427 on ALIGNMENT bit boundary. */
6428
48e1416a 6429bool
e092c20e 6430vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
fb85abff 6431{
53e9c5c4 6432 if (!VAR_P (decl))
fb85abff 6433 return false;
6434
331d5983 6435 if (decl_in_symtab_p (decl)
6436 && !symtab_node::get (decl)->can_increase_alignment_p ())
8cab13cf 6437 return false;
6438
fb85abff 6439 if (TREE_STATIC (decl))
c34f18f1 6440 return (known_le (alignment,
6441 (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
fb85abff 6442 else
e092c20e 6443 return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
fb85abff 6444}
6445
fb85abff 6446
abc9513d 6447/* Return whether the data reference DR_INFO is supported with respect to its
0822b158 6448 alignment.
6449 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
6450 it is aligned, i.e., check if it is possible to vectorize it with different
fb85abff 6451 alignment. */
6452
enum dr_alignment_support
vect_supportable_dr_alignment (dr_vec_info *dr_info,
			       bool check_aligned_accesses)
{
  data_reference *dr = dr_info->dr;
  stmt_vec_info stmt_info = dr_info->stmt;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  class loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  /* An aligned access needs no further analysis unless the caller asked
     to check aligned accesses as well.  */
  if (aligned_access_p (dr_info) && !check_aligned_accesses)
    return dr_aligned;

  /* For now assume all conditional loads/stores support unaligned
     access without any special code.  */
  if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
    if (gimple_call_internal_p (stmt)
	&& (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
	    || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
      return dr_unaligned_supported;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

      for (i=0; i<N; i++)
        for (j=0; j<M; j++)
          s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

      for (i=0; i<N; i+=4)
        for (j=0; j<M; j++){
          vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                         // when j is {0,1,2,3,4,5,6,7,...} respectively.
                         // (assuming that we start from an aligned address).
          }

     We therefore have to use the unoptimized realignment scheme:

      for (i=0; i<N; i+=4)
          for (j=k; j<M; j+=4)
          vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                         // that the misalignment of the initial address is
                         // 0).

     The loop can then be vectorized as follows:

      for (k=0; k<4; k++){
        rt = get_realignment_token (&vp[k]);
        for (i=0; i<N; i+=4){
          v1 = vp[i+k];
          for (j=k; j<M; j+=4){
            v2 = vp[i+j+VS-1];
            va = REALIGN_LOAD <v1,v2,rt>;
            vs += va;
            v1 = v2;
          }
        }
    } */

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      /* Prefer the explicit realignment scheme when the target supports
	 REALIGN_LOAD and can produce a mask for it.  */
      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
	  && (!targetm.vectorize.builtin_mask_for_load
	      || targetm.vectorize.builtin_mask_for_load ()))
	{
	  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

	  /* If we are doing SLP then the accesses need not have the
	     same alignment, instead it depends on the SLP group size.  */
	  if (loop_vinfo
	      && STMT_SLP_TYPE (stmt_info)
	      && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
			      * (DR_GROUP_SIZE
				 (DR_GROUP_FIRST_ELEMENT (stmt_info))),
			      TYPE_VECTOR_SUBPARTS (vectype)))
	    ;
	  else if (!loop_vinfo
		   || (nested_in_vect_loop
		       && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
				    GET_MODE_SIZE (TYPE_MODE (vectype)))))
	    return dr_explicit_realign;
	  else
	    return dr_explicit_realign_optimized;
	}
      if (!known_alignment_for_access_p (dr_info))
	is_packed = not_size_aligned (DR_REF (dr));

      if (targetm.vectorize.support_vector_misalignment
	  (mode, type, DR_MISALIGNMENT (dr_info), is_packed))
	/* Can't software pipeline the loads, but can at least do them.  */
	return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr_info))
	is_packed = not_size_aligned (DR_REF (dr));

      if (targetm.vectorize.support_vector_misalignment
	  (mode, type, DR_MISALIGNMENT (dr_info), is_packed))
	return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}