48e1416a 1/* Data References Analysis and Manipulation Utilities for Vectorization.
fbd26352 2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
48e1416a 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
fb85abff 4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
9ef16211 25#include "backend.h"
7c29e30e 26#include "target.h"
27#include "rtl.h"
fb85abff 28#include "tree.h"
9ef16211 29#include "gimple.h"
7c29e30e 30#include "predict.h"
ad7b10a2 31#include "memmodel.h"
7c29e30e 32#include "tm_p.h"
9ef16211 33#include "ssa.h"
7c29e30e 34#include "optabs-tree.h"
35#include "cgraph.h"
7c29e30e 36#include "dumpfile.h"
9ef16211 37#include "alias.h"
b20a8bb4 38#include "fold-const.h"
9ed99284 39#include "stor-layout.h"
bc61cadb 40#include "tree-eh.h"
a8783bee 41#include "gimplify.h"
dcf1a1ec 42#include "gimple-iterator.h"
e795d6e1 43#include "gimplify-me.h"
05d9c18a 44#include "tree-ssa-loop-ivopts.h"
45#include "tree-ssa-loop-manip.h"
073c1fd5 46#include "tree-ssa-loop.h"
fb85abff 47#include "cfgloop.h"
fb85abff 48#include "tree-scalar-evolution.h"
49#include "tree-vectorizer.h"
8e3cb73b 50#include "expr.h"
f7715905 51#include "builtins.h"
0d8001a7 52#include "params.h"
a5456a6d 53#include "tree-cfg.h"
f68a7726 54#include "tree-hash-traits.h"
d37760c5 55#include "vec-perm-indices.h"
1619606c 56#include "internal-fn.h"
fb85abff 57
94b7b4dd 58/* Return true if load- or store-lanes optab OPTAB is implemented for
59 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
60
61static bool
62vect_lanes_optab_supported_p (const char *name, convert_optab optab,
63 tree vectype, unsigned HOST_WIDE_INT count)
64{
30d26b1c 65 machine_mode mode, array_mode;
94b7b4dd 66 bool limit_p;
67
68 mode = TYPE_MODE (vectype);
30d26b1c 69 if (!targetm.array_mode (mode, count).exists (&array_mode))
94b7b4dd 70 {
30d26b1c 71 poly_uint64 bits = count * GET_MODE_BITSIZE (mode);
72 limit_p = !targetm.array_mode_supported_p (mode, count);
73 if (!int_mode_for_size (bits, limit_p).exists (&array_mode))
74 {
75 if (dump_enabled_p ())
76 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
bffe1cb4 77 "no array mode for %s[%wu]\n",
30d26b1c 78 GET_MODE_NAME (mode), count);
79 return false;
80 }
94b7b4dd 81 }
82
83 if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
84 {
6d8fb6cf 85 if (dump_enabled_p ())
7bd765d4 86 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 87 "cannot use %s<%s><%s>\n", name,
7bd765d4 88 GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
94b7b4dd 89 return false;
90 }
91
6d8fb6cf 92 if (dump_enabled_p ())
7bd765d4 93 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 94 "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
7bd765d4 95 GET_MODE_NAME (mode));
94b7b4dd 96
97 return true;
98}
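
/* Illustrative sketch, not part of the original source: for a target that
   implements the vec_load_lanes optab for V4SI, a query such as

     vect_lanes_optab_supported_p ("vec_load_lanes", vec_load_lanes_optab,
                                   v4si_vectype, 3)

   first asks the target for an array mode capable of holding 3 V4SI
   vectors (falling back to an integer mode of 3 * 128 bits), and then
   checks that the optab has a handler for that (array mode, vector mode)
   pair.  The concrete modes and the v4si_vectype variable are assumptions
   made only for this example.  */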
99
100
ecc42a77 101/* Return the smallest scalar part of STMT_INFO.
282bf14c 102 This is used to determine the vectype of the stmt. We generally set the
103 vectype according to the type of the result (lhs). For stmts whose
fb85abff 104 result-type is different than the type of the arguments (e.g., demotion,
48e1416a 105 promotion), vectype will be reset appropriately (later). Note that we have
fb85abff 106 to visit the smallest datatype in this function, because that determines the
282bf14c 107 VF. If the smallest datatype in the loop is present only as the rhs of a
fb85abff 108 promotion operation - we'd miss it.
109 Such a case, where a variable of this datatype does not appear in the lhs
110 anywhere in the loop, can only occur if it's an invariant: e.g.:
48e1416a 111 'int_x = (int) short_inv', which we'd expect to have been optimized away by
282bf14c 112 invariant motion. However, we cannot rely on invariant motion to always
113 take invariants out of the loop, and so in the case of promotion we also
114 have to check the rhs.
fb85abff 115 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
116 types. */
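
/* Worked example, illustration only (not from the original source): for a
   conversion statement int_x = (int) short_y inside the loop, the result
   type is int but the rhs type is short, so this function returns short;
   that smaller type is what determines the vectorization factor
   (e.g. VF = 8 rather than 4 with 128-bit vectors).  */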
117
118tree
ecc42a77 119vect_get_smallest_scalar_type (stmt_vec_info stmt_info,
120 HOST_WIDE_INT *lhs_size_unit,
121 HOST_WIDE_INT *rhs_size_unit)
fb85abff 122{
ecc42a77 123 tree scalar_type = gimple_expr_type (stmt_info->stmt);
fb85abff 124 HOST_WIDE_INT lhs, rhs;
125
0b86fa32 126 /* During the analysis phase, this function is called on arbitrary
127 statements that might not have scalar results. */
128 if (!tree_fits_uhwi_p (TYPE_SIZE_UNIT (scalar_type)))
129 return scalar_type;
130
f9ae6f95 131 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
fb85abff 132
ecc42a77 133 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
5b4b7bcc 134 if (assign
135 && (gimple_assign_cast_p (assign)
136 || gimple_assign_rhs_code (assign) == DOT_PROD_EXPR
137 || gimple_assign_rhs_code (assign) == WIDEN_SUM_EXPR
138 || gimple_assign_rhs_code (assign) == WIDEN_MULT_EXPR
139 || gimple_assign_rhs_code (assign) == WIDEN_LSHIFT_EXPR
140 || gimple_assign_rhs_code (assign) == FLOAT_EXPR))
fb85abff 141 {
5b4b7bcc 142 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (assign));
fb85abff 143
f9ae6f95 144 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
fb85abff 145 if (rhs < lhs)
146 scalar_type = rhs_type;
147 }
48e1416a 148
149 *lhs_size_unit = lhs;
fb85abff 150 *rhs_size_unit = rhs;
151 return scalar_type;
152}
153
154
fb85abff 155/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
156 tested at run-time. Return TRUE if DDR was successfully inserted.
157 Return false if versioning is not supported. */
158
ed9370cc 159static opt_result
fb85abff 160vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
161{
162 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
163
164 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
ed9370cc 165 return opt_result::failure_at (vect_location,
166 "will not create alias checks, as"
167 " --param vect-max-version-for-alias-checks"
168 " == 0\n");
fb85abff 169
ed9370cc 170 opt_result res
171 = runtime_alias_check_p (ddr, loop,
172 optimize_loop_nest_for_speed_p (loop));
173 if (!res)
174 return res;
f634c3e9 175
f1f41a6c 176 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
ed9370cc 177 return opt_result::success ();
fb85abff 178}
179
e85b4a5e 180/* Record that loop LOOP_VINFO needs to check that VALUE is nonzero. */
181
182static void
183vect_check_nonzero_value (loop_vec_info loop_vinfo, tree value)
184{
185 vec<tree> checks = LOOP_VINFO_CHECK_NONZERO (loop_vinfo);
186 for (unsigned int i = 0; i < checks.length(); ++i)
187 if (checks[i] == value)
188 return;
189
190 if (dump_enabled_p ())
a4e972e3 191 dump_printf_loc (MSG_NOTE, vect_location,
192 "need run-time check that %T is nonzero\n",
193 value);
e85b4a5e 194 LOOP_VINFO_CHECK_NONZERO (loop_vinfo).safe_push (value);
195}
196
abc9513d 197/* Return true if we know that the order of vectorized DR_INFO_A and
198 vectorized DR_INFO_B will be the same as the order of DR_INFO_A and
199 DR_INFO_B. At least one of the accesses is a write. */
e85b4a5e 200
201static bool
abc9513d 202vect_preserves_scalar_order_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b)
e85b4a5e 203{
abc9513d 204 stmt_vec_info stmtinfo_a = dr_info_a->stmt;
205 stmt_vec_info stmtinfo_b = dr_info_b->stmt;
206
e85b4a5e 207 /* Single statements are always kept in their original order. */
208 if (!STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
209 && !STMT_VINFO_GROUPED_ACCESS (stmtinfo_b))
210 return true;
211
212 /* STMT_A and STMT_B belong to overlapping groups. All loads in a
ce8e9d74 213 group are emitted at the position of the last scalar load and all
e85b4a5e 214 stores in a group are emitted at the position of the last scalar store.
ce8e9d74 215 Compute that position and check whether the resulting order matches
216 the current one. */
217 stmt_vec_info last_a = DR_GROUP_FIRST_ELEMENT (stmtinfo_a);
218 if (last_a)
219 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_a); s;
220 s = DR_GROUP_NEXT_ELEMENT (s))
221 last_a = get_later_stmt (last_a, s);
222 else
223 last_a = stmtinfo_a;
224 stmt_vec_info last_b = DR_GROUP_FIRST_ELEMENT (stmtinfo_b);
225 if (last_b)
226 for (stmt_vec_info s = DR_GROUP_NEXT_ELEMENT (last_b); s;
227 s = DR_GROUP_NEXT_ELEMENT (s))
228 last_b = get_later_stmt (last_b, s);
229 else
230 last_b = stmtinfo_b;
231 return ((get_later_stmt (last_a, last_b) == last_a)
232 == (get_later_stmt (stmtinfo_a, stmtinfo_b) == stmtinfo_a));
e85b4a5e 233}
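
/* Illustrative example, not from the original source: for the scalar
   sequence

     x = a[i];      // first load of a two-element load group
     a[i] = y;      // intervening store
     z = a[i+1];    // last load of the group

   the vectorized load of the whole group is emitted at the position of
   the last scalar load, i.e. after the store, so the function above
   reports that the scalar order is not preserved and the caller must
   treat this as a problematic read/write dependence.  */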
37545e54 234
403965f7 235/* A subroutine of vect_analyze_data_ref_dependence. Handle
236 DDR_COULD_BE_INDEPENDENT_P ddr DDR that has a known set of dependence
237 distances. These distances are conservatively correct but they don't
238 reflect a guaranteed dependence.
239
240 Return true if this function does all the work necessary to avoid
241 an alias or false if the caller should use the dependence distances
242 to limit the vectorization factor in the usual way. LOOP_DEPTH is
243 the depth of the loop described by LOOP_VINFO and the other arguments
244 are as for vect_analyze_data_ref_dependence. */
245
246static bool
247vect_analyze_possibly_independent_ddr (data_dependence_relation *ddr,
248 loop_vec_info loop_vinfo,
d75596cd 249 int loop_depth, unsigned int *max_vf)
403965f7 250{
251 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
252 lambda_vector dist_v;
253 unsigned int i;
254 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
255 {
256 int dist = dist_v[loop_depth];
257 if (dist != 0 && !(dist > 0 && DDR_REVERSED_P (ddr)))
258 {
259 /* If the user asserted safelen >= DIST consecutive iterations
260 can be executed concurrently, assume independence.
261
262 ??? An alternative would be to add the alias check even
263 in this case, and vectorize the fallback loop with the
264 maximum VF set to safelen. However, if the user has
265 explicitly given a length, it's less likely that that
266 would be a win. */
267 if (loop->safelen >= 2 && abs_hwi (dist) <= loop->safelen)
268 {
d75596cd 269 if ((unsigned int) loop->safelen < *max_vf)
403965f7 270 *max_vf = loop->safelen;
271 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
272 continue;
273 }
274
275 /* For dependence distances of 2 or more, we have the option
276 of limiting VF or checking for an alias at runtime.
277 Prefer to check at runtime if we can, to avoid limiting
278 the VF unnecessarily when the bases are in fact independent.
279
280 Note that the alias checks will be removed if the VF ends up
281 being small enough. */
db72d3bf 282 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
283 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
284 return (!STMT_VINFO_GATHER_SCATTER_P (dr_info_a->stmt)
285 && !STMT_VINFO_GATHER_SCATTER_P (dr_info_b->stmt)
fa681b45 286 && vect_mark_for_runtime_alias_test (ddr, loop_vinfo));
403965f7 287 }
288 }
289 return true;
290}
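
/* Example of the safelen case above, illustration only: if the user put
   "#pragma omp simd safelen(16)" on the loop and the conservatively
   computed dependence distance between the two references is 4, then
   4 <= 16, so the accesses are assumed independent and *max_vf is merely
   capped at 16 instead of adding a runtime alias check.  */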
291
292
fb85abff 293/* Function vect_analyze_data_ref_dependence.
294
ed9370cc 295 FIXME: I needed to change the sense of the returned flag.
296
297 Return FALSE if there (might) exist a dependence between a memory-reference
fb85abff 298 DRA and a memory-reference DRB. When versioning for alias may check a
ed9370cc 299 dependence at run-time, return TRUE. Adjust *MAX_VF according to
91a74fc6 300 the data dependence. */
48e1416a 301
ed9370cc 302static opt_result
fb85abff 303vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
d75596cd 304 loop_vec_info loop_vinfo,
305 unsigned int *max_vf)
fb85abff 306{
307 unsigned int i;
68f15e9d 308 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 309 struct data_reference *dra = DDR_A (ddr);
310 struct data_reference *drb = DDR_B (ddr);
db72d3bf 311 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (dra);
312 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (drb);
abc9513d 313 stmt_vec_info stmtinfo_a = dr_info_a->stmt;
314 stmt_vec_info stmtinfo_b = dr_info_b->stmt;
fb85abff 315 lambda_vector dist_v;
316 unsigned int loop_depth;
48e1416a 317
68f15e9d 318 /* In loop analysis all data references should be vectorizable. */
6ea6a380 319 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
320 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
68f15e9d 321 gcc_unreachable ();
6ea6a380 322
68f15e9d 323 /* Independent data accesses. */
fb85abff 324 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
ed9370cc 325 return opt_result::success ();
37545e54 326
68f15e9d 327 if (dra == drb
328 || (DR_IS_READ (dra) && DR_IS_READ (drb)))
ed9370cc 329 return opt_result::success ();
48e1416a 330
5695a690 331 /* We do not have to consider dependences between accesses that belong
472a8968 332 to the same group, unless the stride could be smaller than the
333 group size. */
e1009321 334 if (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
335 && (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)
336 == DR_GROUP_FIRST_ELEMENT (stmtinfo_b))
472a8968 337 && !STMT_VINFO_STRIDED_P (stmtinfo_a))
ed9370cc 338 return opt_result::success ();
5695a690 339
0f52e33a 340 /* Even if we have an anti-dependence then, as the vectorized loop covers at
341 least two scalar iterations, there is always also a true dependence.
342 As the vectorizer does not re-order loads and stores we can ignore
343 the anti-dependence if TBAA can disambiguate both DRs similar to the
344 case with known negative distance anti-dependences (positive
345 distance anti-dependences would violate TBAA constraints). */
346 if (((DR_IS_READ (dra) && DR_IS_WRITE (drb))
347 || (DR_IS_WRITE (dra) && DR_IS_READ (drb)))
348 && !alias_sets_conflict_p (get_alias_set (DR_REF (dra)),
349 get_alias_set (DR_REF (drb))))
ed9370cc 350 return opt_result::success ();
48e1416a 351
68f15e9d 352 /* Unknown data dependence. */
fb85abff 353 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
354 {
3d483a94 355 /* If user asserted safelen consecutive iterations can be
356 executed concurrently, assume independence. */
357 if (loop->safelen >= 2)
358 {
d75596cd 359 if ((unsigned int) loop->safelen < *max_vf)
3d483a94 360 *max_vf = loop->safelen;
c7a8722c 361 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
ed9370cc 362 return opt_result::success ();
3d483a94 363 }
364
0bd6d857 365 if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
366 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
ed9370cc 367 return opt_result::failure_at
368 (stmtinfo_a->stmt,
369 "versioning for alias not supported for: "
370 "can't determine dependence between %T and %T\n",
371 DR_REF (dra), DR_REF (drb));
95e19962 372
6d8fb6cf 373 if (dump_enabled_p ())
ed9370cc 374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
a4e972e3 375 "versioning for alias required: "
376 "can't determine dependence between %T and %T\n",
377 DR_REF (dra), DR_REF (drb));
d4b21757 378
68f15e9d 379 /* Add to list of ddrs that need to be tested at run-time. */
ed9370cc 380 return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
37545e54 381 }
382
68f15e9d 383 /* Known data dependence. */
fb85abff 384 if (DDR_NUM_DIST_VECTS (ddr) == 0)
385 {
3d483a94 386 /* If user asserted safelen consecutive iterations can be
387 executed concurrently, assume independence. */
388 if (loop->safelen >= 2)
389 {
d75596cd 390 if ((unsigned int) loop->safelen < *max_vf)
3d483a94 391 *max_vf = loop->safelen;
c7a8722c 392 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = false;
ed9370cc 393 return opt_result::success ();
3d483a94 394 }
395
0bd6d857 396 if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
397 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
ed9370cc 398 return opt_result::failure_at
399 (stmtinfo_a->stmt,
400 "versioning for alias not supported for: "
401 "bad dist vector for %T and %T\n",
402 DR_REF (dra), DR_REF (drb));
95e19962 403
6d8fb6cf 404 if (dump_enabled_p ())
ed9370cc 405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, stmtinfo_a->stmt,
a4e972e3 406 "versioning for alias required: "
407 "bad dist vector for %T and %T\n",
408 DR_REF (dra), DR_REF (drb));
fb85abff 409 /* Add to list of ddrs that need to be tested at run-time. */
ed9370cc 410 return vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
48e1416a 411 }
fb85abff 412
413 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
403965f7 414
415 if (DDR_COULD_BE_INDEPENDENT_P (ddr)
416 && vect_analyze_possibly_independent_ddr (ddr, loop_vinfo,
417 loop_depth, max_vf))
ed9370cc 418 return opt_result::success ();
403965f7 419
f1f41a6c 420 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
fb85abff 421 {
422 int dist = dist_v[loop_depth];
423
6d8fb6cf 424 if (dump_enabled_p ())
7bd765d4 425 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 426 "dependence distance = %d.\n", dist);
fb85abff 427
91a74fc6 428 if (dist == 0)
fb85abff 429 {
6d8fb6cf 430 if (dump_enabled_p ())
a4e972e3 431 dump_printf_loc (MSG_NOTE, vect_location,
432 "dependence distance == 0 between %T and %T\n",
433 DR_REF (dra), DR_REF (drb));
fb85abff 434
4d525783 435 /* When we perform grouped accesses and perform implicit CSE
436 by detecting equal accesses and doing disambiguation with
437 runtime alias tests like for
438 .. = a[i];
439 .. = a[i+1];
440 a[i] = ..;
441 a[i+1] = ..;
442 *p = ..;
443 .. = a[i];
444 .. = a[i+1];
445 where we will end up loading { a[i], a[i+1] } once, make
446 sure that inserting group loads before the first load and
5a91be9e 447 stores after the last store will do the right thing.
448 Similar for groups like
449 a[i] = ...;
450 ... = a[i];
451 a[i+1] = ...;
452 where loads from the group interleave with the store. */
abc9513d 453 if (!vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
ed9370cc 454 return opt_result::failure_at (stmtinfo_a->stmt,
455 "READ_WRITE dependence"
456 " in interleaving.\n");
e85b4a5e 457
84017e0e 458 if (loop->safelen < 2)
4d525783 459 {
e85b4a5e 460 tree indicator = dr_zero_step_indicator (dra);
fa681b45 461 if (!indicator || integer_zerop (indicator))
ed9370cc 462 return opt_result::failure_at (stmtinfo_a->stmt,
463 "access also has a zero step\n");
fa681b45 464 else if (TREE_CODE (indicator) != INTEGER_CST)
465 vect_check_nonzero_value (loop_vinfo, indicator);
fb85abff 466 }
91a74fc6 467 continue;
468 }
469
470 if (dist > 0 && DDR_REVERSED_P (ddr))
471 {
472 /* If DDR_REVERSED_P the order of the data-refs in DDR was
473 reversed (to make distance vector positive), and the actual
474 distance is negative. */
6d8fb6cf 475 if (dump_enabled_p ())
7bd765d4 476 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78bb46f5 477 "dependence distance negative.\n");
a8cf7702 478 /* Record a negative dependence distance to later limit the
479 amount of stmt copying / unrolling we can perform.
480 Only need to handle read-after-write dependence. */
481 if (DR_IS_READ (drb)
482 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) == 0
483 || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) > (unsigned)dist))
484 STMT_VINFO_MIN_NEG_DIST (stmtinfo_b) = dist;
91a74fc6 485 continue;
486 }
487
d75596cd 488 unsigned int abs_dist = abs (dist);
489 if (abs_dist >= 2 && abs_dist < *max_vf)
91a74fc6 490 {
491 /* The dependence distance requires reduction of the maximal
492 vectorization factor. */
493 *max_vf = abs (dist);
6d8fb6cf 494 if (dump_enabled_p ())
7bd765d4 495 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 496 "adjusting maximal vectorization factor to %i\n",
497 *max_vf);
fb85abff 498 }
499
d75596cd 500 if (abs_dist >= *max_vf)
fb85abff 501 {
48e1416a 502 /* Dependence distance does not create dependence, as far as
91a74fc6 503 vectorization is concerned, in this case. */
6d8fb6cf 504 if (dump_enabled_p ())
7bd765d4 505 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 506 "dependence distance >= VF.\n");
fb85abff 507 continue;
508 }
509
ed9370cc 510 return opt_result::failure_at (stmtinfo_a->stmt,
511 "not vectorized, possible dependence "
512 "between data-refs %T and %T\n",
513 DR_REF (dra), DR_REF (drb));
fb85abff 514 }
515
ed9370cc 516 return opt_result::success ();
fb85abff 517}
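
/* Worked example of the distance handling above, illustration only: for

     for (i = 0; i < n; i++)
       a[i + 2] = a[i] + 1;

   the dependence distance is 2, so *max_vf is reduced to 2 and the loop
   can still be vectorized with two elements per vector; a distance of 0
   (the same location read and written in one iteration) is harmless, and
   a reversed (effectively negative) distance only records the minimal
   negative distance to limit later statement copying.  */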
518
519/* Function vect_analyze_data_ref_dependences.
48e1416a 520
fb85abff 521 Examine all the data references in the loop, and make sure there do not
91a74fc6 522 exist any data dependences between them. Set *MAX_VF according to
523 the maximum vectorization factor the data dependences allow. */
48e1416a 524
ed9370cc 525opt_result
d75596cd 526vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
527 unsigned int *max_vf)
fb85abff 528{
529 unsigned int i;
fb85abff 530 struct data_dependence_relation *ddr;
531
88f6eb8f 532 DUMP_VECT_SCOPE ("vect_analyze_data_ref_dependences");
68f15e9d 533
a99aba41 534 if (!LOOP_VINFO_DDRS (loop_vinfo).exists ())
535 {
536 LOOP_VINFO_DDRS (loop_vinfo)
537 .create (LOOP_VINFO_DATAREFS (loop_vinfo).length ()
538 * LOOP_VINFO_DATAREFS (loop_vinfo).length ());
539 /* We need read-read dependences to compute
540 STMT_VINFO_SAME_ALIGN_REFS. */
03ad9f74 541 bool res = compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
542 &LOOP_VINFO_DDRS (loop_vinfo),
543 LOOP_VINFO_LOOP_NEST (loop_vinfo),
544 true);
545 gcc_assert (res);
a99aba41 546 }
547
c7a8722c 548 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo) = true;
68f15e9d 549
5b631e09 550 /* For epilogues we either have no aliases or alias versioning
551 was applied to original loop. Therefore we may just get max_vf
552 using VF of original loop. */
553 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
4a85c0b1 554 *max_vf = LOOP_VINFO_ORIG_MAX_VECT_FACTOR (loop_vinfo);
5b631e09 555 else
556 FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
ed9370cc 557 {
558 opt_result res
559 = vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf);
560 if (!res)
561 return res;
562 }
68f15e9d 563
ed9370cc 564 return opt_result::success ();
68f15e9d 565}
566
567
568/* Function vect_slp_analyze_data_ref_dependence.
569
570 Return TRUE if there (might) exist a dependence between a memory-reference
db72d3bf 571 DRA and a memory-reference DRB for VINFO. When versioning for alias
 572 may check a dependence at run-time, return FALSE. */
68f15e9d 574
575static bool
db72d3bf 576vect_slp_analyze_data_ref_dependence (vec_info *vinfo,
577 struct data_dependence_relation *ddr)
68f15e9d 578{
579 struct data_reference *dra = DDR_A (ddr);
580 struct data_reference *drb = DDR_B (ddr);
db72d3bf 581 dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
582 dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
68f15e9d 583
584 /* We need to check dependences of statements marked as unvectorizable
585 as well, they still can prohibit vectorization. */
586
587 /* Independent data accesses. */
588 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
589 return false;
590
591 if (dra == drb)
592 return false;
593
594 /* Read-read is OK. */
595 if (DR_IS_READ (dra) && DR_IS_READ (drb))
596 return false;
597
1fa434e3 598 /* If dra and drb are part of the same interleaving chain consider
599 them independent. */
abc9513d 600 if (STMT_VINFO_GROUPED_ACCESS (dr_info_a->stmt)
601 && (DR_GROUP_FIRST_ELEMENT (dr_info_a->stmt)
602 == DR_GROUP_FIRST_ELEMENT (dr_info_b->stmt)))
1fa434e3 603 return false;
604
68f15e9d 605 /* Unknown data dependence. */
606 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
07e3bcbf 607 {
50e6c257 608 if (dump_enabled_p ())
a4e972e3 609 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
610 "can't determine dependence between %T and %T\n",
611 DR_REF (dra), DR_REF (drb));
07e3bcbf 612 }
50e6c257 613 else if (dump_enabled_p ())
a4e972e3 614 dump_printf_loc (MSG_NOTE, vect_location,
615 "determined dependence between %T and %T\n",
616 DR_REF (dra), DR_REF (drb));
48e1416a 617
68f15e9d 618 return true;
619}
620
621
c256513d 622/* Analyze dependences involved in the transform of SLP NODE. STORES
623 contain the vector of scalar stores of this instance if we are
624 disambiguating the loads. */
77d241ed 625
626static bool
c256513d 627vect_slp_analyze_node_dependences (slp_instance instance, slp_tree node,
06bb64b8 628 vec<stmt_vec_info> stores,
ecc42a77 629 stmt_vec_info last_store_info)
77d241ed 630{
631 /* This walks over all stmts involved in the SLP load/store done
632 in NODE verifying we can sink them up to the last stmt in the
633 group. */
3d9c962c 634 stmt_vec_info last_access_info = vect_find_last_scalar_stmt_in_slp (node);
1c2fef9a 635 vec_info *vinfo = last_access_info->vinfo;
77d241ed 636 for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
637 {
06bb64b8 638 stmt_vec_info access_info = SLP_TREE_SCALAR_STMTS (node)[k];
3d9c962c 639 if (access_info == last_access_info)
77d241ed 640 continue;
06bb64b8 641 data_reference *dr_a = STMT_VINFO_DATA_REF (access_info);
58cfef6b 642 ao_ref ref;
643 bool ref_initialized_p = false;
06bb64b8 644 for (gimple_stmt_iterator gsi = gsi_for_stmt (access_info->stmt);
3d9c962c 645 gsi_stmt (gsi) != last_access_info->stmt; gsi_next (&gsi))
77d241ed 646 {
647 gimple *stmt = gsi_stmt (gsi);
d144c8b2 648 if (! gimple_vuse (stmt)
649 || (DR_IS_READ (dr_a) && ! gimple_vdef (stmt)))
77d241ed 650 continue;
651
d144c8b2 652 /* If we couldn't record a (single) data reference for this
58cfef6b 653 stmt we have to resort to the alias oracle. */
1c2fef9a 654 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
655 data_reference *dr_b = STMT_VINFO_DATA_REF (stmt_info);
d144c8b2 656 if (!dr_b)
58cfef6b 657 {
658 /* We are moving a store or sinking a load - this means
659 we cannot use TBAA for disambiguation. */
660 if (!ref_initialized_p)
661 ao_ref_init (&ref, DR_REF (dr_a));
662 if (stmt_may_clobber_ref_p_1 (stmt, &ref, false)
663 || ref_maybe_used_by_stmt_p (stmt, &ref, false))
664 return false;
665 continue;
666 }
d144c8b2 667
92bf253d 668 bool dependent = false;
c256513d 669 /* If we run into a store of this same instance (we've just
670 marked those) then delay dependence checking until we run
671 into the last store because this is where it will have
672 been sunk to (and we verify if we can do that as well). */
673 if (gimple_visited_p (stmt))
674 {
ecc42a77 675 if (stmt_info != last_store_info)
c256513d 676 continue;
677 unsigned i;
06bb64b8 678 stmt_vec_info store_info;
679 FOR_EACH_VEC_ELT (stores, i, store_info)
c256513d 680 {
06bb64b8 681 data_reference *store_dr = STMT_VINFO_DATA_REF (store_info);
c256513d 682 ddr_p ddr = initialize_data_dependence_relation
683 (dr_a, store_dr, vNULL);
db72d3bf 684 dependent
685 = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
c256513d 686 free_dependence_relation (ddr);
92bf253d 687 if (dependent)
688 break;
c256513d 689 }
690 }
92bf253d 691 else
77d241ed 692 {
92bf253d 693 ddr_p ddr = initialize_data_dependence_relation (dr_a,
694 dr_b, vNULL);
db72d3bf 695 dependent = vect_slp_analyze_data_ref_dependence (vinfo, ddr);
77d241ed 696 free_dependence_relation (ddr);
77d241ed 697 }
92bf253d 698 if (dependent)
699 return false;
77d241ed 700 }
701 }
702 return true;
703}
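
/* Illustrative scenario for the walk above, not from the original source:
   for an SLP load group { a[0], a[1] } appearing as

     x = a[0];
     a[0] = y;    // intervening store that aliases the first load
     z = a[1];

   sinking the load of a[0] down to the position of the last scalar load
   would move it past the store, so the dependence test above fails and
   the instance is not SLP-vectorized.  */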
704
705
68f15e9d 706/* Function vect_slp_analyze_instance_dependence.
 707
 708 Examine all the data references in SLP instance INSTANCE and make sure
 709 there do not exist any data dependences between them that would prevent
 710 the loads and stores from being sunk to their vectorized positions.
 711 Return TRUE if no such dependence exists. */
711
712bool
c256513d 713vect_slp_analyze_instance_dependence (slp_instance instance)
68f15e9d 714{
88f6eb8f 715 DUMP_VECT_SCOPE ("vect_slp_analyze_instance_dependence");
68f15e9d 716
c256513d 717 /* The stores of this instance are at the root of the SLP tree. */
718 slp_tree store = SLP_INSTANCE_TREE (instance);
06bb64b8 719 if (! STMT_VINFO_DATA_REF (SLP_TREE_SCALAR_STMTS (store)[0]))
c256513d 720 store = NULL;
721
722 /* Verify we can sink stores to the vectorized stmt insert location. */
3d9c962c 723 stmt_vec_info last_store_info = NULL;
c256513d 724 if (store)
77d241ed 725 {
c256513d 726 if (! vect_slp_analyze_node_dependences (instance, store, vNULL, NULL))
727 return false;
728
729 /* Mark stores in this instance and remember the last one. */
3d9c962c 730 last_store_info = vect_find_last_scalar_stmt_in_slp (store);
c256513d 731 for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
06bb64b8 732 gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, true);
77d241ed 733 }
68f15e9d 734
c256513d 735 bool res = true;
fb85abff 736
c256513d 737 /* Verify we can sink loads to the vectorized stmt insert location,
738 special-casing stores of this instance. */
739 slp_tree load;
740 unsigned int i;
741 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, load)
742 if (! vect_slp_analyze_node_dependences (instance, load,
743 store
744 ? SLP_TREE_SCALAR_STMTS (store)
3d9c962c 745 : vNULL, last_store_info))
c256513d 746 {
747 res = false;
748 break;
749 }
750
751 /* Unset the visited flag. */
752 if (store)
753 for (unsigned k = 0; k < SLP_INSTANCE_GROUP_SIZE (instance); ++k)
06bb64b8 754 gimple_set_visited (SLP_TREE_SCALAR_STMTS (store)[k]->stmt, false);
c256513d 755
756 return res;
fb85abff 757}
758
52643160 759/* Record the base alignment guarantee given by DRB, which occurs
760 in STMT_INFO. */
4f372c2c 761
762static void
52643160 763vect_record_base_alignment (stmt_vec_info stmt_info,
4f372c2c 764 innermost_loop_behavior *drb)
765{
52643160 766 vec_info *vinfo = stmt_info->vinfo;
4f372c2c 767 bool existed;
768 innermost_loop_behavior *&entry
769 = vinfo->base_alignments.get_or_insert (drb->base_address, &existed);
770 if (!existed || entry->base_alignment < drb->base_alignment)
771 {
772 entry = drb;
773 if (dump_enabled_p ())
a4e972e3 774 dump_printf_loc (MSG_NOTE, vect_location,
775 "recording new base alignment for %T\n"
776 " alignment: %d\n"
777 " misalignment: %d\n"
778 " based on: %G",
779 drb->base_address,
780 drb->base_alignment,
781 drb->base_misalignment,
782 stmt_info->stmt);
4f372c2c 783 }
784}
785
786/* If the region we're going to vectorize is reached, all unconditional
787 data references occur at least once. We can therefore pool the base
788 alignment guarantees from each unconditional reference. Do this by
789 going through all the data references in VINFO and checking whether
790 the containing statement makes the reference unconditionally. If so,
791 record the alignment of the base address in VINFO so that it can be
792 used for all other references with the same base. */
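
/* Example of the pooling described above, illustration only: if an
   unconditional reference *p in the region has a base address known to be
   32-byte aligned, a second, conditional reference through the same base
   can reuse that guarantee via the recorded base alignment, even though
   its own statement alone would not establish it.  */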
793
794void
795vect_record_base_alignments (vec_info *vinfo)
796{
797 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
798 struct loop *loop = loop_vinfo ? LOOP_VINFO_LOOP (loop_vinfo) : NULL;
799 data_reference *dr;
800 unsigned int i;
a99aba41 801 FOR_EACH_VEC_ELT (vinfo->shared->datarefs, i, dr)
adebd8d4 802 {
db72d3bf 803 dr_vec_info *dr_info = vinfo->lookup_dr (dr);
abc9513d 804 stmt_vec_info stmt_info = dr_info->stmt;
1ce0a2db 805 if (!DR_IS_CONDITIONAL_IN_STMT (dr)
fa681b45 806 && STMT_VINFO_VECTORIZABLE (stmt_info)
807 && !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
1ce0a2db 808 {
52643160 809 vect_record_base_alignment (stmt_info, &DR_INNERMOST (dr));
4f372c2c 810
1ce0a2db 811 /* If DR is nested in the loop that is being vectorized, we can also
812 record the alignment of the base wrt the outer loop. */
0219dc42 813 if (loop && nested_in_vect_loop_p (loop, stmt_info))
fa681b45 814 vect_record_base_alignment
52643160 815 (stmt_info, &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info));
1ce0a2db 816 }
adebd8d4 817 }
4f372c2c 818}
819
abc9513d 820/* Return the target alignment for the vectorized form of DR_INFO. */
aec313e5 821
e092c20e 822static poly_uint64
abc9513d 823vect_calculate_target_alignment (dr_vec_info *dr_info)
aec313e5 824{
abc9513d 825 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
aec313e5 826 return targetm.vectorize.preferred_vector_alignment (vectype);
827}
828
fb85abff 829/* Function vect_compute_data_ref_alignment
830
abc9513d 831 Compute the misalignment of the data reference DR_INFO.
fb85abff 832
833 Output:
abc9513d 834 1. DR_MISALIGNMENT (DR_INFO) is defined.
fb85abff 835
836 FOR NOW: No analysis is actually performed. Misalignment is calculated
837 only for trivial cases. TODO. */
838
fa681b45 839static void
abc9513d 840vect_compute_data_ref_alignment (dr_vec_info *dr_info)
fb85abff 841{
abc9513d 842 stmt_vec_info stmt_info = dr_info->stmt;
4f372c2c 843 vec_base_alignments *base_alignments = &stmt_info->vinfo->base_alignments;
fb85abff 844 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 845 struct loop *loop = NULL;
abc9513d 846 tree ref = DR_REF (dr_info->dr);
9e879814 847 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
48e1416a 848
6d8fb6cf 849 if (dump_enabled_p ())
7bd765d4 850 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 851 "vect_compute_data_ref_alignment:\n");
fb85abff 852
37545e54 853 if (loop_vinfo)
854 loop = LOOP_VINFO_LOOP (loop_vinfo);
48e1416a 855
fb85abff 856 /* Initialize misalignment to unknown. */
abc9513d 857 SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
fb85abff 858
fa681b45 859 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
860 return;
861
abc9513d 862 innermost_loop_behavior *drb = vect_dr_behavior (dr_info);
9e879814 863 bool step_preserves_misalignment_p;
864
e092c20e 865 poly_uint64 vector_alignment
866 = exact_div (vect_calculate_target_alignment (dr_info), BITS_PER_UNIT);
abc9513d 867 DR_TARGET_ALIGNMENT (dr_info) = vector_alignment;
aec313e5 868
e092c20e 869 unsigned HOST_WIDE_INT vect_align_c;
870 if (!vector_alignment.is_constant (&vect_align_c))
871 return;
872
9e879814 873 /* No step for BB vectorization. */
874 if (!loop)
875 {
876 gcc_assert (integer_zerop (drb->step));
877 step_preserves_misalignment_p = true;
878 }
fb85abff 879
880 /* In case the dataref is in an inner-loop of the loop that is being
881 vectorized (LOOP), we use the base and misalignment information
282bf14c 882 relative to the outer-loop (LOOP). This is ok only if the misalignment
fb85abff 883 stays the same throughout the execution of the inner-loop, which is why
884 we have to check that the stride of the dataref in the inner-loop evenly
aec313e5 885 divides by the vector alignment. */
0219dc42 886 else if (nested_in_vect_loop_p (loop, stmt_info))
fb85abff 887 {
9e879814 888 step_preserves_misalignment_p
e092c20e 889 = (DR_STEP_ALIGNMENT (dr_info->dr) % vect_align_c) == 0;
48e1416a 890
9e879814 891 if (dump_enabled_p ())
fb85abff 892 {
9e879814 893 if (step_preserves_misalignment_p)
894 dump_printf_loc (MSG_NOTE, vect_location,
aec313e5 895 "inner step divides the vector alignment.\n");
9e879814 896 else
7bd765d4 897 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
aec313e5 898 "inner step doesn't divide the vector"
899 " alignment.\n");
fb85abff 900 }
901 }
902
c1bee668 903 /* Similarly we can only use base and misalignment information relative to
904 an innermost loop if the misalignment stays the same throughout the
905 execution of the loop. As above, this is the case if the stride of
aec313e5 906 the dataref evenly divides by the alignment. */
c1bee668 907 else
38682b67 908 {
d75596cd 909 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
9e879814 910 step_preserves_misalignment_p
e092c20e 911 = multiple_p (DR_STEP_ALIGNMENT (dr_info->dr) * vf, vect_align_c);
38682b67 912
9e879814 913 if (!step_preserves_misalignment_p && dump_enabled_p ())
914 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
aec313e5 915 "step doesn't divide the vector alignment.\n");
38682b67 916 }
9dd88d41 917
a5456a6d 918 unsigned int base_alignment = drb->base_alignment;
919 unsigned int base_misalignment = drb->base_misalignment;
fb85abff 920
4f372c2c 921 /* Calculate the maximum of the pooled base address alignment and the
922 alignment that we can compute for DR itself. */
923 innermost_loop_behavior **entry = base_alignments->get (drb->base_address);
924 if (entry && base_alignment < (*entry)->base_alignment)
925 {
926 base_alignment = (*entry)->base_alignment;
927 base_misalignment = (*entry)->base_misalignment;
928 }
929
e092c20e 930 if (drb->offset_alignment < vect_align_c
668dd7dc 931 || !step_preserves_misalignment_p
932 /* We need to know whether the step wrt the vectorized loop is
933 negative when computing the starting misalignment below. */
934 || TREE_CODE (drb->step) != INTEGER_CST)
fb85abff 935 {
6d8fb6cf 936 if (dump_enabled_p ())
a4e972e3 937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
938 "Unknown alignment for access: %T\n", ref);
fa681b45 939 return;
fb85abff 940 }
941
e092c20e 942 if (base_alignment < vect_align_c)
fb85abff 943 {
469f7bc0 944 unsigned int max_alignment;
945 tree base = get_base_for_alignment (drb->base_address, &max_alignment);
e092c20e 946 if (max_alignment < vect_align_c
469f7bc0 947 || !vect_can_force_dr_alignment_p (base,
e092c20e 948 vect_align_c * BITS_PER_UNIT))
fb85abff 949 {
6d8fb6cf 950 if (dump_enabled_p ())
a4e972e3 951 dump_printf_loc (MSG_NOTE, vect_location,
952 "can't force alignment of ref: %T\n", ref);
fa681b45 953 return;
fb85abff 954 }
48e1416a 955
fb85abff 956 /* Force the alignment of the decl.
957 NOTE: This is the only change to the code we make during
958 the analysis phase, before deciding to vectorize the loop. */
6d8fb6cf 959 if (dump_enabled_p ())
a4e972e3 960 dump_printf_loc (MSG_NOTE, vect_location,
961 "force alignment of %T\n", ref);
0822b158 962
abc9513d 963 dr_info->base_decl = base;
964 dr_info->base_misaligned = true;
a5456a6d 965 base_misalignment = 0;
fb85abff 966 }
658a2c19 967 poly_int64 misalignment
968 = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
fb85abff 969
85a846a2 970 /* If this is a backward running DR then first access in the larger
971 vectype actually is N-1 elements before the address in the DR.
972 Adjust misalign accordingly. */
9e879814 973 if (tree_int_cst_sgn (drb->step) < 0)
a5456a6d 974 /* PLUS because STEP is negative. */
975 misalignment += ((TYPE_VECTOR_SUBPARTS (vectype) - 1)
976 * TREE_INT_CST_LOW (drb->step));
85a846a2 977
658a2c19 978 unsigned int const_misalignment;
e092c20e 979 if (!known_misalignment (misalignment, vect_align_c, &const_misalignment))
658a2c19 980 {
981 if (dump_enabled_p ())
a4e972e3 982 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
983 "Non-constant misalignment for access: %T\n", ref);
fa681b45 984 return;
658a2c19 985 }
986
abc9513d 987 SET_DR_MISALIGNMENT (dr_info, const_misalignment);
fb85abff 988
6d8fb6cf 989 if (dump_enabled_p ())
a4e972e3 990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
991 "misalign = %d bytes of ref %T\n",
992 DR_MISALIGNMENT (dr_info), ref);
fb85abff 993
fa681b45 994 return;
fb85abff 995}
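
/* Worked numeric example for the computation above, illustration only:
   with a 16-byte target vector alignment, a pooled base misalignment of 4
   and a DR_INIT of 24 bytes, the total misalignment is 4 + 24 = 28, and
   known_misalignment reduces it modulo 16 to 12, which is the value
   recorded by SET_DR_MISALIGNMENT.  */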
996
cd8306bf 997/* Function vect_update_misalignment_for_peel.
abc9513d 998 Sets DR_INFO's misalignment
999 - to 0 if it has the same alignment as DR_PEEL_INFO,
 1000 - to the misalignment computed using NPEEL if DR_INFO's misalignment is known,
cd8306bf 1001 - to -1 (unknown) otherwise.
fb85abff 1002
abc9513d 1003 DR_INFO - the data reference whose misalignment is to be adjusted.
1004 DR_PEEL_INFO - the data reference whose misalignment is being made
1005 zero in the vector loop by the peel.
fb85abff 1006 NPEEL - the number of iterations in the peel loop if the misalignment
abc9513d 1007 of DR_PEEL_INFO is known at compile time. */
fb85abff 1008
1009static void
abc9513d 1010vect_update_misalignment_for_peel (dr_vec_info *dr_info,
1011 dr_vec_info *dr_peel_info, int npeel)
fb85abff 1012{
1013 unsigned int i;
cd8306bf 1014 vec<dr_p> same_aligned_drs;
fb85abff 1015 struct data_reference *current_dr;
abc9513d 1016 stmt_vec_info peel_stmt_info = dr_peel_info->stmt;
fb85abff 1017
b4d2979c 1018 /* It can be assumed that if dr_info has the same alignment as dr_peel,
1019 it is aligned in the vector loop. */
abc9513d 1020 same_aligned_drs = STMT_VINFO_SAME_ALIGN_REFS (peel_stmt_info);
cd8306bf 1021 FOR_EACH_VEC_ELT (same_aligned_drs, i, current_dr)
fb85abff 1022 {
abc9513d 1023 if (current_dr != dr_info->dr)
fb85abff 1024 continue;
abc9513d 1025 gcc_assert (!known_alignment_for_access_p (dr_info)
1026 || !known_alignment_for_access_p (dr_peel_info)
b4d2979c 1027 || (DR_MISALIGNMENT (dr_info)
1028 == DR_MISALIGNMENT (dr_peel_info)));
abc9513d 1029 SET_DR_MISALIGNMENT (dr_info, 0);
fb85abff 1030 return;
1031 }
1032
e092c20e 1033 unsigned HOST_WIDE_INT alignment;
1034 if (DR_TARGET_ALIGNMENT (dr_info).is_constant (&alignment)
1035 && known_alignment_for_access_p (dr_info)
abc9513d 1036 && known_alignment_for_access_p (dr_peel_info))
fb85abff 1037 {
abc9513d 1038 int misal = DR_MISALIGNMENT (dr_info);
b4d2979c 1039 misal += npeel * TREE_INT_CST_LOW (DR_STEP (dr_info->dr));
e092c20e 1040 misal &= alignment - 1;
abc9513d 1041 SET_DR_MISALIGNMENT (dr_info, misal);
fb85abff 1042 return;
1043 }
1044
6d8fb6cf 1045 if (dump_enabled_p ())
df8e9f7a 1046 dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment " \
1047 "to unknown (-1).\n");
abc9513d 1048 SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
fb85abff 1049}
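
/* Worked numeric example, illustration only: peeling NPEEL = 3 iterations
   of a data-ref with a 4-byte step, a known misalignment of 4 and a
   16-byte target alignment gives (4 + 3 * 4) & 15 = 0, i.e. the access
   becomes aligned in the vectorized loop.  */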
1050
1051
2f6fec15 1052/* Function verify_data_ref_alignment
1053
abc9513d 1054 Return TRUE if DR_INFO can be handled with respect to alignment. */
2f6fec15 1055
ed9370cc 1056static opt_result
abc9513d 1057verify_data_ref_alignment (dr_vec_info *dr_info)
2f6fec15 1058{
f6593f36 1059 enum dr_alignment_support supportable_dr_alignment
abc9513d 1060 = vect_supportable_dr_alignment (dr_info, false);
2f6fec15 1061 if (!supportable_dr_alignment)
ed9370cc 1062 return opt_result::failure_at
1063 (dr_info->stmt->stmt,
1064 DR_IS_READ (dr_info->dr)
1065 ? "not vectorized: unsupported unaligned load: %T\n"
1066 : "not vectorized: unsupported unaligned store: %T\n",
1067 DR_REF (dr_info->dr));
2f6fec15 1068
1069 if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
1070 dump_printf_loc (MSG_NOTE, vect_location,
1071 "Vectorizing an unaligned access.\n");
1072
ed9370cc 1073 return opt_result::success ();
2f6fec15 1074}
1075
fb85abff 1076/* Function vect_verify_datarefs_alignment
1077
1078 Return TRUE if all data references in the loop can be
1079 handled with respect to alignment. */
1080
ed9370cc 1081opt_result
2f6fec15 1082vect_verify_datarefs_alignment (loop_vec_info vinfo)
fb85abff 1083{
a99aba41 1084 vec<data_reference_p> datarefs = vinfo->shared->datarefs;
fb85abff 1085 struct data_reference *dr;
fb85abff 1086 unsigned int i;
1087
f1f41a6c 1088 FOR_EACH_VEC_ELT (datarefs, i, dr)
433b0ea3 1089 {
db72d3bf 1090 dr_vec_info *dr_info = vinfo->lookup_dr (dr);
abc9513d 1091 stmt_vec_info stmt_info = dr_info->stmt;
433b0ea3 1092
1093 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1094 continue;
f6593f36 1095
1096 /* For interleaving, only the alignment of the first access matters. */
1097 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
0219dc42 1098 && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
c86d8a47 1099 continue;
f6593f36 1100
1101 /* Strided accesses perform only component accesses, alignment is
1102 irrelevant for them. */
1103 if (STMT_VINFO_STRIDED_P (stmt_info)
1104 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
c86d8a47 1105 continue;
f6593f36 1106
ed9370cc 1107 opt_result res = verify_data_ref_alignment (dr_info);
1108 if (!res)
1109 return res;
433b0ea3 1110 }
6ea6a380 1111
ed9370cc 1112 return opt_result::success ();
fb85abff 1113}
1114
cfa724cf 1115/* Given a memory reference EXP return whether its alignment is less
1116 than its size. */
1117
1118static bool
1119not_size_aligned (tree exp)
1120{
e913b5cd 1121 if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
cfa724cf 1122 return true;
1123
e913b5cd 1124 return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp)))
cfa724cf 1125 > get_object_alignment (exp));
1126}
fb85abff 1127
1128/* Function vector_alignment_reachable_p
1129
abc9513d 1130 Return true if vector alignment for DR_INFO is reachable by peeling
fb85abff 1131 a few loop iterations. Return false otherwise. */
1132
1133static bool
abc9513d 1134vector_alignment_reachable_p (dr_vec_info *dr_info)
fb85abff 1135{
abc9513d 1136 stmt_vec_info stmt_info = dr_info->stmt;
fb85abff 1137 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1138
ee612634 1139 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
fb85abff 1140 {
1141 /* For interleaved access we peel only if number of iterations in
1142 the prolog loop ({VF - misalignment}), is a multiple of the
1143 number of the interleaved accesses. */
1144 int elem_size, mis_in_elements;
fb85abff 1145
1146 /* FORNOW: handle only known alignment. */
abc9513d 1147 if (!known_alignment_for_access_p (dr_info))
fb85abff 1148 return false;
1149
32a4b2d8 1150 poly_uint64 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1151 poly_uint64 vector_size = GET_MODE_SIZE (TYPE_MODE (vectype));
1152 elem_size = vector_element_size (vector_size, nelements);
abc9513d 1153 mis_in_elements = DR_MISALIGNMENT (dr_info) / elem_size;
fb85abff 1154
e1009321 1155 if (!multiple_p (nelements - mis_in_elements, DR_GROUP_SIZE (stmt_info)))
fb85abff 1156 return false;
1157 }
1158
1159 /* If misalignment is known at the compile time then allow peeling
1160 only if natural alignment is reachable through peeling. */
abc9513d 1161 if (known_alignment_for_access_p (dr_info) && !aligned_access_p (dr_info))
fb85abff 1162 {
48e1416a 1163 HOST_WIDE_INT elmsize =
fb85abff 1164 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
6d8fb6cf 1165 if (dump_enabled_p ())
fb85abff 1166 {
78bb46f5 1167 dump_printf_loc (MSG_NOTE, vect_location,
bffe1cb4 1168 "data size = %wd. misalignment = %d.\n", elmsize,
1169 DR_MISALIGNMENT (dr_info));
fb85abff 1170 }
abc9513d 1171 if (DR_MISALIGNMENT (dr_info) % elmsize)
fb85abff 1172 {
6d8fb6cf 1173 if (dump_enabled_p ())
78bb46f5 1174 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1175 "data size does not divide the misalignment.\n");
fb85abff 1176 return false;
1177 }
1178 }
1179
abc9513d 1180 if (!known_alignment_for_access_p (dr_info))
fb85abff 1181 {
abc9513d 1182 tree type = TREE_TYPE (DR_REF (dr_info->dr));
1183 bool is_packed = not_size_aligned (DR_REF (dr_info->dr));
6d8fb6cf 1184 if (dump_enabled_p ())
78bb46f5 1185 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
33a82fb9 1186 "Unknown misalignment, %snaturally aligned\n",
1187 is_packed ? "not " : "");
1188 return targetm.vectorize.vector_alignment_reachable (type, is_packed);
fb85abff 1189 }
1190
1191 return true;
1192}
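
/* Example of the interleaving condition above, illustration only: with
   4-element vectors, a group size of 2 and a misalignment of 2 elements,
   the prologue would peel 4 - 2 = 2 scalar iterations, a multiple of the
   group size, so alignment is reachable by peeling; a misalignment of 1
   element would require 3 peeled iterations and the check fails.  */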
1193
0822b158 1194
abc9513d 1195/* Calculate the cost of the memory access represented by DR_INFO. */
0822b158 1196
f97dec81 1197static void
abc9513d 1198vect_get_data_access_cost (dr_vec_info *dr_info,
0822b158 1199 unsigned int *inside_cost,
f97dec81 1200 unsigned int *outside_cost,
28d0cd4a 1201 stmt_vector_for_cost *body_cost_vec,
1202 stmt_vector_for_cost *prologue_cost_vec)
0822b158 1203{
abc9513d 1204 stmt_vec_info stmt_info = dr_info->stmt;
0822b158 1205 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4eb17cb6 1206 int ncopies;
1207
1208 if (PURE_SLP_STMT (stmt_info))
1209 ncopies = 1;
1210 else
1211 ncopies = vect_get_num_copies (loop_vinfo, STMT_VINFO_VECTYPE (stmt_info));
0822b158 1212
abc9513d 1213 if (DR_IS_READ (dr_info->dr))
1ce0a2db 1214 vect_get_load_cost (stmt_info, ncopies, true, inside_cost, outside_cost,
28d0cd4a 1215 prologue_cost_vec, body_cost_vec, false);
0822b158 1216 else
1ce0a2db 1217 vect_get_store_cost (stmt_info, ncopies, inside_cost, body_cost_vec);
0822b158 1218
6d8fb6cf 1219 if (dump_enabled_p ())
7bd765d4 1220 dump_printf_loc (MSG_NOTE, vect_location,
1221 "vect_get_data_access_cost: inside_cost = %d, "
78bb46f5 1222 "outside_cost = %d.\n", *inside_cost, *outside_cost);
0822b158 1223}
1224
1225
41500e78 1226typedef struct _vect_peel_info
1227{
abc9513d 1228 dr_vec_info *dr_info;
487798e2 1229 int npeel;
41500e78 1230 unsigned int count;
1231} *vect_peel_info;
1232
1233typedef struct _vect_peel_extended_info
1234{
1235 struct _vect_peel_info peel_info;
1236 unsigned int inside_cost;
1237 unsigned int outside_cost;
41500e78 1238} *vect_peel_extended_info;
1239
1240
1241/* Peeling hashtable helpers. */
1242
1243struct peel_info_hasher : free_ptr_hash <_vect_peel_info>
1244{
1245 static inline hashval_t hash (const _vect_peel_info *);
1246 static inline bool equal (const _vect_peel_info *, const _vect_peel_info *);
1247};
1248
1249inline hashval_t
1250peel_info_hasher::hash (const _vect_peel_info *peel_info)
1251{
1252 return (hashval_t) peel_info->npeel;
1253}
1254
1255inline bool
1256peel_info_hasher::equal (const _vect_peel_info *a, const _vect_peel_info *b)
1257{
1258 return (a->npeel == b->npeel);
1259}
1260
1261
abc9513d 1262/* Insert DR_INFO into peeling hash table with NPEEL as key. */
0822b158 1263
1264static void
41500e78 1265vect_peeling_hash_insert (hash_table<peel_info_hasher> *peeling_htab,
abc9513d 1266 loop_vec_info loop_vinfo, dr_vec_info *dr_info,
0822b158 1267 int npeel)
1268{
1269 struct _vect_peel_info elem, *slot;
3e871d4d 1270 _vect_peel_info **new_slot;
abc9513d 1271 bool supportable_dr_alignment
1272 = vect_supportable_dr_alignment (dr_info, true);
0822b158 1273
1274 elem.npeel = npeel;
41500e78 1275 slot = peeling_htab->find (&elem);
0822b158 1276 if (slot)
1277 slot->count++;
1278 else
1279 {
1280 slot = XNEW (struct _vect_peel_info);
1281 slot->npeel = npeel;
abc9513d 1282 slot->dr_info = dr_info;
0822b158 1283 slot->count = 1;
41500e78 1284 new_slot = peeling_htab->find_slot (slot, INSERT);
0822b158 1285 *new_slot = slot;
1286 }
1287
3e398f5b 1288 if (!supportable_dr_alignment
1289 && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
0822b158 1290 slot->count += VECT_MAX_COST;
1291}
1292
1293
1294/* Traverse peeling hash table to find peeling option that aligns maximum
1295 number of data accesses. */
1296
3e871d4d 1297int
1298vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
1299 _vect_peel_extended_info *max)
0822b158 1300{
3e871d4d 1301 vect_peel_info elem = *slot;
0822b158 1302
593fa4d1 1303 if (elem->count > max->peel_info.count
1304 || (elem->count == max->peel_info.count
1305 && max->peel_info.npeel > elem->npeel))
0822b158 1306 {
1307 max->peel_info.npeel = elem->npeel;
1308 max->peel_info.count = elem->count;
abc9513d 1309 max->peel_info.dr_info = elem->dr_info;
0822b158 1310 }
1311
1312 return 1;
1313}
1314
db72d3bf 1315/* Get the costs of peeling NPEEL iterations for LOOP_VINFO, checking
1316 data access costs for all data refs. If UNKNOWN_MISALIGNMENT is true,
1317 we assume DR0_INFO's misalignment will be zero after peeling. */
0822b158 1318
cd8306bf 1319static void
db72d3bf 1320vect_get_peeling_costs_all_drs (loop_vec_info loop_vinfo,
abc9513d 1321 dr_vec_info *dr0_info,
cd8306bf 1322 unsigned int *inside_cost,
1323 unsigned int *outside_cost,
1324 stmt_vector_for_cost *body_cost_vec,
28d0cd4a 1325 stmt_vector_for_cost *prologue_cost_vec,
5081fac8 1326 unsigned int npeel,
1327 bool unknown_misalignment)
0822b158 1328{
db72d3bf 1329 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
cd8306bf 1330 unsigned i;
1331 data_reference *dr;
0822b158 1332
f1f41a6c 1333 FOR_EACH_VEC_ELT (datarefs, i, dr)
0822b158 1334 {
db72d3bf 1335 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
abc9513d 1336 stmt_vec_info stmt_info = dr_info->stmt;
3bbc3f79 1337 if (!STMT_VINFO_RELEVANT_P (stmt_info))
1338 continue;
1339
0822b158 1340 /* For interleaving, only the alignment of the first access
1341 matters. */
ee612634 1342 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
0219dc42 1343 && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
1344 continue;
0822b158 1345
d84b8514 1346 /* Strided accesses perform only component accesses, alignment is
1347 irrelevant for them. */
1348 if (STMT_VINFO_STRIDED_P (stmt_info)
1349 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1350 continue;
1351
cd8306bf 1352 int save_misalignment;
abc9513d 1353 save_misalignment = DR_MISALIGNMENT (dr_info);
db755b03 1354 if (npeel == 0)
1355 ;
abc9513d 1356 else if (unknown_misalignment && dr_info == dr0_info)
1357 SET_DR_MISALIGNMENT (dr_info, 0);
cd8306bf 1358 else
abc9513d 1359 vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
1360 vect_get_data_access_cost (dr_info, inside_cost, outside_cost,
28d0cd4a 1361 body_cost_vec, prologue_cost_vec);
abc9513d 1362 SET_DR_MISALIGNMENT (dr_info, save_misalignment);
0822b158 1363 }
cd8306bf 1364}
1365
1366/* Traverse peeling hash table and calculate cost for each peeling option.
1367 Find the one with the lowest cost. */
1368
1369int
1370vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
1371 _vect_peel_extended_info *min)
1372{
1373 vect_peel_info elem = *slot;
1374 int dummy;
1375 unsigned int inside_cost = 0, outside_cost = 0;
abc9513d 1376 stmt_vec_info stmt_info = elem->dr_info->stmt;
cd8306bf 1377 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1378 stmt_vector_for_cost prologue_cost_vec, body_cost_vec,
1379 epilogue_cost_vec;
1380
1381 prologue_cost_vec.create (2);
1382 body_cost_vec.create (2);
1383 epilogue_cost_vec.create (2);
1384
db72d3bf 1385 vect_get_peeling_costs_all_drs (loop_vinfo, elem->dr_info, &inside_cost,
1386 &outside_cost, &body_cost_vec,
1387 &prologue_cost_vec, elem->npeel, false);
0822b158 1388
f0f51716 1389 body_cost_vec.release ();
1390
41ae9eb4 1391 outside_cost += vect_get_known_peeling_cost
1392 (loop_vinfo, elem->npeel, &dummy,
2a9a3444 1393 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1394 &prologue_cost_vec, &epilogue_cost_vec);
f97dec81 1395
1396 /* Prologue and epilogue costs are added to the target model later.
1397 These costs depend only on the scalar iteration cost, the
1398 number of peeling iterations finally chosen, and the number of
1399 misaligned statements. So discard the information found here. */
f1f41a6c 1400 prologue_cost_vec.release ();
1401 epilogue_cost_vec.release ();
0822b158 1402
1403 if (inside_cost < min->inside_cost
cd8306bf 1404 || (inside_cost == min->inside_cost
1405 && outside_cost < min->outside_cost))
0822b158 1406 {
1407 min->inside_cost = inside_cost;
1408 min->outside_cost = outside_cost;
abc9513d 1409 min->peel_info.dr_info = elem->dr_info;
0822b158 1410 min->peel_info.npeel = elem->npeel;
cd8306bf 1411 min->peel_info.count = elem->count;
0822b158 1412 }
1413
1414 return 1;
1415}
1416
1417
1418/* Choose best peeling option by traversing peeling hash table and either
1419 choosing an option with the lowest cost (if cost model is enabled) or the
1420 option that aligns as many accesses as possible. */
1421
83786d5e 1422static struct _vect_peel_extended_info
41500e78 1423vect_peeling_hash_choose_best_peeling (hash_table<peel_info_hasher> *peeling_htab,
f0f51716 1424 loop_vec_info loop_vinfo)
0822b158 1425{
1426 struct _vect_peel_extended_info res;
1427
abc9513d 1428 res.peel_info.dr_info = NULL;
0822b158 1429
3e398f5b 1430 if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
0822b158 1431 {
1432 res.inside_cost = INT_MAX;
1433 res.outside_cost = INT_MAX;
41500e78 1434 peeling_htab->traverse <_vect_peel_extended_info *,
1435 vect_peeling_hash_get_lowest_cost> (&res);
0822b158 1436 }
1437 else
1438 {
1439 res.peel_info.count = 0;
41500e78 1440 peeling_htab->traverse <_vect_peel_extended_info *,
1441 vect_peeling_hash_get_most_frequent> (&res);
83786d5e 1442 res.inside_cost = 0;
1443 res.outside_cost = 0;
0822b158 1444 }
1445
83786d5e 1446 return res;
0822b158 1447}
1448
cd8306bf 1449/* Return true if the new peeling NPEEL is supported. */
1450
1451static bool
abc9513d 1452vect_peeling_supportable (loop_vec_info loop_vinfo, dr_vec_info *dr0_info,
cd8306bf 1453 unsigned npeel)
1454{
1455 unsigned i;
1456 struct data_reference *dr = NULL;
1457 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
cd8306bf 1458 enum dr_alignment_support supportable_dr_alignment;
1459
1460 /* Ensure that all data refs can be vectorized after the peel. */
1461 FOR_EACH_VEC_ELT (datarefs, i, dr)
1462 {
1463 int save_misalignment;
1464
abc9513d 1465 if (dr == dr0_info->dr)
cd8306bf 1466 continue;
1467
db72d3bf 1468 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
abc9513d 1469 stmt_vec_info stmt_info = dr_info->stmt;
cd8306bf 1470 /* For interleaving, only the alignment of the first access
1471 matters. */
1472 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
0219dc42 1473 && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
cd8306bf 1474 continue;
1475
1476 /* Strided accesses perform only component accesses, alignment is
1477 irrelevant for them. */
1478 if (STMT_VINFO_STRIDED_P (stmt_info)
1479 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
1480 continue;
1481
abc9513d 1482 save_misalignment = DR_MISALIGNMENT (dr_info);
1483 vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
1484 supportable_dr_alignment
1485 = vect_supportable_dr_alignment (dr_info, false);
1486 SET_DR_MISALIGNMENT (dr_info, save_misalignment);
cd8306bf 1487
1488 if (!supportable_dr_alignment)
1489 return false;
1490 }
1491
1492 return true;
1493}
0822b158 1494
fb85abff 1495/* Function vect_enhance_data_refs_alignment
1496
1497 This pass will use loop versioning and loop peeling in order to enhance
1498 the alignment of data references in the loop.
1499
1500 FOR NOW: we assume that whatever versioning/peeling takes place, only the
282bf14c 1501 original loop is to be vectorized. Any other loops that are created by
fb85abff 1502 the transformations performed in this pass are not supposed to be
282bf14c 1503 vectorized. This restriction will be relaxed.
fb85abff 1504
 1505 This pass will require a cost model to guide it on whether to apply peeling
282bf14c 1506 or versioning, or a combination of the two. For example, the scheme that
fb85abff 1507 Intel uses when given a loop with several memory accesses is as follows:
 1508 choose one memory access ('p') whose alignment you want to force by doing
282bf14c 1509 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
fb85abff 1510 other accesses are not necessarily aligned, or (2) use loop versioning to
1511 generate one loop in which all accesses are aligned, and another loop in
1512 which only 'p' is necessarily aligned.
1513
1514 ("Automatic Intra-Register Vectorization for the Intel Architecture",
 1515 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1516 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1517
282bf14c 1518 Devising a cost model is the most critical aspect of this work. It will
fb85abff 1519 guide us on which access to peel for, whether to use loop versioning, how
282bf14c 1520 many versions to create, etc. The cost model will probably consist of
fb85abff 1521 generic considerations as well as target specific considerations (on
1522 powerpc for example, misaligned stores are more painful than misaligned
1523 loads).
1524
1525 Here are the general steps involved in alignment enhancements:
1526
1527 -- original loop, before alignment analysis:
1528 for (i=0; i<N; i++){
1529 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1530 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1531 }
1532
1533 -- After vect_compute_data_refs_alignment:
1534 for (i=0; i<N; i++){
1535 x = q[i]; # DR_MISALIGNMENT(q) = 3
1536 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1537 }
1538
1539 -- Possibility 1: we do loop versioning:
1540 if (p is aligned) {
1541 for (i=0; i<N; i++){ # loop 1A
1542 x = q[i]; # DR_MISALIGNMENT(q) = 3
1543 p[i] = y; # DR_MISALIGNMENT(p) = 0
1544 }
1545 }
1546 else {
1547 for (i=0; i<N; i++){ # loop 1B
1548 x = q[i]; # DR_MISALIGNMENT(q) = 3
1549 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1550 }
1551 }
1552
1553 -- Possibility 2: we do loop peeling:
1554 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1555 x = q[i];
1556 p[i] = y;
1557 }
1558 for (i = 3; i < N; i++){ # loop 2A
1559 x = q[i]; # DR_MISALIGNMENT(q) = 0
1560 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1561 }
1562
1563 -- Possibility 3: combination of loop peeling and versioning:
1564 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1565 x = q[i];
1566 p[i] = y;
1567 }
1568 if (p is aligned) {
1569 for (i = 3; i<N; i++){ # loop 3A
1570 x = q[i]; # DR_MISALIGNMENT(q) = 0
1571 p[i] = y; # DR_MISALIGNMENT(p) = 0
1572 }
1573 }
1574 else {
1575 for (i = 3; i<N; i++){ # loop 3B
1576 x = q[i]; # DR_MISALIGNMENT(q) = 0
1577 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1578 }
1579 }
1580
282bf14c 1581 These loops are later passed to loop_transform to be vectorized. The
fb85abff 1582 vectorizer will use the alignment information to guide the transformation
1583 (whether to generate regular loads/stores, or with special handling for
1584 misalignment). */
1585
ed9370cc 1586opt_result
fb85abff 1587vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1588{
f1f41a6c 1589 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
fb85abff 1590 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5081fac8 1591 enum dr_alignment_support supportable_dr_alignment;
abc9513d 1592 dr_vec_info *first_store = NULL;
1593 dr_vec_info *dr0_info = NULL;
fb85abff 1594 struct data_reference *dr;
0822b158 1595 unsigned int i, j;
fb85abff 1596 bool do_peeling = false;
1597 bool do_versioning = false;
0822b158 1598 unsigned int npeel = 0;
83786d5e 1599 bool one_misalignment_known = false;
1600 bool one_misalignment_unknown = false;
5081fac8 1601 bool one_dr_unsupportable = false;
abc9513d 1602 dr_vec_info *unsupportable_dr_info = NULL;
d75596cd 1603 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
0822b158 1604 unsigned possible_npeel_number = 1;
1605 tree vectype;
d75596cd 1606 unsigned int mis, same_align_drs_max = 0;
41500e78 1607 hash_table<peel_info_hasher> peeling_htab (1);
fb85abff 1608
88f6eb8f 1609 DUMP_VECT_SCOPE ("vect_enhance_data_refs_alignment");
fb85abff 1610
00ecf4da 1611 /* Reset data so we can safely be called multiple times. */
1612 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
1613 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = 0;
1614
fb85abff 1615 /* While cost model enhancements are expected in the future, the high level
1616 view of the code at this time is as follows:
1617
ec2886ed 1618 A) If there is a misaligned access then see if peeling to align
1619 this access can make all data references satisfy
454f25be 1620 vect_supportable_dr_alignment. If so, update data structures
1621 as needed and return true.
fb85abff 1622
1623 B) If peeling wasn't possible and there is a data reference with an
1624 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1625 then see if loop versioning checks can be used to make all data
1626 references satisfy vect_supportable_dr_alignment. If so, update
1627 data structures as needed and return true.
1628
1629 C) If neither peeling nor versioning were successful then return false if
1630 any data reference does not satisfy vect_supportable_dr_alignment.
1631
1632 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1633
1634 Note, Possibility 3 above (which is peeling and versioning together) is not
1635 being done at this time. */
1636
1637 /* (1) Peeling to force alignment. */
1638
1639 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1640 Considerations:
1641 + How many accesses will become aligned due to the peeling
1642 - How many accesses will become unaligned due to the peeling,
1643 and the cost of misaligned accesses.
48e1416a 1644 - The cost of peeling (the extra runtime checks, the increase
0822b158 1645 in code size). */
fb85abff 1646
f1f41a6c 1647 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 1648 {
db72d3bf 1649 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
abc9513d 1650 stmt_vec_info stmt_info = dr_info->stmt;
fb85abff 1651
1ad41595 1652 if (!STMT_VINFO_RELEVANT_P (stmt_info))
b04940e7 1653 continue;
1654
fb85abff 1655 /* For interleaving, only the alignment of the first access
1656 matters. */
ee612634 1657 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
0219dc42 1658 && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info)
1659 continue;
fb85abff 1660
fa681b45 1661 /* For scatter-gather or invariant accesses there is nothing
1662 to enhance. */
1663 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)
1664 || integer_zerop (DR_STEP (dr)))
b04940e7 1665 continue;
1666
e1c75243 1667 /* Strided accesses perform only component accesses, alignment is
f634c3e9 1668 irrelevant for them. */
e1c75243 1669 if (STMT_VINFO_STRIDED_P (stmt_info)
994be998 1670 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
f634c3e9 1671 continue;
1672
abc9513d 1673 supportable_dr_alignment = vect_supportable_dr_alignment (dr_info, true);
1674 do_peeling = vector_alignment_reachable_p (dr_info);
0822b158 1675 if (do_peeling)
fb85abff 1676 {
abc9513d 1677 if (known_alignment_for_access_p (dr_info))
0822b158 1678 {
aec313e5 1679 unsigned int npeel_tmp = 0;
f1b8c740 1680 bool negative = tree_int_cst_compare (DR_STEP (dr),
1681 size_zero_node) < 0;
0822b158 1682
aec313e5 1683 vectype = STMT_VINFO_VECTYPE (stmt_info);
e092c20e 1684 /* If known_alignment_for_access_p then we have set
 1685 DR_MISALIGNMENT, which is only done if we know it at compile
 1686 time, so it is safe to assume the target alignment is constant.
1687 */
1688 unsigned int target_align =
1689 DR_TARGET_ALIGNMENT (dr_info).to_constant ();
abc9513d 1690 unsigned int dr_size = vect_get_scalar_dr_size (dr_info);
1691 mis = (negative
1692 ? DR_MISALIGNMENT (dr_info)
1693 : -DR_MISALIGNMENT (dr_info));
1694 if (DR_MISALIGNMENT (dr_info) != 0)
aec313e5 1695 npeel_tmp = (mis & (target_align - 1)) / dr_size;
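	      /* Illustrative sketch (not from the original sources): with a
	         16-byte target alignment, 4-byte elements and a forward access
	         whose DR_MISALIGNMENT is 12, MIS is -12, so
	         (MIS & 15) / 4 == 4 / 4 == 1; peeling one scalar iteration
	         moves the access onto a 16-byte boundary.  */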
0822b158 1696
1697 /* For multiple types, it is possible that the bigger type access
282bf14c 1698 will have more than one peeling option. E.g., a loop with two
0822b158 1699 types: one of size (vector size / 4), and the other one of
282bf14c 1700 size (vector size / 8). The vectorization factor will be 8. If both
df8e9f7a 1701 accesses are misaligned by 3, the first one needs one scalar
282bf14c 1702 iteration to be aligned, and the second one needs 5. But the
4bec4fee 1703 first one will also be aligned by peeling 5 scalar
0822b158 1704 iterations, and in that case both accesses will be aligned.
 1705 Hence, besides the immediate peeling amount, we also want
 1706 to try adding a full vector size, as long as we don't exceed
 1707 the vectorization factor.
df8e9f7a 1708 We do this automatically when using the cost model, since we
 1709 calculate the cost for every peeling option. */
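	  /* An illustrative sketch of the example above (not from the
	     original sources), assuming 16-byte vectors:

	       for (i = 0; i < N; i++) {
	         x += a[i];   # int, 4 bytes, 4 elements per vector
	         y += b[i];   # short, 2 bytes, 8 elements per vector
	       }

	     With both accesses misaligned by 3 elements, 'a' is aligned after
	     peeling 1 iteration and 'b' after 5; since 5 == 1 modulo 4,
	     peeling 5 also aligns 'a'.  Recording npeel = 1 and npeel = 1 + 4
	     for 'a' lets the cost model discover that common option.  */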
3e398f5b 1710 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
c1bee668 1711 {
d75596cd 1712 poly_uint64 nscalars = (STMT_SLP_TYPE (stmt_info)
e1009321 1713 ? vf * DR_GROUP_SIZE (stmt_info) : vf);
d75596cd 1714 possible_npeel_number
1715 = vect_get_num_vectors (nscalars, vectype);
0822b158 1716
5081fac8 1717 /* NPEEL_TMP is 0 when there is no misalignment, but also
1718 allow peeling NELEMENTS. */
abc9513d 1719 if (DR_MISALIGNMENT (dr_info) == 0)
df8e9f7a 1720 possible_npeel_number++;
1721 }
0822b158 1722
df8e9f7a 1723 /* Save info about DR in the hash table. Also include peeling
1724 amounts according to the explanation above. */
0822b158 1725 for (j = 0; j < possible_npeel_number; j++)
1726 {
41500e78 1727 vect_peeling_hash_insert (&peeling_htab, loop_vinfo,
abc9513d 1728 dr_info, npeel_tmp);
aec313e5 1729 npeel_tmp += target_align / dr_size;
0822b158 1730 }
1731
83786d5e 1732 one_misalignment_known = true;
0822b158 1733 }
1734 else
1735 {
6046367e 1736 /* If we don't know any misalignment values, we prefer
 1737 peeling for the data-ref that has the maximum number of data-refs
0822b158 1738 with the same alignment, unless the target prefers to align
 1739 stores over loads. */
83786d5e 1740 unsigned same_align_drs
1741 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
abc9513d 1742 if (!dr0_info
83786d5e 1743 || same_align_drs_max < same_align_drs)
1744 {
1745 same_align_drs_max = same_align_drs;
abc9513d 1746 dr0_info = dr_info;
83786d5e 1747 }
1748 /* For data-refs with the same number of related
1749 accesses prefer the one where the misalign
1750 computation will be invariant in the outermost loop. */
1751 else if (same_align_drs_max == same_align_drs)
1752 {
1753 struct loop *ivloop0, *ivloop;
1754 ivloop0 = outermost_invariant_loop_for_expr
abc9513d 1755 (loop, DR_BASE_ADDRESS (dr0_info->dr));
83786d5e 1756 ivloop = outermost_invariant_loop_for_expr
1757 (loop, DR_BASE_ADDRESS (dr));
1758 if ((ivloop && !ivloop0)
1759 || (ivloop && ivloop0
1760 && flow_loop_nested_p (ivloop, ivloop0)))
abc9513d 1761 dr0_info = dr_info;
83786d5e 1762 }
0822b158 1763
5081fac8 1764 one_misalignment_unknown = true;
1765
1766 /* Check for data refs with unsupportable alignment that
1767 can be peeled. */
1768 if (!supportable_dr_alignment)
1769 {
1770 one_dr_unsupportable = true;
abc9513d 1771 unsupportable_dr_info = dr_info;
5081fac8 1772 }
1773
83786d5e 1774 if (!first_store && DR_IS_WRITE (dr))
abc9513d 1775 first_store = dr_info;
0822b158 1776 }
1777 }
1778 else
1779 {
abc9513d 1780 if (!aligned_access_p (dr_info))
0822b158 1781 {
6d8fb6cf 1782 if (dump_enabled_p ())
78bb46f5 1783 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1784 "vector alignment may not be reachable\n");
0822b158 1785 break;
1786 }
1787 }
fb85abff 1788 }
1789
2cd0995e 1790 /* Check if we can possibly peel the loop. */
1791 if (!vect_can_advance_ivs_p (loop_vinfo)
5ee742c4 1792 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop))
1793 || loop->inner)
fb85abff 1794 do_peeling = false;
1795
b565a9ba 1796 struct _vect_peel_extended_info peel_for_known_alignment;
1797 struct _vect_peel_extended_info peel_for_unknown_alignment;
1798 struct _vect_peel_extended_info best_peel;
1799
1800 peel_for_unknown_alignment.inside_cost = INT_MAX;
1801 peel_for_unknown_alignment.outside_cost = INT_MAX;
1802 peel_for_unknown_alignment.peel_info.count = 0;
83786d5e 1803
192f7876 1804 if (do_peeling
b565a9ba 1805 && one_misalignment_unknown)
0822b158 1806 {
0822b158 1807 /* Check if the target requires preferring stores over loads, i.e., if
 1808 misaligned stores are more expensive than misaligned loads (taking
 1809 drs with the same alignment into account). */
b565a9ba 1810 unsigned int load_inside_cost = 0;
1811 unsigned int load_outside_cost = 0;
1812 unsigned int store_inside_cost = 0;
1813 unsigned int store_outside_cost = 0;
d75596cd 1814 unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
b565a9ba 1815
1816 stmt_vector_for_cost dummy;
1817 dummy.create (2);
db72d3bf 1818 vect_get_peeling_costs_all_drs (loop_vinfo, dr0_info,
b565a9ba 1819 &load_inside_cost,
1820 &load_outside_cost,
28d0cd4a 1821 &dummy, &dummy, estimated_npeels, true);
b565a9ba 1822 dummy.release ();
1823
1824 if (first_store)
1825 {
83786d5e 1826 dummy.create (2);
db72d3bf 1827 vect_get_peeling_costs_all_drs (loop_vinfo, first_store,
83786d5e 1828 &store_inside_cost,
1829 &store_outside_cost,
28d0cd4a 1830 &dummy, &dummy,
1831 estimated_npeels, true);
f1f41a6c 1832 dummy.release ();
b565a9ba 1833 }
1834 else
1835 {
1836 store_inside_cost = INT_MAX;
1837 store_outside_cost = INT_MAX;
1838 }
0822b158 1839
b565a9ba 1840 if (load_inside_cost > store_inside_cost
1841 || (load_inside_cost == store_inside_cost
1842 && load_outside_cost > store_outside_cost))
1843 {
abc9513d 1844 dr0_info = first_store;
b565a9ba 1845 peel_for_unknown_alignment.inside_cost = store_inside_cost;
1846 peel_for_unknown_alignment.outside_cost = store_outside_cost;
1847 }
1848 else
1849 {
1850 peel_for_unknown_alignment.inside_cost = load_inside_cost;
1851 peel_for_unknown_alignment.outside_cost = load_outside_cost;
1852 }
83786d5e 1853
b565a9ba 1854 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
1855 prologue_cost_vec.create (2);
1856 epilogue_cost_vec.create (2);
83786d5e 1857
b565a9ba 1858 int dummy2;
1859 peel_for_unknown_alignment.outside_cost += vect_get_known_peeling_cost
d75596cd 1860 (loop_vinfo, estimated_npeels, &dummy2,
b565a9ba 1861 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1862 &prologue_cost_vec, &epilogue_cost_vec);
83786d5e 1863
b565a9ba 1864 prologue_cost_vec.release ();
1865 epilogue_cost_vec.release ();
0822b158 1866
b565a9ba 1867 peel_for_unknown_alignment.peel_info.count = 1
abc9513d 1868 + STMT_VINFO_SAME_ALIGN_REFS (dr0_info->stmt).length ();
0822b158 1869 }
1870
b565a9ba 1871 peel_for_unknown_alignment.peel_info.npeel = 0;
abc9513d 1872 peel_for_unknown_alignment.peel_info.dr_info = dr0_info;
b565a9ba 1873
1874 best_peel = peel_for_unknown_alignment;
1875
83786d5e 1876 peel_for_known_alignment.inside_cost = INT_MAX;
1877 peel_for_known_alignment.outside_cost = INT_MAX;
1878 peel_for_known_alignment.peel_info.count = 0;
abc9513d 1879 peel_for_known_alignment.peel_info.dr_info = NULL;
83786d5e 1880
1881 if (do_peeling && one_misalignment_known)
0822b158 1882 {
 1883 /* Peeling is possible, but there is no data access that must be aligned
b565a9ba 1884 in order to be supported. So we try to choose the best possible peeling
 1885 from the hash table. */
83786d5e 1886 peel_for_known_alignment = vect_peeling_hash_choose_best_peeling
f0f51716 1887 (&peeling_htab, loop_vinfo);
0822b158 1888 }
1889
83786d5e 1890 /* Compare costs of peeling for known and unknown alignment. */
abc9513d 1891 if (peel_for_known_alignment.peel_info.dr_info != NULL
b565a9ba 1892 && peel_for_unknown_alignment.inside_cost
1893 >= peel_for_known_alignment.inside_cost)
5081fac8 1894 {
1895 best_peel = peel_for_known_alignment;
b565a9ba 1896
5081fac8 1897 /* If the best peeling for known alignment has NPEEL == 0, perform no
1898 peeling at all except if there is an unsupportable dr that we can
1899 align. */
1900 if (best_peel.peel_info.npeel == 0 && !one_dr_unsupportable)
1901 do_peeling = false;
1902 }
b565a9ba 1903
5081fac8 1904 /* If there is an unsupportable data ref, prefer this over all choices so far
 1905 since any chosen peeling would have to be discarded unless it happened
 1906 to align the unsupportable data ref. */
1907 if (one_dr_unsupportable)
abc9513d 1908 dr0_info = unsupportable_dr_info;
5081fac8 1909 else if (do_peeling)
1910 {
db755b03 1911 /* Calculate the penalty for no peeling, i.e. leaving everything as-is.
f0f51716 1912 TODO: Use nopeel_outside_cost or get rid of it? */
5081fac8 1913 unsigned nopeel_inside_cost = 0;
1914 unsigned nopeel_outside_cost = 0;
b565a9ba 1915
5081fac8 1916 stmt_vector_for_cost dummy;
1917 dummy.create (2);
db72d3bf 1918 vect_get_peeling_costs_all_drs (loop_vinfo, NULL, &nopeel_inside_cost,
28d0cd4a 1919 &nopeel_outside_cost, &dummy, &dummy,
1920 0, false);
5081fac8 1921 dummy.release ();
b565a9ba 1922
5081fac8 1923 /* Add epilogue costs. As we do not peel for alignment here, no prologue
1924 costs will be recorded. */
1925 stmt_vector_for_cost prologue_cost_vec, epilogue_cost_vec;
1926 prologue_cost_vec.create (2);
1927 epilogue_cost_vec.create (2);
b565a9ba 1928
5081fac8 1929 int dummy2;
1930 nopeel_outside_cost += vect_get_known_peeling_cost
1931 (loop_vinfo, 0, &dummy2,
1932 &LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1933 &prologue_cost_vec, &epilogue_cost_vec);
1934
1935 prologue_cost_vec.release ();
1936 epilogue_cost_vec.release ();
b565a9ba 1937
5081fac8 1938 npeel = best_peel.peel_info.npeel;
abc9513d 1939 dr0_info = best_peel.peel_info.dr_info;
83786d5e 1940
5081fac8 1941 /* If not peeling is no more expensive than the best peeling found
 1942 so far, don't perform any peeling. */
1943 if (nopeel_inside_cost <= best_peel.inside_cost)
1944 do_peeling = false;
1945 }
83786d5e 1946
fb85abff 1947 if (do_peeling)
1948 {
abc9513d 1949 stmt_vec_info stmt_info = dr0_info->stmt;
0822b158 1950 vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 1951
abc9513d 1952 if (known_alignment_for_access_p (dr0_info))
fb85abff 1953 {
abc9513d 1954 bool negative = tree_int_cst_compare (DR_STEP (dr0_info->dr),
f1b8c740 1955 size_zero_node) < 0;
0822b158 1956 if (!npeel)
1957 {
1958 /* Since it's known at compile time, compute the number of
1959 iterations in the peeled loop (the peeling factor) for use in
1960 updating DR_MISALIGNMENT values. The peeling factor is the
1961 vectorization factor minus the misalignment as an element
1962 count. */
abc9513d 1963 mis = (negative
1964 ? DR_MISALIGNMENT (dr0_info)
1965 : -DR_MISALIGNMENT (dr0_info));
e092c20e 1966 /* If known_alignment_for_access_p then we have set
 1967 DR_MISALIGNMENT, which is only done if we know it at compile
 1968 time, so it is safe to assume the target alignment is constant.
1969 */
1970 unsigned int target_align =
1971 DR_TARGET_ALIGNMENT (dr0_info).to_constant ();
aec313e5 1972 npeel = ((mis & (target_align - 1))
abc9513d 1973 / vect_get_scalar_dr_size (dr0_info));
0822b158 1974 }
fb85abff 1975
48e1416a 1976 /* For interleaved data access every iteration accesses all the
fb85abff 1977 members of the group, therefore we divide the number of iterations
1978 by the group size. */
ee612634 1979 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
e1009321 1980 npeel /= DR_GROUP_SIZE (stmt_info);
fb85abff 1981
6d8fb6cf 1982 if (dump_enabled_p ())
7bd765d4 1983 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1984 "Try peeling by %d\n", npeel);
fb85abff 1985 }
1986
cd8306bf 1987 /* Ensure that all datarefs can be vectorized after the peel. */
abc9513d 1988 if (!vect_peeling_supportable (loop_vinfo, dr0_info, npeel))
cd8306bf 1989 do_peeling = false;
fb85abff 1990
cd8306bf 1991 /* Check if all datarefs are supportable and log. */
abc9513d 1992 if (do_peeling && known_alignment_for_access_p (dr0_info) && npeel == 0)
0822b158 1993 {
ed9370cc 1994 opt_result stat = vect_verify_datarefs_alignment (loop_vinfo);
0822b158 1995 if (!stat)
1996 do_peeling = false;
1997 else
f0f51716 1998 return stat;
0822b158 1999 }
2000
eb10b471 2001 /* Cost model #1 - honor --param vect-max-peeling-for-alignment. */
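      /* Usage note (illustrative): the limit comes from the command line,
	 e.g.

	   gcc -O3 --param vect-max-peeling-for-alignment=0

	 forbids peeling for alignment altogether, while the default of -1
	 leaves the decision entirely to the cost model.  */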
d7d7032a 2002 if (do_peeling)
2003 {
2004 unsigned max_allowed_peel
2005 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
2006 if (max_allowed_peel != (unsigned)-1)
2007 {
2008 unsigned max_peel = npeel;
2009 if (max_peel == 0)
2010 {
e092c20e 2011 poly_uint64 target_align = DR_TARGET_ALIGNMENT (dr0_info);
2012 unsigned HOST_WIDE_INT target_align_c;
2013 if (target_align.is_constant (&target_align_c))
2014 max_peel =
2015 target_align_c / vect_get_scalar_dr_size (dr0_info) - 1;
2016 else
2017 {
2018 do_peeling = false;
2019 if (dump_enabled_p ())
2020 dump_printf_loc (MSG_NOTE, vect_location,
2021 "Disable peeling, max peels set and vector"
2022 " alignment unknown\n");
2023 }
d7d7032a 2024 }
2025 if (max_peel > max_allowed_peel)
2026 {
2027 do_peeling = false;
2028 if (dump_enabled_p ())
2029 dump_printf_loc (MSG_NOTE, vect_location,
2030 "Disable peeling, max peels reached: %d\n", max_peel);
2031 }
2032 }
2033 }
2034
eb10b471 2035 /* Cost model #2 - if peeling may result in a remaining loop not
d75596cd 2036 iterating enough to be vectorized then do not peel. Since this
2037 is a cost heuristic rather than a correctness decision, use the
2038 most likely runtime value for variable vectorization factors. */
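      /* E.g. (illustrative): with VF == 8, NPEEL == 0 (misalignment unknown,
	 so up to VF - 1 == 7 iterations may be peeled at runtime) and a known
	 trip count of 10, 10 < 8 + 7 holds, so the vector loop might never
	 run and peeling is skipped.  */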
eb10b471 2039 if (do_peeling
2040 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
2041 {
d75596cd 2042 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
2043 unsigned int max_peel = npeel == 0 ? assumed_vf - 1 : npeel;
2044 if ((unsigned HOST_WIDE_INT) LOOP_VINFO_INT_NITERS (loop_vinfo)
2045 < assumed_vf + max_peel)
eb10b471 2046 do_peeling = false;
2047 }
2048
fb85abff 2049 if (do_peeling)
2050 {
2051 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
2052 If the misalignment of DR_i is identical to that of dr0 then set
2053 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
2054 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
2055 by the peeling factor times the element size of DR_i (MOD the
2056 vectorization factor times the size). Otherwise, the
2057 misalignment of DR_i must be set to unknown. */
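	  /* A worked example (illustrative, not from the original sources):
	     with 4-byte elements, a vectorization factor of 4 (16-byte
	     modulus) and npeel == 1, a DR_i whose misalignment equals dr0's
	     becomes 0, while a DR_i with known misalignment 4 becomes
	     (4 + 1 * 4) mod 16 == 8.  */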
f1f41a6c 2058 FOR_EACH_VEC_ELT (datarefs, i, dr)
abc9513d 2059 if (dr != dr0_info->dr)
1ca1d9b2 2060 {
2061 /* Strided accesses perform only component accesses, alignment
2062 is irrelevant for them. */
db72d3bf 2063 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
abc9513d 2064 stmt_info = dr_info->stmt;
1ca1d9b2 2065 if (STMT_VINFO_STRIDED_P (stmt_info)
2066 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
2067 continue;
2068
abc9513d 2069 vect_update_misalignment_for_peel (dr_info, dr0_info, npeel);
1ca1d9b2 2070 }
fb85abff 2071
ec5bf0fb 2072 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0_info;
0822b158 2073 if (npeel)
313a5120 2074 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
0822b158 2075 else
313a5120 2076 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
abc9513d 2077 = DR_MISALIGNMENT (dr0_info);
2078 SET_DR_MISALIGNMENT (dr0_info, 0);
6d8fb6cf 2079 if (dump_enabled_p ())
7bd765d4 2080 {
2081 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2082 "Alignment of access forced using peeling.\n");
7bd765d4 2083 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2084 "Peeling for alignment will be applied.\n");
7bd765d4 2085 }
f0f51716 2086
e4eca2de 2087 /* The inside-loop cost will be accounted for in vectorizable_load
2088 and vectorizable_store correctly with adjusted alignments.
 2089 Drop the body_cost_vec on the floor here. */
ed9370cc 2090 opt_result stat = vect_verify_datarefs_alignment (loop_vinfo);
fb85abff 2091 gcc_assert (stat);
2092 return stat;
2093 }
2094 }
2095
fb85abff 2096 /* (2) Versioning to force alignment. */
2097
2098 /* Try versioning if:
1dbf9bd1 2099 1) optimize loop for speed
2100 2) there is at least one unsupported misaligned data ref with an unknown
fb85abff 2101 misalignment, and
1dbf9bd1 2102 3) all misaligned data refs with a known misalignment are supported, and
2103 4) the number of runtime alignment checks is within reason. */
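  /* An illustrative sketch (not from the original sources) of the code
     shape that versioning for alignment produces, assuming a 16-byte
     vector size (mask 15); the actual guard is built elsewhere from
     LOOP_VINFO_PTR_MASK and the collected may-misalign statements:

       if ((((uintptr_t) p | (uintptr_t) q) & 15) == 0)
         ... vectorized loop, p and q treated as 16-byte aligned ...
       else
         ... unmodified scalar loop ...  */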
fb85abff 2104
48e1416a 2105 do_versioning =
1dbf9bd1 2106 optimize_loop_nest_for_speed_p (loop)
fb85abff 2107 && (!loop->inner); /* FORNOW */
2108
2109 if (do_versioning)
2110 {
f1f41a6c 2111 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 2112 {
db72d3bf 2113 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
abc9513d 2114 stmt_vec_info stmt_info = dr_info->stmt;
fb85abff 2115
2116 /* For interleaving, only the alignment of the first access
2117 matters. */
abc9513d 2118 if (aligned_access_p (dr_info)
ee612634 2119 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
0219dc42 2120 && DR_GROUP_FIRST_ELEMENT (stmt_info) != stmt_info))
fb85abff 2121 continue;
2122
e1c75243 2123 if (STMT_VINFO_STRIDED_P (stmt_info))
994be998 2124 {
2125 /* Strided loads perform only component accesses, alignment is
2126 irrelevant for them. */
2127 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
2128 continue;
2129 do_versioning = false;
2130 break;
2131 }
f634c3e9 2132
abc9513d 2133 supportable_dr_alignment
2134 = vect_supportable_dr_alignment (dr_info, false);
fb85abff 2135
2136 if (!supportable_dr_alignment)
2137 {
fb85abff 2138 int mask;
2139 tree vectype;
2140
abc9513d 2141 if (known_alignment_for_access_p (dr_info)
f1f41a6c 2142 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
fb85abff 2143 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
2144 {
2145 do_versioning = false;
2146 break;
2147 }
2148
0219dc42 2149 vectype = STMT_VINFO_VECTYPE (stmt_info);
2150 gcc_assert (vectype);
48e1416a 2151
52acb7ae 2152 /* At present we don't support versioning for alignment
2153 with variable VF, since there's no guarantee that the
2154 VF is a power of two. We could relax this if we added
2155 a way of enforcing a power-of-two size. */
2156 unsigned HOST_WIDE_INT size;
2157 if (!GET_MODE_SIZE (TYPE_MODE (vectype)).is_constant (&size))
2158 {
2159 do_versioning = false;
2160 break;
2161 }
2162
dff96e64 2163 /* Forcing alignment in the first iteration is no good if
2164 we don't keep it across iterations. For now, just disable
2165 versioning in this case.
2588e836 2166 ?? We could actually unroll the loop to achieve the required
2167 overall step alignment, and forcing the alignment could be
dff96e64 2168 done by doing some iterations of the non-vectorized loop. */
2169 if (!multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
2170 * DR_STEP_ALIGNMENT (dr),
2588e836 2171 DR_TARGET_ALIGNMENT (dr_info)))
dff96e64 2172 {
2173 do_versioning = false;
2174 break;
2175 }
2176
fb85abff 2177 /* The rightmost bits of an aligned address must be zeros.
2178 Construct the mask needed for this test. For example,
2179 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
2180 mask must be 15 = 0xf. */
52acb7ae 2181 mask = size - 1;
fb85abff 2182
2183 /* FORNOW: use the same mask to test all potentially unaligned
2184 references in the loop. The vectorizer currently supports
2185 a single vector size, see the reference to
2186 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
2187 vectorization factor is computed. */
2188 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
2189 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
2190 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
0219dc42 2191 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (stmt_info);
fb85abff 2192 }
2193 }
48e1416a 2194
fb85abff 2195 /* Versioning requires at least one misaligned data reference. */
10095225 2196 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
fb85abff 2197 do_versioning = false;
2198 else if (!do_versioning)
f1f41a6c 2199 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
fb85abff 2200 }
2201
2202 if (do_versioning)
2203 {
ab98e625 2204 vec<stmt_vec_info> may_misalign_stmts
fb85abff 2205 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
ab98e625 2206 stmt_vec_info stmt_info;
fb85abff 2207
2208 /* It can now be assumed that the data references in the statements
2209 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
2210 of the loop being vectorized. */
ab98e625 2211 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
fb85abff 2212 {
abc9513d 2213 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
2214 SET_DR_MISALIGNMENT (dr_info, 0);
6d8fb6cf 2215 if (dump_enabled_p ())
78bb46f5 2216 dump_printf_loc (MSG_NOTE, vect_location,
2217 "Alignment of access forced using versioning.\n");
fb85abff 2218 }
2219
6d8fb6cf 2220 if (dump_enabled_p ())
78bb46f5 2221 dump_printf_loc (MSG_NOTE, vect_location,
2222 "Versioning for alignment will be applied.\n");
fb85abff 2223
2224 /* Peeling and versioning can't be done together at this time. */
2225 gcc_assert (! (do_peeling && do_versioning));
2226
ed9370cc 2227 opt_result stat = vect_verify_datarefs_alignment (loop_vinfo);
fb85abff 2228 gcc_assert (stat);
2229 return stat;
2230 }
2231
2232 /* This point is reached if neither peeling nor versioning is being done. */
2233 gcc_assert (! (do_peeling || do_versioning));
2234
ed9370cc 2235 opt_result stat = vect_verify_datarefs_alignment (loop_vinfo);
fb85abff 2236 return stat;
2237}
2238
2239
91a74fc6 2240/* Function vect_find_same_alignment_drs.
2241
db72d3bf 2242 Update group and alignment relations in VINFO according to the chosen
91a74fc6 2243 vectorization factor. */
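/* For instance (an illustrative sketch, not from the original sources),
   with 16-byte vectors the references a[i] and a[i + 4] into an 'int'
   array are 16 bytes apart; the gap is a multiple of the vector
   alignment, so both references always share the same misalignment and
   the pair is recorded in STMT_VINFO_SAME_ALIGN_REFS.  */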
2244
2245static void
db72d3bf 2246vect_find_same_alignment_drs (vec_info *vinfo, data_dependence_relation *ddr)
91a74fc6 2247{
91a74fc6 2248 struct data_reference *dra = DDR_A (ddr);
2249 struct data_reference *drb = DDR_B (ddr);
db72d3bf 2250 dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
2251 dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
abc9513d 2252 stmt_vec_info stmtinfo_a = dr_info_a->stmt;
2253 stmt_vec_info stmtinfo_b = dr_info_b->stmt;
91a74fc6 2254
2255 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
2256 return;
2257
0822b158 2258 if (dra == drb)
91a74fc6 2259 return;
2260
fa681b45 2261 if (STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a)
2262 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
2263 return;
2264
4f372c2c 2265 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
7d4e73a6 2266 || !operand_equal_p (DR_OFFSET (dra), DR_OFFSET (drb), 0)
2267 || !operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
91a74fc6 2268 return;
2269
7d4e73a6 2270 /* Two references with distance zero have the same alignment. */
c4d25d8a 2271 poly_offset_int diff = (wi::to_poly_offset (DR_INIT (dra))
2272 - wi::to_poly_offset (DR_INIT (drb)));
2273 if (maybe_ne (diff, 0))
91a74fc6 2274 {
7d4e73a6 2275 /* Get the wider of the two alignments. */
e092c20e 2276 poly_uint64 align_a =
2277 exact_div (vect_calculate_target_alignment (dr_info_a),
2278 BITS_PER_UNIT);
2279 poly_uint64 align_b =
2280 exact_div (vect_calculate_target_alignment (dr_info_b),
2281 BITS_PER_UNIT);
2282 unsigned HOST_WIDE_INT align_a_c, align_b_c;
2283 if (!align_a.is_constant (&align_a_c)
2284 || !align_b.is_constant (&align_b_c))
2285 return;
2286
2287 unsigned HOST_WIDE_INT max_align = MAX (align_a_c, align_b_c);
7d4e73a6 2288
2289 /* Require the gap to be a multiple of the larger vector alignment. */
c4d25d8a 2290 if (!multiple_p (diff, max_align))
7d4e73a6 2291 return;
2292 }
91a74fc6 2293
7d4e73a6 2294 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
2295 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
2296 if (dump_enabled_p ())
a4e972e3 2297 dump_printf_loc (MSG_NOTE, vect_location,
2298 "accesses have the same alignment: %T and %T\n",
2299 DR_REF (dra), DR_REF (drb));
91a74fc6 2300}
2301
2302
fb85abff 2303/* Function vect_analyze_data_refs_alignment
2304
2305 Analyze the alignment of the data-references in the loop.
2306 Return FALSE if a data reference is found that cannot be vectorized. */
2307
ed9370cc 2308opt_result
2f6fec15 2309vect_analyze_data_refs_alignment (loop_vec_info vinfo)
fb85abff 2310{
88f6eb8f 2311 DUMP_VECT_SCOPE ("vect_analyze_data_refs_alignment");
fb85abff 2312
91a74fc6 2313 /* Mark groups of data references with same alignment using
2314 data dependence information. */
a99aba41 2315 vec<ddr_p> ddrs = vinfo->shared->ddrs;
2f6fec15 2316 struct data_dependence_relation *ddr;
2317 unsigned int i;
2318
2319 FOR_EACH_VEC_ELT (ddrs, i, ddr)
db72d3bf 2320 vect_find_same_alignment_drs (vinfo, ddr);
2f6fec15 2321
a99aba41 2322 vec<data_reference_p> datarefs = vinfo->shared->datarefs;
2f6fec15 2323 struct data_reference *dr;
2324
4f372c2c 2325 vect_record_base_alignments (vinfo);
2f6fec15 2326 FOR_EACH_VEC_ELT (datarefs, i, dr)
91a74fc6 2327 {
db72d3bf 2328 dr_vec_info *dr_info = vinfo->lookup_dr (dr);
abc9513d 2329 if (STMT_VINFO_VECTORIZABLE (dr_info->stmt))
2330 vect_compute_data_ref_alignment (dr_info);
91a74fc6 2331 }
2332
ed9370cc 2333 return opt_result::success ();
2f6fec15 2334}
2335
2336
2337/* Analyze alignment of DRs of stmts in NODE. */
2338
2339static bool
2340vect_slp_analyze_and_verify_node_alignment (slp_tree node)
2341{
f6593f36 2342 /* We vectorize from the first scalar stmt in the node unless
2343 the node is permuted in which case we start from the first
2344 element in the group. */
06bb64b8 2345 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
abc9513d 2346 dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
f6593f36 2347 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
cd24aa3c 2348 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
f6593f36 2349
abc9513d 2350 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
2351 vect_compute_data_ref_alignment (dr_info);
fa681b45 2352 /* For creating the data-ref pointer we need alignment of the
2353 first element anyway. */
abc9513d 2354 if (dr_info != first_dr_info)
2355 vect_compute_data_ref_alignment (first_dr_info);
2356 if (! verify_data_ref_alignment (dr_info))
fb85abff 2357 {
f6593f36 2358 if (dump_enabled_p ())
2359 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2360 "not vectorized: bad data alignment in basic "
2361 "block.\n");
2362 return false;
fb85abff 2363 }
2364
2365 return true;
2366}
2367
2f6fec15 2368/* Function vect_slp_analyze_instance_alignment
2369
2370 Analyze the alignment of the data-references in the SLP instance.
2371 Return FALSE if a data reference is found that cannot be vectorized. */
2372
2373bool
2374vect_slp_analyze_and_verify_instance_alignment (slp_instance instance)
2375{
88f6eb8f 2376 DUMP_VECT_SCOPE ("vect_slp_analyze_and_verify_instance_alignment");
2f6fec15 2377
2378 slp_tree node;
2379 unsigned i;
2380 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
2381 if (! vect_slp_analyze_and_verify_node_alignment (node))
2382 return false;
2383
2384 node = SLP_INSTANCE_TREE (instance);
06bb64b8 2385 if (STMT_VINFO_DATA_REF (SLP_TREE_SCALAR_STMTS (node)[0])
2f6fec15 2386 && ! vect_slp_analyze_and_verify_node_alignment
2387 (SLP_INSTANCE_TREE (instance)))
2388 return false;
2389
2390 return true;
2391}
2392
fb85abff 2393
abc9513d 2394/* Analyze groups of accesses: check that DR_INFO belongs to a group of
ee612634 2395 accesses of legal size, step, etc. Detect gaps, single element
2396 interleaving, and other special cases. Set grouped access info.
39e23eaa 2397 Collect groups of strided stores for further use in SLP analysis.
2398 Worker for vect_analyze_group_access. */
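/* An illustrative example (a sketch, not from the original sources):

     for (i = 0; i < N; i++)
       sum += a[3*i] + a[3*i + 2];   # step = 3 * sizeof (a[0])

   The two loads form one interleaving group with DR_GROUP_SIZE == 3,
   DR_GROUP_GAP == 2 for the second member (one element is skipped
   between a[3*i] and a[3*i+2]) and no gap after the last element.  */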
fb85abff 2399
2400static bool
abc9513d 2401vect_analyze_group_access_1 (dr_vec_info *dr_info)
fb85abff 2402{
abc9513d 2403 data_reference *dr = dr_info->dr;
fb85abff 2404 tree step = DR_STEP (dr);
2405 tree scalar_type = TREE_TYPE (DR_REF (dr));
f9ae6f95 2406 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
abc9513d 2407 stmt_vec_info stmt_info = dr_info->stmt;
fb85abff 2408 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 2409 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
994be998 2410 HOST_WIDE_INT dr_step = -1;
ee612634 2411 HOST_WIDE_INT groupsize, last_accessed_element = 1;
fb85abff 2412 bool slp_impossible = false;
2413
ee612634 2414 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2415 size of the interleaving group (including gaps). */
994be998 2416 if (tree_fits_shwi_p (step))
2417 {
2418 dr_step = tree_to_shwi (step);
0d77042c 2419 /* Check that STEP is a multiple of type size. Otherwise there is
2420 a non-element-sized gap at the end of the group which we
e1009321 2421 cannot represent in DR_GROUP_GAP or DR_GROUP_SIZE.
0d77042c 2422 ??? As we can handle non-constant step fine here we should
e1009321 2423 simply remove uses of DR_GROUP_GAP between the last and first
2424 element and instead rely on DR_STEP. DR_GROUP_SIZE then would
0d77042c 2425 simply not include that gap. */
2426 if ((dr_step % type_size) != 0)
2427 {
2428 if (dump_enabled_p ())
a4e972e3 2429 dump_printf_loc (MSG_NOTE, vect_location,
2430 "Step %T is not a multiple of the element size"
2431 " for %T\n",
2432 step, DR_REF (dr));
0d77042c 2433 return false;
2434 }
994be998 2435 groupsize = absu_hwi (dr_step) / type_size;
2436 }
2437 else
2438 groupsize = 0;
fb85abff 2439
 2440 /* A non-consecutive access is possible only if it is part of an interleaving. */
0219dc42 2441 if (!DR_GROUP_FIRST_ELEMENT (stmt_info))
fb85abff 2442 {
 2443 /* Check if this DR is a part of interleaving, and is a single
 2444 element of the group that is accessed in the loop. */
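	 /* E.g. (illustrative only):  for (i = 0; i < N; i++)  x += a[4*i];
	    Only one element out of each group of four is read, so the load
	    is recorded as single element interleaving with
	    DR_GROUP_SIZE == 4 and DR_GROUP_GAP == 3.  */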
48e1416a 2445
fb85abff 2446 /* Gaps are supported only for loads. STEP must be a multiple of the type
f5d5e8fa 2447 size. */
fb85abff 2448 if (DR_IS_READ (dr)
2449 && (dr_step % type_size) == 0
f5d5e8fa 2450 && groupsize > 0)
fb85abff 2451 {
0219dc42 2452 DR_GROUP_FIRST_ELEMENT (stmt_info) = stmt_info;
2453 DR_GROUP_SIZE (stmt_info) = groupsize;
e1009321 2454 DR_GROUP_GAP (stmt_info) = groupsize - 1;
6d8fb6cf 2455 if (dump_enabled_p ())
a4e972e3 2456 dump_printf_loc (MSG_NOTE, vect_location,
2457 "Detected single element interleaving %T"
2458 " step %T\n",
2459 DR_REF (dr), step);
a4ee7fac 2460
fb85abff 2461 return true;
2462 }
6ea6a380 2463
6d8fb6cf 2464 if (dump_enabled_p ())
a4e972e3 2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "not consecutive access %G", stmt_info->stmt);
6ea6a380 2467
2468 if (bb_vinfo)
0219dc42 2469 {
2470 /* Mark the statement as unvectorizable. */
abc9513d 2471 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
0219dc42 2472 return true;
2473 }
7bd765d4 2474
91f42adc 2475 if (dump_enabled_p ())
2476 dump_printf_loc (MSG_NOTE, vect_location, "using strided accesses\n");
71de77d8 2477 STMT_VINFO_STRIDED_P (stmt_info) = true;
2478 return true;
fb85abff 2479 }
2480
0219dc42 2481 if (DR_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
fb85abff 2482 {
2483 /* First stmt in the interleaving chain. Check the chain. */
cd24aa3c 2484 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
fb85abff 2485 struct data_reference *data_ref = dr;
1a0e7d51 2486 unsigned int count = 1;
fb85abff 2487 tree prev_init = DR_INIT (data_ref);
cd24aa3c 2488 stmt_vec_info prev = stmt_info;
8bbe6b75 2489 HOST_WIDE_INT diff, gaps = 0;
fb85abff 2490
c4d25d8a 2491 /* By construction, all group members have INTEGER_CST DR_INITs. */
fb85abff 2492 while (next)
2493 {
282bf14c 2494 /* Skip same data-refs. If two or more stmts share a
 2495 data-ref (supported only for loads), we vectorize only the first
 2496 stmt, and the rest get their vectorized loads from the first
 2497 one. */
fb85abff 2498 if (!tree_int_cst_compare (DR_INIT (data_ref),
cd24aa3c 2499 DR_INIT (STMT_VINFO_DATA_REF (next))))
fb85abff 2500 {
9ff25603 2501 if (DR_IS_WRITE (data_ref))
fb85abff 2502 {
6d8fb6cf 2503 if (dump_enabled_p ())
78bb46f5 2504 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2505 "Two store stmts share the same dr.\n");
fb85abff 2506 return false;
2507 }
2508
00ecf4da 2509 if (dump_enabled_p ())
e0599ca4 2510 dump_printf_loc (MSG_NOTE, vect_location,
00ecf4da 2511 "Two or more load stmts share the same dr.\n");
2512
cd24aa3c 2513 /* For loads, use the same data-ref load. */
2514 DR_GROUP_SAME_DR_STMT (next) = prev;
fb85abff 2515
cd24aa3c 2516 prev = next;
2517 next = DR_GROUP_NEXT_ELEMENT (next);
2518 continue;
fb85abff 2519 }
a4ee7fac 2520
cd24aa3c 2521 prev = next;
2522 data_ref = STMT_VINFO_DATA_REF (next);
fb85abff 2523
8bbe6b75 2524 /* All group members have the same STEP by construction. */
2525 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
fb85abff 2526
fb85abff 2527 /* Check that the distance between two accesses is equal to the type
2528 size. Otherwise, we have gaps. */
f9ae6f95 2529 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2530 - TREE_INT_CST_LOW (prev_init)) / type_size;
fb85abff 2531 if (diff != 1)
2532 {
2533 /* FORNOW: SLP of accesses with gaps is not supported. */
2534 slp_impossible = true;
9ff25603 2535 if (DR_IS_WRITE (data_ref))
fb85abff 2536 {
6d8fb6cf 2537 if (dump_enabled_p ())
78bb46f5 2538 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2539 "interleaved store with gaps\n");
fb85abff 2540 return false;
2541 }
b11576bf 2542
2543 gaps += diff - 1;
fb85abff 2544 }
2545
a4ee7fac 2546 last_accessed_element += diff;
2547
fb85abff 2548 /* Store the gap from the previous member of the group. If there is no
e1009321 2549 gap in the access, DR_GROUP_GAP is always 1. */
cd24aa3c 2550 DR_GROUP_GAP (next) = diff;
fb85abff 2551
cd24aa3c 2552 prev_init = DR_INIT (data_ref);
2553 next = DR_GROUP_NEXT_ELEMENT (next);
2554 /* Count the number of data-refs in the chain. */
2555 count++;
fb85abff 2556 }
2557
994be998 2558 if (groupsize == 0)
2559 groupsize = count + gaps;
fb85abff 2560
26aad5fc 2561 /* This could be UINT_MAX but as we are generating code in a very
2562 inefficient way we have to cap earlier. See PR78699 for example. */
2563 if (groupsize > 4096)
39e23eaa 2564 {
2565 if (dump_enabled_p ())
2566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2567 "group is too large\n");
2568 return false;
2569 }
2570
994be998 2571 /* Check that the size of the interleaving is equal to count for stores,
fb85abff 2572 i.e., that there are no gaps. */
904bd865 2573 if (groupsize != count
2574 && !DR_IS_READ (dr))
fb85abff 2575 {
05b97b35 2576 groupsize = count;
2577 STMT_VINFO_STRIDED_P (stmt_info) = true;
904bd865 2578 }
2579
2580 /* If there is a gap after the last load in the group it is the
2581 difference between the groupsize and the last accessed
2582 element.
2583 When there is no gap, this difference should be 0. */
0219dc42 2584 DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
fb85abff 2585
0219dc42 2586 DR_GROUP_SIZE (stmt_info) = groupsize;
6d8fb6cf 2587 if (dump_enabled_p ())
904bd865 2588 {
2589 dump_printf_loc (MSG_NOTE, vect_location,
39e23eaa 2590 "Detected interleaving ");
2591 if (DR_IS_READ (dr))
2592 dump_printf (MSG_NOTE, "load ");
05b97b35 2593 else if (STMT_VINFO_STRIDED_P (stmt_info))
2594 dump_printf (MSG_NOTE, "strided store ");
39e23eaa 2595 else
2596 dump_printf (MSG_NOTE, "store ");
b4d2979c 2597 dump_printf (MSG_NOTE, "of size %u\n",
2598 (unsigned)groupsize);
2599 dump_printf_loc (MSG_NOTE, vect_location, "\t%G", stmt_info->stmt);
2600 next = DR_GROUP_NEXT_ELEMENT (stmt_info);
2601 while (next)
2602 {
2603 if (DR_GROUP_GAP (next) != 1)
2604 dump_printf_loc (MSG_NOTE, vect_location,
2605 "\t<gap of %d elements>\n",
2606 DR_GROUP_GAP (next) - 1);
2607 dump_printf_loc (MSG_NOTE, vect_location, "\t%G", next->stmt);
2608 next = DR_GROUP_NEXT_ELEMENT (next);
2609 }
0219dc42 2610 if (DR_GROUP_GAP (stmt_info) != 0)
904bd865 2611 dump_printf_loc (MSG_NOTE, vect_location,
b4d2979c 2612 "\t<gap of %d elements>\n",
0219dc42 2613 DR_GROUP_GAP (stmt_info));
904bd865 2614 }
fb85abff 2615
48e1416a 2616 /* SLP: create an SLP data structure for every interleaving group of
fb85abff 2617 stores for further analysis in vect_analyze_slp. */
9ff25603 2618 if (DR_IS_WRITE (dr) && !slp_impossible)
0219dc42 2619 {
2620 if (loop_vinfo)
2621 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt_info);
2622 if (bb_vinfo)
2623 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
2624 }
fb85abff 2625 }
2626
2627 return true;
2628}
2629
abc9513d 2630/* Analyze groups of accesses: check that DR_INFO belongs to a group of
39e23eaa 2631 accesses of legal size, step, etc. Detect gaps, single element
2632 interleaving, and other special cases. Set grouped access info.
2633 Collect groups of strided stores for further use in SLP analysis. */
2634
2635static bool
abc9513d 2636vect_analyze_group_access (dr_vec_info *dr_info)
39e23eaa 2637{
abc9513d 2638 if (!vect_analyze_group_access_1 (dr_info))
39e23eaa 2639 {
2640 /* Dissolve the group if present. */
abc9513d 2641 stmt_vec_info stmt_info = DR_GROUP_FIRST_ELEMENT (dr_info->stmt);
cd24aa3c 2642 while (stmt_info)
39e23eaa 2643 {
cd24aa3c 2644 stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (stmt_info);
2645 DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
2646 DR_GROUP_NEXT_ELEMENT (stmt_info) = NULL;
2647 stmt_info = next;
39e23eaa 2648 }
2649 return false;
2650 }
2651 return true;
2652}
fb85abff 2653
abc9513d 2654/* Analyze the access pattern of the data-reference DR_INFO.
fb85abff 2655 In case of non-consecutive accesses call vect_analyze_group_access() to
ee612634 2656 analyze groups of accesses. */
fb85abff 2657
2658static bool
abc9513d 2659vect_analyze_data_ref_access (dr_vec_info *dr_info)
fb85abff 2660{
abc9513d 2661 data_reference *dr = dr_info->dr;
fb85abff 2662 tree step = DR_STEP (dr);
2663 tree scalar_type = TREE_TYPE (DR_REF (dr));
abc9513d 2664 stmt_vec_info stmt_info = dr_info->stmt;
fb85abff 2665 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 2666 struct loop *loop = NULL;
fb85abff 2667
0bf8b382 2668 if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
2669 return true;
2670
37545e54 2671 if (loop_vinfo)
2672 loop = LOOP_VINFO_LOOP (loop_vinfo);
48e1416a 2673
37545e54 2674 if (loop_vinfo && !step)
fb85abff 2675 {
6d8fb6cf 2676 if (dump_enabled_p ())
78bb46f5 2677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2678 "bad data-ref access in loop\n");
fb85abff 2679 return false;
2680 }
2681
9b0be19c 2682 /* Allow loads with zero step in inner-loop vectorization. */
f634c3e9 2683 if (loop_vinfo && integer_zerop (step))
b04940e7 2684 {
0219dc42 2685 DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
2686 if (!nested_in_vect_loop_p (loop, stmt_info))
9b0be19c 2687 return DR_IS_READ (dr);
2688 /* Allow references with zero step for outer loops marked
2689 with pragma omp simd only - it guarantees absence of
2690 loop-carried dependencies between inner loop iterations. */
84017e0e 2691 if (loop->safelen < 2)
afa60cb4 2692 {
2693 if (dump_enabled_p ())
2694 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2695 "zero step in inner loop of nest\n");
afa60cb4 2696 return false;
2697 }
b04940e7 2698 }
fb85abff 2699
0219dc42 2700 if (loop && nested_in_vect_loop_p (loop, stmt_info))
fb85abff 2701 {
2702 /* Interleaved accesses are not yet supported within outer-loop
2703 vectorization for references in the inner-loop. */
0219dc42 2704 DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
fb85abff 2705
2706 /* For the rest of the analysis we use the outer-loop step. */
2707 step = STMT_VINFO_DR_STEP (stmt_info);
f634c3e9 2708 if (integer_zerop (step))
fb85abff 2709 {
6d8fb6cf 2710 if (dump_enabled_p ())
7bd765d4 2711 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2712 "zero step in outer loop.\n");
0bd6d857 2713 return DR_IS_READ (dr);
fb85abff 2714 }
2715 }
2716
2717 /* Consecutive? */
f634c3e9 2718 if (TREE_CODE (step) == INTEGER_CST)
fb85abff 2719 {
f9ae6f95 2720 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
f634c3e9 2721 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2722 || (dr_step < 0
2723 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2724 {
2725 /* Mark that it is not interleaving. */
0219dc42 2726 DR_GROUP_FIRST_ELEMENT (stmt_info) = NULL;
f634c3e9 2727 return true;
2728 }
fb85abff 2729 }
2730
0219dc42 2731 if (loop && nested_in_vect_loop_p (loop, stmt_info))
fb85abff 2732 {
6d8fb6cf 2733 if (dump_enabled_p ())
7bd765d4 2734 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2735 "grouped access in outer loop.\n");
fb85abff 2736 return false;
2737 }
2738
994be998 2739
f634c3e9 2740 /* Assume this is a DR handled by non-constant strided load case. */
2741 if (TREE_CODE (step) != INTEGER_CST)
e1c75243 2742 return (STMT_VINFO_STRIDED_P (stmt_info)
994be998 2743 && (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
abc9513d 2744 || vect_analyze_group_access (dr_info)));
f634c3e9 2745
fb85abff 2746 /* Not a consecutive access - check if it's part of an interleaving group. */
abc9513d 2747 return vect_analyze_group_access (dr_info);
fb85abff 2748}
2749
68f15e9d 2750/* Compare two data-references DRA and DRB to group them into chunks
2751 suitable for grouping. */
2752
2753static int
2754dr_group_sort_cmp (const void *dra_, const void *drb_)
2755{
2756 data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2757 data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
68f15e9d 2758 int cmp;
2759
2760 /* Stabilize sort. */
2761 if (dra == drb)
2762 return 0;
2763
8167d6ad 2764 /* DRs in different loops never belong to the same group. */
2765 loop_p loopa = gimple_bb (DR_STMT (dra))->loop_father;
2766 loop_p loopb = gimple_bb (DR_STMT (drb))->loop_father;
2767 if (loopa != loopb)
2768 return loopa->num < loopb->num ? -1 : 1;
2769
68f15e9d 2770 /* Ordering of DRs according to base. */
ce55060f 2771 cmp = data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2772 DR_BASE_ADDRESS (drb));
2773 if (cmp != 0)
2774 return cmp;
68f15e9d 2775
2776 /* And according to DR_OFFSET. */
ce55060f 2777 cmp = data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2778 if (cmp != 0)
2779 return cmp;
68f15e9d 2780
2781 /* Put reads before writes. */
2782 if (DR_IS_READ (dra) != DR_IS_READ (drb))
2783 return DR_IS_READ (dra) ? -1 : 1;
2784
 2785 /* Then sort by access size. */
ce55060f 2786 cmp = data_ref_compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2787 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2788 if (cmp != 0)
2789 return cmp;
68f15e9d 2790
2791 /* And after step. */
ce55060f 2792 cmp = data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb));
2793 if (cmp != 0)
2794 return cmp;
68f15e9d 2795
 2796 /* Then sort by DR_INIT. In case of identical DRs, sort by stmt UID. */
8672ee56 2797 cmp = data_ref_compare_tree (DR_INIT (dra), DR_INIT (drb));
68f15e9d 2798 if (cmp == 0)
2799 return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2800 return cmp;
2801}
fb85abff 2802
2dd8e84c 2803/* If OP is the result of a conversion, return the unconverted value,
2804 otherwise return null. */
2805
2806static tree
2807strip_conversion (tree op)
2808{
2809 if (TREE_CODE (op) != SSA_NAME)
2810 return NULL_TREE;
2811 gimple *stmt = SSA_NAME_DEF_STMT (op);
2812 if (!is_gimple_assign (stmt)
2813 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
2814 return NULL_TREE;
2815 return gimple_assign_rhs1 (stmt);
2816}
2817
ecc42a77 2818/* Return true if vectorizable_* routines can handle statements STMT1_INFO
2819 and STMT2_INFO being in a single group. */
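/* For example (an illustrative sketch, not from the original sources),
   the two conditional loads in

     for (i = 0; i < N; i++)
       if (c[i]) { x += a[2*i]; y += a[2*i + 1]; }

   are if-converted into .MASK_LOAD calls that use the same mask, so the
   pair can still be analyzed as one interleaving group.  */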
2dd8e84c 2820
2821static bool
ecc42a77 2822can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
2dd8e84c 2823{
ecc42a77 2824 if (gimple_assign_single_p (stmt1_info->stmt))
2825 return gimple_assign_single_p (stmt2_info->stmt);
2dd8e84c 2826
ecc42a77 2827 gcall *call1 = dyn_cast <gcall *> (stmt1_info->stmt);
5b4b7bcc 2828 if (call1 && gimple_call_internal_p (call1))
2dd8e84c 2829 {
2830 /* Check for two masked loads or two masked stores. */
ecc42a77 2831 gcall *call2 = dyn_cast <gcall *> (stmt2_info->stmt);
5b4b7bcc 2832 if (!call2 || !gimple_call_internal_p (call2))
2dd8e84c 2833 return false;
5b4b7bcc 2834 internal_fn ifn = gimple_call_internal_fn (call1);
2dd8e84c 2835 if (ifn != IFN_MASK_LOAD && ifn != IFN_MASK_STORE)
2836 return false;
5b4b7bcc 2837 if (ifn != gimple_call_internal_fn (call2))
2dd8e84c 2838 return false;
2839
2840 /* Check that the masks are the same. Cope with casts of masks,
2841 like those created by build_mask_conversion. */
5b4b7bcc 2842 tree mask1 = gimple_call_arg (call1, 2);
2843 tree mask2 = gimple_call_arg (call2, 2);
2dd8e84c 2844 if (!operand_equal_p (mask1, mask2, 0))
2845 {
2846 mask1 = strip_conversion (mask1);
2847 if (!mask1)
2848 return false;
2849 mask2 = strip_conversion (mask2);
2850 if (!mask2)
2851 return false;
2852 if (!operand_equal_p (mask1, mask2, 0))
2853 return false;
2854 }
2855 return true;
2856 }
2857
2858 return false;
2859}
2860
fb85abff 2861/* Function vect_analyze_data_ref_accesses.
2862
2863 Analyze the access pattern of all the data references in the loop.
2864
2865 FORNOW: the only access pattern that is considered vectorizable is a
2866 simple step 1 (consecutive) access.
2867
2868 FORNOW: handle only arrays and pointer accesses. */
2869
ed9370cc 2870opt_result
e2c5c678 2871vect_analyze_data_ref_accesses (vec_info *vinfo)
fb85abff 2872{
2873 unsigned int i;
a99aba41 2874 vec<data_reference_p> datarefs = vinfo->shared->datarefs;
fb85abff 2875 struct data_reference *dr;
2876
88f6eb8f 2877 DUMP_VECT_SCOPE ("vect_analyze_data_ref_accesses");
fb85abff 2878
68f15e9d 2879 if (datarefs.is_empty ())
ed9370cc 2880 return opt_result::success ();
68f15e9d 2881
 2882 /* Sort the array of datarefs to make building the interleaving chains
863a3781 2883 linear. Don't modify the original vector's order; it is needed for
 2884 determining what dependencies are reversed. */
2885 vec<data_reference_p> datarefs_copy = datarefs.copy ();
90a2d741 2886 datarefs_copy.qsort (dr_group_sort_cmp);
e0599ca4 2887 hash_set<stmt_vec_info> to_fixup;
68f15e9d 2888
2889 /* Build the interleaving chains. */
863a3781 2890 for (i = 0; i < datarefs_copy.length () - 1;)
68f15e9d 2891 {
863a3781 2892 data_reference_p dra = datarefs_copy[i];
db72d3bf 2893 dr_vec_info *dr_info_a = vinfo->lookup_dr (dra);
abc9513d 2894 stmt_vec_info stmtinfo_a = dr_info_a->stmt;
68f15e9d 2895 stmt_vec_info lastinfo = NULL;
ab053afe 2896 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
2897 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_a))
f6aeb966 2898 {
2899 ++i;
2900 continue;
2901 }
863a3781 2902 for (i = i + 1; i < datarefs_copy.length (); ++i)
68f15e9d 2903 {
863a3781 2904 data_reference_p drb = datarefs_copy[i];
db72d3bf 2905 dr_vec_info *dr_info_b = vinfo->lookup_dr (drb);
abc9513d 2906 stmt_vec_info stmtinfo_b = dr_info_b->stmt;
ab053afe 2907 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_b)
2908 || STMT_VINFO_GATHER_SCATTER_P (stmtinfo_b))
f6aeb966 2909 break;
68f15e9d 2910
 2911 /* ??? Imperfect sorting (non-compatible types, non-modulo
 2912 accesses, same accesses) can lead to a group being artificially
 2913 split here as we don't just skip over those. If it really
 2914 matters we can push those to a worklist and re-iterate
 2915 over them. Then we can just skip ahead to the next DR here. */
2916
8167d6ad 2917 /* DRs in a different loop should not be put into the same
2918 interleaving group. */
2919 if (gimple_bb (DR_STMT (dra))->loop_father
2920 != gimple_bb (DR_STMT (drb))->loop_father)
2921 break;
2922
68f15e9d 2923 /* Check that the data-refs have same first location (except init)
5c0fac99 2924 and they are both either store or load (not load and store,
2925 not masked loads or stores). */
68f15e9d 2926 if (DR_IS_READ (dra) != DR_IS_READ (drb)
ce55060f 2927 || data_ref_compare_tree (DR_BASE_ADDRESS (dra),
2928 DR_BASE_ADDRESS (drb)) != 0
2929 || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0
1c2fef9a 2930 || !can_group_stmts_p (stmtinfo_a, stmtinfo_b))
68f15e9d 2931 break;
2932
994be998 2933 /* Check that the data-refs have the same constant size. */
68f15e9d 2934 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2935 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
e913b5cd 2936 if (!tree_fits_uhwi_p (sza)
2937 || !tree_fits_uhwi_p (szb)
994be998 2938 || !tree_int_cst_equal (sza, szb))
2939 break;
2940
2941 /* Check that the data-refs have the same step. */
ce55060f 2942 if (data_ref_compare_tree (DR_STEP (dra), DR_STEP (drb)) != 0)
68f15e9d 2943 break;
2944
68f15e9d 2945 /* Check the types are compatible.
2946 ??? We don't distinguish this during sorting. */
2947 if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2948 TREE_TYPE (DR_REF (drb))))
2949 break;
2950
c4d25d8a 2951 /* Check that the DR_INITs are compile-time constants. */
2952 if (TREE_CODE (DR_INIT (dra)) != INTEGER_CST
2953 || TREE_CODE (DR_INIT (drb)) != INTEGER_CST)
2954 break;
2955
68f15e9d 2956 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
f9ae6f95 2957 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2958 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
9c9cb9cf 2959 HOST_WIDE_INT init_prev
2960 = TREE_INT_CST_LOW (DR_INIT (datarefs_copy[i-1]));
2961 gcc_assert (init_a <= init_b
2962 && init_a <= init_prev
2963 && init_prev <= init_b);
2964
2965 /* Do not place the same access in the interleaving chain twice. */
2966 if (init_b == init_prev)
2967 {
2968 gcc_assert (gimple_uid (DR_STMT (datarefs_copy[i-1]))
2969 < gimple_uid (DR_STMT (drb)));
e0599ca4 2970 /* Simply link in duplicates and fix up the chain below. */
9c9cb9cf 2971 }
e0599ca4 2972 else
994be998 2973 {
e0599ca4 2974 /* If init_b == init_a + the size of the type * k, we have an
2975 interleaving, and DRA is accessed before DRB. */
2976 HOST_WIDE_INT type_size_a = tree_to_uhwi (sza);
2977 if (type_size_a == 0
2978 || (init_b - init_a) % type_size_a != 0)
994be998 2979 break;
e0599ca4 2980
2981 /* If we have a store, the accesses are adjacent. This splits
2982 groups into chunks we support (we don't support vectorization
2983 of stores with gaps). */
2984 if (!DR_IS_READ (dra) && init_b - init_prev != type_size_a)
2985 break;
2986
 2987 /* If the step (when constant and nonzero) is greater than the
 2988 difference between the data-refs' inits, this splits groups into
 2989 suitable sizes. */
2990 if (tree_fits_shwi_p (DR_STEP (dra)))
2991 {
2992 HOST_WIDE_INT step = tree_to_shwi (DR_STEP (dra));
2993 if (step != 0 && step <= (init_b - init_a))
2994 break;
2995 }
994be998 2996 }
68f15e9d 2997
2998 if (dump_enabled_p ())
a4e972e3 2999 dump_printf_loc (MSG_NOTE, vect_location,
3000 DR_IS_READ (dra)
3001 ? "Detected interleaving load %T and %T\n"
3002 : "Detected interleaving store %T and %T\n",
3003 DR_REF (dra), DR_REF (drb));
68f15e9d 3004
3005 /* Link the found element into the group list. */
e1009321 3006 if (!DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
68f15e9d 3007 {
1c2fef9a 3008 DR_GROUP_FIRST_ELEMENT (stmtinfo_a) = stmtinfo_a;
68f15e9d 3009 lastinfo = stmtinfo_a;
3010 }
1c2fef9a 3011 DR_GROUP_FIRST_ELEMENT (stmtinfo_b) = stmtinfo_a;
3012 DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b;
68f15e9d 3013 lastinfo = stmtinfo_b;
e0599ca4 3014
3015 if (init_b == init_prev
3016 && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a))
3017 && dump_enabled_p ())
3018 dump_printf_loc (MSG_NOTE, vect_location,
3019 "Queuing group with duplicate access for fixup\n");
68f15e9d 3020 }
3021 }
3022
 e0599ca4 3023 /* Fix up groups with duplicate entries by splitting them. */
3024 while (1)
3025 {
3026 hash_set<stmt_vec_info>::iterator it = to_fixup.begin ();
3027 if (!(it != to_fixup.end ()))
3028 break;
3029 stmt_vec_info grp = *it;
3030 to_fixup.remove (grp);
3031
3032 /* Find the earliest duplicate group member. */
3033 unsigned first_duplicate = -1u;
3034 stmt_vec_info next, g = grp;
3035 while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3036 {
3037 if ((DR_INIT (STMT_VINFO_DR_INFO (next)->dr)
3038 == DR_INIT (STMT_VINFO_DR_INFO (g)->dr))
3039 && gimple_uid (STMT_VINFO_STMT (next)) < first_duplicate)
3040 first_duplicate = gimple_uid (STMT_VINFO_STMT (next));
3041 g = next;
3042 }
3043 if (first_duplicate == -1U)
3044 continue;
3045
3046 /* Then move all stmts after the first duplicate to a new group.
3047 Note this is a heuristic but one with the property that *it
3048 is fixed up completely. */
3049 g = grp;
bbe43331 3050 stmt_vec_info newgroup = NULL, ng = grp;
e0599ca4 3051 while ((next = DR_GROUP_NEXT_ELEMENT (g)))
3052 {
3053 if (gimple_uid (STMT_VINFO_STMT (next)) >= first_duplicate)
3054 {
3055 DR_GROUP_NEXT_ELEMENT (g) = DR_GROUP_NEXT_ELEMENT (next);
3056 if (!newgroup)
3057 newgroup = next;
3058 else
3059 DR_GROUP_NEXT_ELEMENT (ng) = next;
3060 ng = next;
3061 DR_GROUP_FIRST_ELEMENT (ng) = newgroup;
3062 }
3063 else
3064 g = DR_GROUP_NEXT_ELEMENT (g);
3065 }
3066 DR_GROUP_NEXT_ELEMENT (ng) = NULL;
3067
 3068 /* Fix up the new group, which may still contain duplicates. */
3069 to_fixup.add (newgroup);
3070 }
3071
863a3781 3072 FOR_EACH_VEC_ELT (datarefs_copy, i, dr)
abc9513d 3073 {
db72d3bf 3074 dr_vec_info *dr_info = vinfo->lookup_dr (dr);
abc9513d 3075 if (STMT_VINFO_VECTORIZABLE (dr_info->stmt)
3076 && !vect_analyze_data_ref_access (dr_info))
3077 {
3078 if (dump_enabled_p ())
3079 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3080 "not vectorized: complicated access pattern.\n");
6ea6a380 3081
abc9513d 3082 if (is_a <bb_vec_info> (vinfo))
3083 {
3084 /* Mark the statement as not vectorizable. */
3085 STMT_VINFO_VECTORIZABLE (dr_info->stmt) = false;
3086 continue;
3087 }
3088 else
3089 {
3090 datarefs_copy.release ();
ed9370cc 3091 return opt_result::failure_at (dr_info->stmt->stmt,
3092 "not vectorized:"
3093 " complicated access pattern.\n");
abc9513d 3094 }
3095 }
3096 }
fb85abff 3097
863a3781 3098 datarefs_copy.release ();
ed9370cc 3099 return opt_result::success ();
fb85abff 3100}
3101
8a7b0f48 3102/* Function vect_vfa_segment_size.
3103
8a7b0f48 3104 Input:
abc9513d 3105 DR_INFO: The data reference.
8a7b0f48 3106 LENGTH_FACTOR: segment length to consider.
3107
e85b4a5e 3108 Return a value suitable for the dr_with_seg_len::seg_len field.
3109 This is the "distance travelled" by the pointer from the first
3110 iteration in the segment to the last. Note that it does not include
3111 the size of the access; in effect it only describes the first byte. */
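/* For instance (numbers assumed purely for illustration): with a 4-byte
   DR_STEP and a LENGTH_FACTOR of 8, the returned segment length is
   (8 - 1) * 4 = 28 bytes; the bytes of the access itself are accounted
   for separately by vect_vfa_access_size.  */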
8a7b0f48 3112
3113static tree
abc9513d 3114vect_vfa_segment_size (dr_vec_info *dr_info, tree length_factor)
8a7b0f48 3115{
e85b4a5e 3116 length_factor = size_binop (MINUS_EXPR,
3117 fold_convert (sizetype, length_factor),
3118 size_one_node);
abc9513d 3119 return size_binop (MULT_EXPR, fold_convert (sizetype, DR_STEP (dr_info->dr)),
e85b4a5e 3120 length_factor);
3121}
8a7b0f48 3122
abc9513d 3123/* Return a value that, when added to abs (vect_vfa_segment_size (DR_INFO)),
e85b4a5e 3124 gives the worst-case number of bytes covered by the segment. */
8a7b0f48 3125
e85b4a5e 3126static unsigned HOST_WIDE_INT
abc9513d 3127vect_vfa_access_size (dr_vec_info *dr_info)
e85b4a5e 3128{
abc9513d 3129 stmt_vec_info stmt_vinfo = dr_info->stmt;
3130 tree ref_type = TREE_TYPE (DR_REF (dr_info->dr));
e85b4a5e 3131 unsigned HOST_WIDE_INT ref_size = tree_to_uhwi (TYPE_SIZE_UNIT (ref_type));
3132 unsigned HOST_WIDE_INT access_size = ref_size;
e1009321 3133 if (DR_GROUP_FIRST_ELEMENT (stmt_vinfo))
8a7b0f48 3134 {
abc9513d 3135 gcc_assert (DR_GROUP_FIRST_ELEMENT (stmt_vinfo) == stmt_vinfo);
e1009321 3136 access_size *= DR_GROUP_SIZE (stmt_vinfo) - DR_GROUP_GAP (stmt_vinfo);
e85b4a5e 3137 }
3138 if (STMT_VINFO_VEC_STMT (stmt_vinfo)
abc9513d 3139 && (vect_supportable_dr_alignment (dr_info, false)
e85b4a5e 3140 == dr_explicit_realign_optimized))
3141 {
3142 /* We might access a full vector's worth. */
3143 tree vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3144 access_size += tree_to_uhwi (TYPE_SIZE_UNIT (vectype)) - ref_size;
8a7b0f48 3145 }
e85b4a5e 3146 return access_size;
3147}
3148
abc9513d 3149/* Get the minimum alignment for all the scalar accesses that DR_INFO
3150 describes. */
e85b4a5e 3151
3152static unsigned int
abc9513d 3153vect_vfa_align (dr_vec_info *dr_info)
e85b4a5e 3154{
abc9513d 3155 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
8a7b0f48 3156}
3157
a5af7a75 3158/* Function vect_compile_time_alias.
3159
63bc418d 3160 Given data references A and B with equal base and offset, see whether
3161 the alias relation can be decided at compilation time. Return 1 if
3162 it can and the references alias, 0 if it can and the references do
e85b4a5e 3163 not alias, and -1 if we cannot decide at compile time. SEGMENT_LENGTH_A,
3164 SEGMENT_LENGTH_B, ACCESS_SIZE_A and ACCESS_SIZE_B are the equivalent
3165 of dr_with_seg_len::{seg_len,access_size} for A and B. */
a5af7a75 3166
63bc418d 3167static int
abc9513d 3168vect_compile_time_alias (dr_vec_info *a, dr_vec_info *b,
e85b4a5e 3169 tree segment_length_a, tree segment_length_b,
3170 unsigned HOST_WIDE_INT access_size_a,
3171 unsigned HOST_WIDE_INT access_size_b)
a5af7a75 3172{
abc9513d 3173 poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a->dr));
3174 poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b->dr));
63bc418d 3175 poly_uint64 const_length_a;
3176 poly_uint64 const_length_b;
a5af7a75 3177
a5af7a75 3178 /* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
3179 bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
3180 [a, a+12) */
abc9513d 3181 if (tree_int_cst_compare (DR_STEP (a->dr), size_zero_node) < 0)
a5af7a75 3182 {
63bc418d 3183 const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
e85b4a5e 3184 offset_a = (offset_a + access_size_a) - const_length_a;
a5af7a75 3185 }
63bc418d 3186 else
3187 const_length_a = tree_to_poly_uint64 (segment_length_a);
abc9513d 3188 if (tree_int_cst_compare (DR_STEP (b->dr), size_zero_node) < 0)
a5af7a75 3189 {
63bc418d 3190 const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
e85b4a5e 3191 offset_b = (offset_b + access_size_b) - const_length_b;
a5af7a75 3192 }
63bc418d 3193 else
3194 const_length_b = tree_to_poly_uint64 (segment_length_b);
a5af7a75 3195
e85b4a5e 3196 const_length_a += access_size_a;
3197 const_length_b += access_size_b;
3198
63bc418d 3199 if (ranges_known_overlap_p (offset_a, const_length_a,
3200 offset_b, const_length_b))
3201 return 1;
a5af7a75 3202
63bc418d 3203 if (!ranges_maybe_overlap_p (offset_a, const_length_a,
3204 offset_b, const_length_b))
3205 return 0;
3206
3207 return -1;
a5af7a75 3208}
3209
403965f7 3210/* Return true if the minimum nonzero dependence distance for loop LOOP_DEPTH
3211 in DDR is >= VF. */
3212
3213static bool
3214dependence_distance_ge_vf (data_dependence_relation *ddr,
d75596cd 3215 unsigned int loop_depth, poly_uint64 vf)
403965f7 3216{
3217 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE
3218 || DDR_NUM_DIST_VECTS (ddr) == 0)
3219 return false;
3220
3221 /* If the dependence is exact, we should have limited the VF instead. */
3222 gcc_checking_assert (DDR_COULD_BE_INDEPENDENT_P (ddr));
3223
3224 unsigned int i;
3225 lambda_vector dist_v;
3226 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
3227 {
3228 HOST_WIDE_INT dist = dist_v[loop_depth];
3229 if (dist != 0
3230 && !(dist > 0 && DDR_REVERSED_P (ddr))
d75596cd 3231 && maybe_lt ((unsigned HOST_WIDE_INT) abs_hwi (dist), vf))
403965f7 3232 return false;
3233 }
3234
3235 if (dump_enabled_p ())
a4e972e3 3236 dump_printf_loc (MSG_NOTE, vect_location,
3237 "dependence distance between %T and %T is >= VF\n",
3238 DR_REF (DDR_A (ddr)), DR_REF (DDR_B (ddr)));
403965f7 3239
3240 return true;
3241}
3242
e85b4a5e 3243/* Dump LOWER_BOUND using flags DUMP_KIND. Dumps are known to be enabled. */
3244
3245static void
54e7de93 3246dump_lower_bound (dump_flags_t dump_kind, const vec_lower_bound &lower_bound)
e85b4a5e 3247{
a4e972e3 3248 dump_printf (dump_kind, "%s (%T) >= ",
3249 lower_bound.unsigned_p ? "unsigned" : "abs",
3250 lower_bound.expr);
e85b4a5e 3251 dump_dec (dump_kind, lower_bound.min_value);
3252}
3253
3254/* Record that the vectorized loop requires the vec_lower_bound described
3255 by EXPR, UNSIGNED_P and MIN_VALUE. */
3256
3257static void
3258vect_check_lower_bound (loop_vec_info loop_vinfo, tree expr, bool unsigned_p,
3259 poly_uint64 min_value)
3260{
3261 vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
3262 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
3263 if (operand_equal_p (lower_bounds[i].expr, expr, 0))
3264 {
3265 unsigned_p &= lower_bounds[i].unsigned_p;
3266 min_value = upper_bound (lower_bounds[i].min_value, min_value);
3267 if (lower_bounds[i].unsigned_p != unsigned_p
3268 || maybe_lt (lower_bounds[i].min_value, min_value))
3269 {
3270 lower_bounds[i].unsigned_p = unsigned_p;
3271 lower_bounds[i].min_value = min_value;
3272 if (dump_enabled_p ())
3273 {
3274 dump_printf_loc (MSG_NOTE, vect_location,
3275 "updating run-time check to ");
3276 dump_lower_bound (MSG_NOTE, lower_bounds[i]);
3277 dump_printf (MSG_NOTE, "\n");
3278 }
3279 }
3280 return;
3281 }
3282
3283 vec_lower_bound lower_bound (expr, unsigned_p, min_value);
3284 if (dump_enabled_p ())
3285 {
3286 dump_printf_loc (MSG_NOTE, vect_location, "need a run-time check that ");
3287 dump_lower_bound (MSG_NOTE, lower_bound);
3288 dump_printf (MSG_NOTE, "\n");
3289 }
3290 LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (lower_bound);
3291}
3292
abc9513d 3293/* Return true if it's unlikely that the step of the vectorized form of DR_INFO
e85b4a5e 3294 will span fewer than GAP bytes. */
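/* For instance (numbers assumed purely for illustration): with an estimated
   vectorization factor of 4, a group of 2 scalar accesses and a 4-byte
   scalar access size, the vectorized step is expected to cover about
   4 * 2 * 4 = 32 bytes, so only a GAP of at most 32 bytes counts as
   small here.  */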
3295
3296static bool
abc9513d 3297vect_small_gap_p (loop_vec_info loop_vinfo, dr_vec_info *dr_info,
3298 poly_int64 gap)
e85b4a5e 3299{
abc9513d 3300 stmt_vec_info stmt_info = dr_info->stmt;
e85b4a5e 3301 HOST_WIDE_INT count
3302 = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
e1009321 3303 if (DR_GROUP_FIRST_ELEMENT (stmt_info))
cd24aa3c 3304 count *= DR_GROUP_SIZE (DR_GROUP_FIRST_ELEMENT (stmt_info));
abc9513d 3305 return (estimated_poly_value (gap)
3306 <= count * vect_get_scalar_dr_size (dr_info));
e85b4a5e 3307}
3308
abc9513d 3309/* Return true if we know that there is no alias between DR_INFO_A and
3310 DR_INFO_B when abs (DR_STEP (DR_INFO_A->dr)) >= N for some N.
3311 When returning true, set *LOWER_BOUND_OUT to this N. */
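/* A hypothetical example: if DR_INFO_A and DR_INFO_B access 4-byte elements
   at byte offsets 0 and 4 from the same base and offset, one scalar
   iteration touches bytes [0, 8), so *LOWER_BOUND_OUT is set to 8 and the
   accesses cannot alias across iterations once abs (DR_STEP) >= 8.  */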
e85b4a5e 3312
3313static bool
abc9513d 3314vectorizable_with_step_bound_p (dr_vec_info *dr_info_a, dr_vec_info *dr_info_b,
e85b4a5e 3315 poly_uint64 *lower_bound_out)
3316{
3317 /* Check that there is a constant gap of known sign between DR_A
3318 and DR_B. */
abc9513d 3319 data_reference *dr_a = dr_info_a->dr;
3320 data_reference *dr_b = dr_info_b->dr;
e85b4a5e 3321 poly_int64 init_a, init_b;
3322 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
3323 || !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
3324 || !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
3325 || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
3326 || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
3327 || !ordered_p (init_a, init_b))
3328 return false;
3329
3330 /* Sort DR_A and DR_B by the address they access. */
3331 if (maybe_lt (init_b, init_a))
3332 {
3333 std::swap (init_a, init_b);
abc9513d 3334 std::swap (dr_info_a, dr_info_b);
e85b4a5e 3335 std::swap (dr_a, dr_b);
3336 }
3337
3338 /* If the two accesses could be dependent within a scalar iteration,
3339 make sure that we'd retain their order. */
abc9513d 3340 if (maybe_gt (init_a + vect_get_scalar_dr_size (dr_info_a), init_b)
3341 && !vect_preserves_scalar_order_p (dr_info_a, dr_info_b))
e85b4a5e 3342 return false;
3343
3344 /* There is no alias if abs (DR_STEP) is greater than or equal to
3345 the bytes spanned by the combination of the two accesses. */
abc9513d 3346 *lower_bound_out = init_b + vect_get_scalar_dr_size (dr_info_b) - init_a;
e85b4a5e 3347 return true;
3348}
3349
fb85abff 3350/* Function vect_prune_runtime_alias_test_list.
3351
3352 Prune a list of ddrs to be tested at run-time by versioning for alias.
8a7b0f48 3353 Merge several alias checks into one if possible.
 fb85abff 3354 Return FALSE if the resulting list of ddrs is longer than allowed by
 3355 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
3356
ed9370cc 3357opt_result
fb85abff 3358vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
3359{
f68a7726 3360 typedef pair_hash <tree_operand_hash, tree_operand_hash> tree_pair_hash;
3361 hash_set <tree_pair_hash> compared_objects;
3362
3363 vec<ddr_p> may_alias_ddrs = LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
3364 vec<dr_with_seg_len_pair_t> &comp_alias_ddrs
3365 = LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
3366 vec<vec_object_pair> &check_unequal_addrs
3367 = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
d75596cd 3368 poly_uint64 vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8a7b0f48 3369 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
3370
3371 ddr_p ddr;
3372 unsigned int i;
3373 tree length_factor;
fb85abff 3374
88f6eb8f 3375 DUMP_VECT_SCOPE ("vect_prune_runtime_alias_test_list");
fb85abff 3376
e85b4a5e 3377 /* Step values are irrelevant for aliasing if the number of vector
3378 iterations is equal to the number of scalar iterations (which can
3379 happen for fully-SLP loops). */
3380 bool ignore_step_p = known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), 1U);
3381
3382 if (!ignore_step_p)
3383 {
3384 /* Convert the checks for nonzero steps into bound tests. */
3385 tree value;
3386 FOR_EACH_VEC_ELT (LOOP_VINFO_CHECK_NONZERO (loop_vinfo), i, value)
3387 vect_check_lower_bound (loop_vinfo, value, true, 1);
3388 }
3389
8a7b0f48 3390 if (may_alias_ddrs.is_empty ())
ed9370cc 3391 return opt_result::success ();
8a7b0f48 3392
8a7b0f48 3393 comp_alias_ddrs.create (may_alias_ddrs.length ());
3394
403965f7 3395 unsigned int loop_depth
3396 = index_in_loop_nest (LOOP_VINFO_LOOP (loop_vinfo)->num,
3397 LOOP_VINFO_LOOP_NEST (loop_vinfo));
3398
8a7b0f48 3399 /* First, we collect all data ref pairs for aliasing checks. */
3400 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
fb85abff 3401 {
c1e75671 3402 int comp_res;
e85b4a5e 3403 poly_uint64 lower_bound;
8a7b0f48 3404 tree segment_length_a, segment_length_b;
e85b4a5e 3405 unsigned HOST_WIDE_INT access_size_a, access_size_b;
3406 unsigned int align_a, align_b;
8a7b0f48 3407
403965f7 3408 /* Ignore the alias if the VF we chose ended up being no greater
3409 than the dependence distance. */
3410 if (dependence_distance_ge_vf (ddr, loop_depth, vect_factor))
3411 continue;
3412
f68a7726 3413 if (DDR_OBJECT_A (ddr))
3414 {
3415 vec_object_pair new_pair (DDR_OBJECT_A (ddr), DDR_OBJECT_B (ddr));
3416 if (!compared_objects.add (new_pair))
3417 {
3418 if (dump_enabled_p ())
a4e972e3 3419 dump_printf_loc (MSG_NOTE, vect_location,
3420 "checking that %T and %T"
3421 " have different addresses\n",
3422 new_pair.first, new_pair.second);
f68a7726 3423 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).safe_push (new_pair);
3424 }
3425 continue;
3426 }
3427
db72d3bf 3428 dr_vec_info *dr_info_a = loop_vinfo->lookup_dr (DDR_A (ddr));
abc9513d 3429 stmt_vec_info stmt_info_a = dr_info_a->stmt;
e85b4a5e 3430
db72d3bf 3431 dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr));
abc9513d 3432 stmt_vec_info stmt_info_b = dr_info_b->stmt;
e85b4a5e 3433
3434 /* Skip the pair if inter-iteration dependencies are irrelevant
3435 and intra-iteration dependencies are guaranteed to be honored. */
3436 if (ignore_step_p
abc9513d 3437 && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b)
3438 || vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
3439 &lower_bound)))
e85b4a5e 3440 {
3441 if (dump_enabled_p ())
a4e972e3 3442 dump_printf_loc (MSG_NOTE, vect_location,
3443 "no need for alias check between "
3444 "%T and %T when VF is 1\n",
3445 DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
e85b4a5e 3446 continue;
3447 }
3448
3449 /* See whether we can handle the alias using a bounds check on
3450 the step, and whether that's likely to be the best approach.
3451 (It might not be, for example, if the minimum step is much larger
3452 than the number of bytes handled by one vector iteration.) */
3453 if (!ignore_step_p
abc9513d 3454 && TREE_CODE (DR_STEP (dr_info_a->dr)) != INTEGER_CST
3455 && vectorizable_with_step_bound_p (dr_info_a, dr_info_b,
3456 &lower_bound)
3457 && (vect_small_gap_p (loop_vinfo, dr_info_a, lower_bound)
3458 || vect_small_gap_p (loop_vinfo, dr_info_b, lower_bound)))
e85b4a5e 3459 {
abc9513d 3460 bool unsigned_p = dr_known_forward_stride_p (dr_info_a->dr);
e85b4a5e 3461 if (dump_enabled_p ())
3462 {
a4e972e3 3463 dump_printf_loc (MSG_NOTE, vect_location, "no alias between "
3464 "%T and %T when the step %T is outside ",
3465 DR_REF (dr_info_a->dr),
3466 DR_REF (dr_info_b->dr),
3467 DR_STEP (dr_info_a->dr));
e85b4a5e 3468 if (unsigned_p)
3469 dump_printf (MSG_NOTE, "[0");
3470 else
3471 {
3472 dump_printf (MSG_NOTE, "(");
3473 dump_dec (MSG_NOTE, poly_int64 (-lower_bound));
3474 }
3475 dump_printf (MSG_NOTE, ", ");
3476 dump_dec (MSG_NOTE, lower_bound);
3477 dump_printf (MSG_NOTE, ")\n");
3478 }
abc9513d 3479 vect_check_lower_bound (loop_vinfo, DR_STEP (dr_info_a->dr),
3480 unsigned_p, lower_bound);
e85b4a5e 3481 continue;
3482 }
3483
cd24aa3c 3484 stmt_vec_info dr_group_first_a = DR_GROUP_FIRST_ELEMENT (stmt_info_a);
8a7b0f48 3485 if (dr_group_first_a)
3486 {
cd24aa3c 3487 stmt_info_a = dr_group_first_a;
abc9513d 3488 dr_info_a = STMT_VINFO_DR_INFO (stmt_info_a);
8a7b0f48 3489 }
fb85abff 3490
cd24aa3c 3491 stmt_vec_info dr_group_first_b = DR_GROUP_FIRST_ELEMENT (stmt_info_b);
8a7b0f48 3492 if (dr_group_first_b)
3493 {
cd24aa3c 3494 stmt_info_b = dr_group_first_b;
abc9513d 3495 dr_info_b = STMT_VINFO_DR_INFO (stmt_info_b);
8a7b0f48 3496 }
fb85abff 3497
e85b4a5e 3498 if (ignore_step_p)
3499 {
3500 segment_length_a = size_zero_node;
3501 segment_length_b = size_zero_node;
3502 }
8a7b0f48 3503 else
e85b4a5e 3504 {
abc9513d 3505 if (!operand_equal_p (DR_STEP (dr_info_a->dr),
3506 DR_STEP (dr_info_b->dr), 0))
e85b4a5e 3507 length_factor = scalar_loop_iters;
3508 else
3509 length_factor = size_int (vect_factor);
abc9513d 3510 segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
3511 segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
e85b4a5e 3512 }
abc9513d 3513 access_size_a = vect_vfa_access_size (dr_info_a);
3514 access_size_b = vect_vfa_access_size (dr_info_b);
3515 align_a = vect_vfa_align (dr_info_a);
3516 align_b = vect_vfa_align (dr_info_b);
8a7b0f48 3517
abc9513d 3518 comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_info_a->dr),
3519 DR_BASE_ADDRESS (dr_info_b->dr));
a5af7a75 3520 if (comp_res == 0)
abc9513d 3521 comp_res = data_ref_compare_tree (DR_OFFSET (dr_info_a->dr),
3522 DR_OFFSET (dr_info_b->dr));
a5af7a75 3523
63bc418d 3524 /* See whether the alias is known at compilation time. */
a5af7a75 3525 if (comp_res == 0
abc9513d 3526 && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST
3527 && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST
63bc418d 3528 && poly_int_tree_p (segment_length_a)
3529 && poly_int_tree_p (segment_length_b))
a5af7a75 3530 {
abc9513d 3531 int res = vect_compile_time_alias (dr_info_a, dr_info_b,
63bc418d 3532 segment_length_a,
e85b4a5e 3533 segment_length_b,
3534 access_size_a,
3535 access_size_b);
3536 if (res >= 0 && dump_enabled_p ())
3537 {
3538 dump_printf_loc (MSG_NOTE, vect_location,
a4e972e3 3539 "can tell at compile time that %T and %T",
3540 DR_REF (dr_info_a->dr), DR_REF (dr_info_b->dr));
e85b4a5e 3541 if (res == 0)
3542 dump_printf (MSG_NOTE, " do not alias\n");
3543 else
3544 dump_printf (MSG_NOTE, " alias\n");
3545 }
3546
63bc418d 3547 if (res == 0)
a5af7a75 3548 continue;
3549
63bc418d 3550 if (res == 1)
ed9370cc 3551 return opt_result::failure_at (stmt_info_b->stmt,
3552 "not vectorized:"
3553 " compilation time alias: %G%G",
3554 stmt_info_a->stmt,
3555 stmt_info_b->stmt);
a5af7a75 3556 }
3557
43d14b66 3558 dr_with_seg_len_pair_t dr_with_seg_len_pair
abc9513d 3559 (dr_with_seg_len (dr_info_a->dr, segment_length_a,
3560 access_size_a, align_a),
3561 dr_with_seg_len (dr_info_b->dr, segment_length_b,
3562 access_size_b, align_b));
43d14b66 3563
c1e75671 3564 /* Canonicalize pairs by sorting the two DR members. */
a5af7a75 3565 if (comp_res > 0)
3d4d7ad1 3566 std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
8a7b0f48 3567
3568 comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
3569 }
3570
d75596cd 3571 prune_runtime_alias_test_list (&comp_alias_ddrs, vect_factor);
f68a7726 3572
3573 unsigned int count = (comp_alias_ddrs.length ()
3574 + check_unequal_addrs.length ());
e85b4a5e 3575
91f42adc 3576 if (dump_enabled_p ())
3577 dump_printf_loc (MSG_NOTE, vect_location,
3578 "improved number of alias checks from %d to %d\n",
3579 may_alias_ddrs.length (), count);
f68a7726 3580 if ((int) count > PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
ed9370cc 3581 return opt_result::failure_at
3582 (vect_location,
3583 "number of versioning for alias "
3584 "run-time tests exceeds %d "
3585 "(--param vect-max-version-for-alias-checks)\n",
3586 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
3587
3588 return opt_result::success ();
fb85abff 3589}
3590
1619606c 3591/* Check whether we can use an internal function for a gather load
3592 or scatter store. READ_P is true for loads and false for stores.
3593 MASKED_P is true if the load or store is conditional. MEMORY_TYPE is
3594 the type of the memory elements being loaded or stored. OFFSET_BITS
3595 is the number of bits in each scalar offset and OFFSET_SIGN is the
3596 sign of the offset. SCALE is the amount by which the offset should
3597 be multiplied *after* it has been converted to address width.
3598
3599 Return true if the function is supported, storing the function
3600 id in *IFN_OUT and the type of a vector element in *ELEMENT_TYPE_OUT. */
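/* For example (an assumed configuration, purely for illustration): a masked
   gather of 32-bit floats from 32-bit float memory with 32-bit signed
   offsets and SCALE 4 asks whether the target implements
   IFN_MASK_GATHER_LOAD for that combination; 64-bit offsets would be wider
   than the 32-bit vector elements and are rejected up front.  */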
3601
1d2c127d 3602bool
1619606c 3603vect_gather_scatter_fn_p (bool read_p, bool masked_p, tree vectype,
3604 tree memory_type, unsigned int offset_bits,
3605 signop offset_sign, int scale,
3606 internal_fn *ifn_out, tree *element_type_out)
3607{
3608 unsigned int memory_bits = tree_to_uhwi (TYPE_SIZE (memory_type));
3609 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype)));
3610 if (offset_bits > element_bits)
3611 /* Internal functions require the offset to be the same width as
3612 the vector elements. We can extend narrower offsets, but it isn't
3613 safe to truncate wider offsets. */
3614 return false;
3615
3616 if (element_bits != memory_bits)
3617 /* For now the vector elements must be the same width as the
3618 memory elements. */
3619 return false;
3620
3621 /* Work out which function we need. */
3622 internal_fn ifn;
3623 if (read_p)
3624 ifn = masked_p ? IFN_MASK_GATHER_LOAD : IFN_GATHER_LOAD;
3625 else
0bf8b382 3626 ifn = masked_p ? IFN_MASK_SCATTER_STORE : IFN_SCATTER_STORE;
1619606c 3627
3628 /* Test whether the target supports this combination. */
3629 if (!internal_gather_scatter_fn_supported_p (ifn, vectype, memory_type,
3630 offset_sign, scale))
3631 return false;
3632
3633 *ifn_out = ifn;
3634 *element_type_out = TREE_TYPE (vectype);
3635 return true;
3636}
3637
e068828a 3638/* STMT_INFO is a call to an internal gather load or scatter store function.
1619606c 3639 Describe the operation in INFO. */
3640
3641static void
e068828a 3642vect_describe_gather_scatter_call (stmt_vec_info stmt_info,
3643 gather_scatter_info *info)
1619606c 3644{
e068828a 3645 gcall *call = as_a <gcall *> (stmt_info->stmt);
1619606c 3646 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3647 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3648
3649 info->ifn = gimple_call_internal_fn (call);
3650 info->decl = NULL_TREE;
3651 info->base = gimple_call_arg (call, 0);
3652 info->offset = gimple_call_arg (call, 1);
3653 info->offset_dt = vect_unknown_def_type;
3654 info->offset_vectype = NULL_TREE;
3655 info->scale = TREE_INT_CST_LOW (gimple_call_arg (call, 2));
3656 info->element_type = TREE_TYPE (vectype);
3657 info->memory_type = TREE_TYPE (DR_REF (dr));
3658}
3659
ecc42a77 3660/* Return true if a non-affine read or write in STMT_INFO is suitable for a
cf60da07 3661 gather load or scatter store. Describe the operation in *INFO if so. */
16dfb112 3662
cf60da07 3663bool
ecc42a77 3664vect_check_gather_scatter (stmt_vec_info stmt_info, loop_vec_info loop_vinfo,
cf60da07 3665 gather_scatter_info *info)
16dfb112 3666{
81bc0f0f 3667 HOST_WIDE_INT scale = 1;
3668 poly_int64 pbitpos, pbitsize;
16dfb112 3669 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
16dfb112 3670 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
3671 tree offtype = NULL_TREE;
1619606c 3672 tree decl = NULL_TREE, base, off;
3673 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3674 tree memory_type = TREE_TYPE (DR_REF (dr));
3754d046 3675 machine_mode pmode;
292237f3 3676 int punsignedp, reversep, pvolatilep = 0;
1619606c 3677 internal_fn ifn;
3678 tree element_type;
3679 bool masked_p = false;
3680
3681 /* See whether this is already a call to a gather/scatter internal function.
3682 If not, see whether it's a masked load or store. */
a73182ff 3683 gcall *call = dyn_cast <gcall *> (stmt_info->stmt);
1619606c 3684 if (call && gimple_call_internal_p (call))
3685 {
5b4b7bcc 3686 ifn = gimple_call_internal_fn (call);
1619606c 3687 if (internal_gather_scatter_fn_p (ifn))
3688 {
e068828a 3689 vect_describe_gather_scatter_call (stmt_info, info);
1619606c 3690 return true;
3691 }
3692 masked_p = (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE);
3693 }
3694
3695 /* True if we should aim to use internal functions rather than
3696 built-in functions. */
3697 bool use_ifn_p = (DR_IS_READ (dr)
0bf8b382 3698 ? supports_vec_gather_load_p ()
3699 : supports_vec_scatter_store_p ());
16dfb112 3700
c71d3c24 3701 base = DR_REF (dr);
 3702 /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF;
3703 see if we can use the def stmt of the address. */
1619606c 3704 if (masked_p
c71d3c24 3705 && TREE_CODE (base) == MEM_REF
3706 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME
3707 && integer_zerop (TREE_OPERAND (base, 1))
3708 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (base, 0)))
3709 {
42acab1c 3710 gimple *def_stmt = SSA_NAME_DEF_STMT (TREE_OPERAND (base, 0));
c71d3c24 3711 if (is_gimple_assign (def_stmt)
3712 && gimple_assign_rhs_code (def_stmt) == ADDR_EXPR)
3713 base = TREE_OPERAND (gimple_assign_rhs1 (def_stmt), 0);
3714 }
3715
0bd6d857 3716 /* The gather and scatter builtins need an address of the form
16dfb112 3717 loop_invariant + vector * {1, 2, 4, 8}
3718 or
3719 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
3720 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
3721 of loop invariants/SSA_NAMEs defined in the loop, with casts,
3722 multiplications and additions in it. To get a vector, we need
3723 a single SSA_NAME that will be defined in the loop and will
3724 contain everything that is not loop invariant and that can be
 3725 vectorized. The following code attempts to find such a preexisting
3726 SSA_NAME OFF and put the loop invariants into a tree BASE
3727 that can be gimplified before the loop. */
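  /* As a hypothetical illustration: for a gather like "... = a[idx[i]]"
     with 4-byte elements, the code below would end up with BASE holding
     &a plus any constant offset, OFF being the SSA_NAME that carries
     idx[i] (possibly after peeling a sign extension), and SCALE being 4
     once the implicit multiplication by the element size has been
     stripped off.  */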
292237f3 3728 base = get_inner_reference (base, &pbitsize, &pbitpos, &off, &pmode,
b3b6e4b5 3729 &punsignedp, &reversep, &pvolatilep);
8a51585f 3730 if (reversep)
3731 return false;
3732
81bc0f0f 3733 poly_int64 pbytepos = exact_div (pbitpos, BITS_PER_UNIT);
16dfb112 3734
3735 if (TREE_CODE (base) == MEM_REF)
3736 {
3737 if (!integer_zerop (TREE_OPERAND (base, 1)))
3738 {
3739 if (off == NULL_TREE)
90ca1268 3740 off = wide_int_to_tree (sizetype, mem_ref_offset (base));
16dfb112 3741 else
3742 off = size_binop (PLUS_EXPR, off,
3743 fold_convert (sizetype, TREE_OPERAND (base, 1)));
3744 }
3745 base = TREE_OPERAND (base, 0);
3746 }
3747 else
3748 base = build_fold_addr_expr (base);
3749
3750 if (off == NULL_TREE)
3751 off = size_zero_node;
3752
 3753 /* If base is not loop invariant, then either off is 0, in which case we
 3754 start with just the constant offset in the loop invariant BASE and
 3755 continue with base as OFF, or we give up.
 3756 We could handle that case by gimplifying the addition of base + off
 3757 into some SSA_NAME and using that as off, but for now punt. */
3758 if (!expr_invariant_in_loop_p (loop, base))
3759 {
3760 if (!integer_zerop (off))
cf60da07 3761 return false;
16dfb112 3762 off = base;
81bc0f0f 3763 base = size_int (pbytepos);
16dfb112 3764 }
3765 /* Otherwise put base + constant offset into the loop invariant BASE
3766 and continue with OFF. */
3767 else
3768 {
3769 base = fold_convert (sizetype, base);
81bc0f0f 3770 base = size_binop (PLUS_EXPR, base, size_int (pbytepos));
16dfb112 3771 }
3772
 3773 /* OFF at this point may be either an SSA_NAME or some tree expression
3774 from get_inner_reference. Try to peel off loop invariants from it
3775 into BASE as long as possible. */
3776 STRIP_NOPS (off);
3777 while (offtype == NULL_TREE)
3778 {
3779 enum tree_code code;
3780 tree op0, op1, add = NULL_TREE;
3781
3782 if (TREE_CODE (off) == SSA_NAME)
3783 {
42acab1c 3784 gimple *def_stmt = SSA_NAME_DEF_STMT (off);
16dfb112 3785
3786 if (expr_invariant_in_loop_p (loop, off))
cf60da07 3787 return false;
16dfb112 3788
3789 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3790 break;
3791
3792 op0 = gimple_assign_rhs1 (def_stmt);
3793 code = gimple_assign_rhs_code (def_stmt);
3794 op1 = gimple_assign_rhs2 (def_stmt);
3795 }
3796 else
3797 {
3798 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
cf60da07 3799 return false;
16dfb112 3800 code = TREE_CODE (off);
3801 extract_ops_from_tree (off, &code, &op0, &op1);
3802 }
3803 switch (code)
3804 {
3805 case POINTER_PLUS_EXPR:
3806 case PLUS_EXPR:
3807 if (expr_invariant_in_loop_p (loop, op0))
3808 {
3809 add = op0;
3810 off = op1;
3811 do_add:
3812 add = fold_convert (sizetype, add);
3813 if (scale != 1)
3814 add = size_binop (MULT_EXPR, add, size_int (scale));
3815 base = size_binop (PLUS_EXPR, base, add);
3816 continue;
3817 }
3818 if (expr_invariant_in_loop_p (loop, op1))
3819 {
3820 add = op1;
3821 off = op0;
3822 goto do_add;
3823 }
3824 break;
3825 case MINUS_EXPR:
3826 if (expr_invariant_in_loop_p (loop, op1))
3827 {
3828 add = fold_convert (sizetype, op1);
3829 add = size_binop (MINUS_EXPR, size_zero_node, add);
3830 off = op0;
3831 goto do_add;
3832 }
3833 break;
3834 case MULT_EXPR:
e913b5cd 3835 if (scale == 1 && tree_fits_shwi_p (op1))
16dfb112 3836 {
1619606c 3837 int new_scale = tree_to_shwi (op1);
3838 /* Only treat this as a scaling operation if the target
3839 supports it. */
3840 if (use_ifn_p
3841 && !vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p,
3842 vectype, memory_type, 1,
3843 TYPE_SIGN (TREE_TYPE (op0)),
3844 new_scale, &ifn,
3845 &element_type))
3846 break;
3847 scale = new_scale;
16dfb112 3848 off = op0;
3849 continue;
3850 }
3851 break;
3852 case SSA_NAME:
3853 off = op0;
3854 continue;
3855 CASE_CONVERT:
3856 if (!POINTER_TYPE_P (TREE_TYPE (op0))
3857 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3858 break;
3859 if (TYPE_PRECISION (TREE_TYPE (op0))
3860 == TYPE_PRECISION (TREE_TYPE (off)))
3861 {
3862 off = op0;
3863 continue;
3864 }
1619606c 3865
3866 /* The internal functions need the offset to be the same width
3867 as the elements of VECTYPE. Don't include operations that
3868 cast the offset from that width to a different width. */
3869 if (use_ifn_p
3870 && (int_size_in_bytes (TREE_TYPE (vectype))
3871 == int_size_in_bytes (TREE_TYPE (off))))
3872 break;
3873
16dfb112 3874 if (TYPE_PRECISION (TREE_TYPE (op0))
3875 < TYPE_PRECISION (TREE_TYPE (off)))
3876 {
3877 off = op0;
3878 offtype = TREE_TYPE (off);
3879 STRIP_NOPS (off);
3880 continue;
3881 }
3882 break;
3883 default:
3884 break;
3885 }
3886 break;
3887 }
3888
 3889 /* If at the end OFF still isn't an SSA_NAME or isn't
3890 defined in the loop, punt. */
3891 if (TREE_CODE (off) != SSA_NAME
3892 || expr_invariant_in_loop_p (loop, off))
cf60da07 3893 return false;
16dfb112 3894
3895 if (offtype == NULL_TREE)
3896 offtype = TREE_TYPE (off);
3897
1619606c 3898 if (use_ifn_p)
3899 {
3900 if (!vect_gather_scatter_fn_p (DR_IS_READ (dr), masked_p, vectype,
3901 memory_type, TYPE_PRECISION (offtype),
3902 TYPE_SIGN (offtype), scale, &ifn,
3903 &element_type))
3904 return false;
3905 }
0bd6d857 3906 else
1619606c 3907 {
3908 if (DR_IS_READ (dr))
1f9a3b5c 3909 {
3910 if (targetm.vectorize.builtin_gather)
3911 decl = targetm.vectorize.builtin_gather (vectype, offtype, scale);
3912 }
1619606c 3913 else
1f9a3b5c 3914 {
3915 if (targetm.vectorize.builtin_scatter)
3916 decl = targetm.vectorize.builtin_scatter (vectype, offtype, scale);
3917 }
0bd6d857 3918
1619606c 3919 if (!decl)
3920 return false;
3921
3922 ifn = IFN_LAST;
3923 element_type = TREE_TYPE (vectype);
3924 }
cf60da07 3925
1619606c 3926 info->ifn = ifn;
cf60da07 3927 info->decl = decl;
3928 info->base = base;
3929 info->offset = off;
3930 info->offset_dt = vect_unknown_def_type;
3931 info->offset_vectype = NULL_TREE;
3932 info->scale = scale;
1619606c 3933 info->element_type = element_type;
3934 info->memory_type = memory_type;
cf60da07 3935 return true;
16dfb112 3936}
3937
ed9d8730 3938/* Find the data references in STMT, analyze them with respect to LOOP and
3939 append them to DATAREFS. Return false if datarefs in this stmt cannot
3940 be handled. */
3941
ed9370cc 3942opt_result
ed9d8730 3943vect_find_stmt_data_reference (loop_p loop, gimple *stmt,
3944 vec<data_reference_p> *datarefs)
3945{
3946 /* We can ignore clobbers for dataref analysis - they are removed during
3947 loop vectorization and BB vectorization checks dependences with a
3948 stmt walk. */
3949 if (gimple_clobber_p (stmt))
ed9370cc 3950 return opt_result::success ();
ed9d8730 3951
3952 if (gimple_has_volatile_ops (stmt))
ed9370cc 3953 return opt_result::failure_at (stmt, "not vectorized: volatile type: %G",
3954 stmt);
ed9d8730 3955
aac19106 3956 if (stmt_can_throw_internal (cfun, stmt))
ed9370cc 3957 return opt_result::failure_at (stmt,
3958 "not vectorized:"
3959 " statement can throw an exception: %G",
3960 stmt);
ed9d8730 3961
3962 auto_vec<data_reference_p, 2> refs;
ed9370cc 3963 opt_result res = find_data_references_in_stmt (loop, stmt, &refs);
3964 if (!res)
3965 return res;
ed9d8730 3966
3967 if (refs.is_empty ())
ed9370cc 3968 return opt_result::success ();
ed9d8730 3969
3970 if (refs.length () > 1)
ed9370cc 3971 return opt_result::failure_at (stmt,
3972 "not vectorized:"
3973 " more than one data ref in stmt: %G", stmt);
ed9d8730 3974
3975 if (gcall *call = dyn_cast <gcall *> (stmt))
3976 if (!gimple_call_internal_p (call)
3977 || (gimple_call_internal_fn (call) != IFN_MASK_LOAD
3978 && gimple_call_internal_fn (call) != IFN_MASK_STORE))
ed9370cc 3979 return opt_result::failure_at (stmt,
3980 "not vectorized: dr in a call %G", stmt);
ed9d8730 3981
3982 data_reference_p dr = refs.pop ();
3983 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3984 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
ed9370cc 3985 return opt_result::failure_at (stmt,
3986 "not vectorized:"
3987 " statement is bitfield access %G", stmt);
ed9d8730 3988
3989 if (DR_BASE_ADDRESS (dr)
3990 && TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
ed9370cc 3991 return opt_result::failure_at (stmt,
3992 "not vectorized:"
3993 " base addr of dr is a constant\n");
ed9d8730 3994
369a4f17 3995 /* Check whether this may be a SIMD lane access and adjust the
3996 DR to make it easier for us to handle it. */
3997 if (loop
3998 && loop->simduid
3999 && (!DR_BASE_ADDRESS (dr)
4000 || !DR_OFFSET (dr)
4001 || !DR_INIT (dr)
4002 || !DR_STEP (dr)))
4003 {
4004 struct data_reference *newdr
4005 = create_data_ref (NULL, loop_containing_stmt (stmt), DR_REF (dr), stmt,
4006 DR_IS_READ (dr), DR_IS_CONDITIONAL_IN_STMT (dr));
4007 if (DR_BASE_ADDRESS (newdr)
4008 && DR_OFFSET (newdr)
4009 && DR_INIT (newdr)
4010 && DR_STEP (newdr)
4011 && integer_zerop (DR_STEP (newdr)))
4012 {
4013 tree off = DR_OFFSET (newdr);
4014 STRIP_NOPS (off);
4015 if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
4016 && TREE_CODE (off) == MULT_EXPR
4017 && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
4018 {
4019 tree step = TREE_OPERAND (off, 1);
4020 off = TREE_OPERAND (off, 0);
4021 STRIP_NOPS (off);
4022 if (CONVERT_EXPR_P (off)
4023 && (TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off, 0)))
4024 < TYPE_PRECISION (TREE_TYPE (off))))
4025 off = TREE_OPERAND (off, 0);
4026 if (TREE_CODE (off) == SSA_NAME)
4027 {
4028 gimple *def = SSA_NAME_DEF_STMT (off);
4029 tree reft = TREE_TYPE (DR_REF (newdr));
4030 if (is_gimple_call (def)
4031 && gimple_call_internal_p (def)
4032 && (gimple_call_internal_fn (def) == IFN_GOMP_SIMD_LANE))
4033 {
4034 tree arg = gimple_call_arg (def, 0);
4035 gcc_assert (TREE_CODE (arg) == SSA_NAME);
4036 arg = SSA_NAME_VAR (arg);
4037 if (arg == loop->simduid
4038 /* For now. */
4039 && tree_int_cst_equal (TYPE_SIZE_UNIT (reft), step))
4040 {
4041 DR_OFFSET (newdr) = ssize_int (0);
4042 DR_STEP (newdr) = step;
4043 DR_OFFSET_ALIGNMENT (newdr) = BIGGEST_ALIGNMENT;
4044 DR_STEP_ALIGNMENT (newdr)
4045 = highest_pow2_factor (step);
4046 /* Mark as simd-lane access. */
4047 newdr->aux = (void *)-1;
4048 free_data_ref (dr);
4049 datarefs->safe_push (newdr);
ed9370cc 4050 return opt_result::success ();
369a4f17 4051 }
4052 }
4053 }
4054 }
4055 }
4056 free_data_ref (newdr);
4057 }
4058
ed9d8730 4059 datarefs->safe_push (dr);
ed9370cc 4060 return opt_result::success ();
ed9d8730 4061}
4062
fb85abff 4063/* Function vect_analyze_data_refs.
4064
37545e54 4065 Find all the data references in the loop or basic block.
fb85abff 4066
4067 The general structure of the analysis of data refs in the vectorizer is as
4068 follows:
48e1416a 4069 1- vect_analyze_data_refs(loop/bb): call
37545e54 4070 compute_data_dependences_for_loop/bb to find and analyze all data-refs
4071 in the loop/bb and their dependences.
fb85abff 4072 2- vect_analyze_dependences(): apply dependence testing using ddrs.
4073 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
4074 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
4075
4076*/
4077
ed9370cc 4078opt_result
d75596cd 4079vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf)
fb85abff 4080{
37545e54 4081 struct loop *loop = NULL;
fb85abff 4082 unsigned int i;
fb85abff 4083 struct data_reference *dr;
4084 tree scalar_type;
4085
88f6eb8f 4086 DUMP_VECT_SCOPE ("vect_analyze_data_refs");
48e1416a 4087
e2c5c678 4088 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
0a08c1bc 4089 loop = LOOP_VINFO_LOOP (loop_vinfo);
fb85abff 4090
282bf14c 4091 /* Go through the data-refs, check that the analysis succeeded. Update
4092 pointer from stmt_vec_info struct to DR and vectype. */
fb85abff 4093
a99aba41 4094 vec<data_reference_p> datarefs = vinfo->shared->datarefs;
f1f41a6c 4095 FOR_EACH_VEC_ELT (datarefs, i, dr)
fb85abff 4096 {
0bd6d857 4097 enum { SG_NONE, GATHER, SCATTER } gatherscatter = SG_NONE;
d75596cd 4098 poly_uint64 vf;
48e1416a 4099
ed9d8730 4100 gcc_assert (DR_REF (dr));
5f02ee72 4101 stmt_vec_info stmt_info = vinfo->lookup_stmt (DR_STMT (dr));
4102 gcc_assert (!stmt_info->dr_aux.dr);
4103 stmt_info->dr_aux.dr = dr;
4104 stmt_info->dr_aux.stmt = stmt_info;
fb85abff 4105
4106 /* Check that analysis of the data-ref succeeded. */
4107 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
16dfb112 4108 || !DR_STEP (dr))
fb85abff 4109 {
3d483a94 4110 bool maybe_gather
4111 = DR_IS_READ (dr)
16dfb112 4112 && !TREE_THIS_VOLATILE (DR_REF (dr))
1619606c 4113 && (targetm.vectorize.builtin_gather != NULL
4114 || supports_vec_gather_load_p ());
0bd6d857 4115 bool maybe_scatter
4116 = DR_IS_WRITE (dr)
4117 && !TREE_THIS_VOLATILE (DR_REF (dr))
0bf8b382 4118 && (targetm.vectorize.builtin_scatter != NULL
4119 || supports_vec_scatter_store_p ());
3d483a94 4120
 369a4f17 4121 /* If the target supports vector gather loads or scatter stores,
 4122 see if they can be used. */
e2c5c678 4123 if (is_a <loop_vec_info> (vinfo)
0219dc42 4124 && !nested_in_vect_loop_p (loop, stmt_info))
16dfb112 4125 {
369a4f17 4126 if (maybe_gather || maybe_scatter)
fa681b45 4127 {
4128 if (maybe_gather)
4129 gatherscatter = GATHER;
4130 else
4131 gatherscatter = SCATTER;
16dfb112 4132 }
16dfb112 4133 }
6ea6a380 4134
369a4f17 4135 if (gatherscatter == SG_NONE)
16dfb112 4136 {
6d8fb6cf 4137 if (dump_enabled_p ())
a4e972e3 4138 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4139 "not vectorized: data ref analysis "
4140 "failed %G", stmt_info->stmt);
e2c5c678 4141 if (is_a <bb_vec_info> (vinfo))
58cfef6b 4142 {
4143 /* In BB vectorization the ref can still participate
4144 in dependence analysis, we just can't vectorize it. */
4145 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4146 continue;
4147 }
ed9370cc 4148 return opt_result::failure_at (stmt_info->stmt,
4149 "not vectorized:"
4150 " data ref analysis failed: %G",
4151 stmt_info->stmt);
16dfb112 4152 }
fb85abff 4153 }
4154
 369a4f17 4155 /* See if this was detected as a SIMD lane access. */
4156 if (dr->aux == (void *)-1)
4157 {
0219dc42 4158 if (nested_in_vect_loop_p (loop, stmt_info))
ed9370cc 4159 return opt_result::failure_at (stmt_info->stmt,
4160 "not vectorized:"
4161 " data ref analysis failed: %G",
4162 stmt_info->stmt);
369a4f17 4163 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
4164 }
4165
fa681b45 4166 tree base = get_base_address (DR_REF (dr));
4167 if (base && VAR_P (base) && DECL_NONALIASED (base))
87c952b8 4168 {
6d8fb6cf 4169 if (dump_enabled_p ())
a4e972e3 4170 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4171 "not vectorized: base object not addressable "
4172 "for stmt: %G", stmt_info->stmt);
e2c5c678 4173 if (is_a <bb_vec_info> (vinfo))
ed9d8730 4174 {
4175 /* In BB vectorization the ref can still participate
4176 in dependence analysis, we just can't vectorize it. */
4177 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4178 continue;
4179 }
ed9370cc 4180 return opt_result::failure_at (stmt_info->stmt,
4181 "not vectorized: base object not"
4182 " addressable for stmt: %G",
4183 stmt_info->stmt);
87c952b8 4184 }
4185
ed9d8730 4186 if (is_a <loop_vec_info> (vinfo)
fa681b45 4187 && DR_STEP (dr)
ed9d8730 4188 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
635bf3aa 4189 {
0219dc42 4190 if (nested_in_vect_loop_p (loop, stmt_info))
ed9370cc 4191 return opt_result::failure_at (stmt_info->stmt,
4192 "not vectorized:"
4193 "not suitable for strided load %G",
4194 stmt_info->stmt);
ed9d8730 4195 STMT_VINFO_STRIDED_P (stmt_info) = true;
635bf3aa 4196 }
4197
fb85abff 4198 /* Update DR field in stmt_vec_info struct. */
fb85abff 4199
4200 /* If the dataref is in an inner-loop of the loop that is considered for
4201 for vectorization, we also want to analyze the access relative to
48e1416a 4202 the outer-loop (DR contains information only relative to the
fb85abff 4203 inner-most enclosing loop). We do that by building a reference to the
4204 first location accessed by the inner-loop, and analyze it relative to
48e1416a 4205 the outer-loop. */
0219dc42 4206 if (loop && nested_in_vect_loop_p (loop, stmt_info))
fb85abff 4207 {
48e1416a 4208 /* Build a reference to the first location accessed by the
a5456a6d 4209 inner loop: *(BASE + INIT + OFFSET). By construction,
4210 this address must be invariant in the inner loop, so we
4211 can consider it as being used in the outer loop. */
ed9d8730 4212 tree base = unshare_expr (DR_BASE_ADDRESS (dr));
4213 tree offset = unshare_expr (DR_OFFSET (dr));
4214 tree init = unshare_expr (DR_INIT (dr));
a5456a6d 4215 tree init_offset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset),
4216 init, offset);
4217 tree init_addr = fold_build_pointer_plus (base, init_offset);
4218 tree init_ref = build_fold_indirect_ref (init_addr);
fb85abff 4219
6d8fb6cf 4220 if (dump_enabled_p ())
a4e972e3 4221 dump_printf_loc (MSG_NOTE, vect_location,
4222 "analyze in outer loop: %T\n", init_ref);
fb85abff 4223
ed9370cc 4224 opt_result res
4225 = dr_analyze_innermost (&STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info),
4226 init_ref, loop, stmt_info->stmt);
4227 if (!res)
a5456a6d 4228 /* dr_analyze_innermost already explained the failure. */
ed9370cc 4229 return res;
fb85abff 4230
6d8fb6cf 4231 if (dump_enabled_p ())
a4e972e3 4232 dump_printf_loc (MSG_NOTE, vect_location,
4233 "\touter base_address: %T\n"
4234 "\touter offset from base address: %T\n"
4235 "\touter constant offset from base address: %T\n"
4236 "\touter step: %T\n"
4237 "\touter base alignment: %d\n\n"
4238 "\touter base misalignment: %d\n"
4239 "\touter offset alignment: %d\n"
4240 "\touter step alignment: %d\n",
4241 STMT_VINFO_DR_BASE_ADDRESS (stmt_info),
4242 STMT_VINFO_DR_OFFSET (stmt_info),
4243 STMT_VINFO_DR_INIT (stmt_info),
4244 STMT_VINFO_DR_STEP (stmt_info),
4245 STMT_VINFO_DR_BASE_ALIGNMENT (stmt_info),
4246 STMT_VINFO_DR_BASE_MISALIGNMENT (stmt_info),
4247 STMT_VINFO_DR_OFFSET_ALIGNMENT (stmt_info),
4248 STMT_VINFO_DR_STEP_ALIGNMENT (stmt_info));
fb85abff 4249 }
4250
fb85abff 4251 /* Set vectype for STMT. */
4252 scalar_type = TREE_TYPE (DR_REF (dr));
53c3c39b 4253 STMT_VINFO_VECTYPE (stmt_info)
4254 = get_vectype_for_scalar_type (scalar_type);
48e1416a 4255 if (!STMT_VINFO_VECTYPE (stmt_info))
fb85abff 4256 {
6d8fb6cf 4257 if (dump_enabled_p ())
fb85abff 4258 {
78bb46f5 4259 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
a4e972e3 4260 "not vectorized: no vectype for stmt: %G",
4261 stmt_info->stmt);
7bd765d4 4262 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
4263 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
4264 scalar_type);
78bb46f5 4265 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
fb85abff 4266 }
6ea6a380 4267
e2c5c678 4268 if (is_a <bb_vec_info> (vinfo))
77d241ed 4269 {
4270 /* No vector type is fine, the ref can still participate
4271 in dependence analysis, we just can't vectorize it. */
4272 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
4273 continue;
4274 }
ed9370cc 4275 return opt_result::failure_at (stmt_info->stmt,
4276 "not vectorized:"
4277 " no vectype for stmt: %G"
4278 " scalar_type: %T\n",
4279 stmt_info->stmt, scalar_type);
fb85abff 4280 }
0bf5f81b 4281 else
4282 {
4283 if (dump_enabled_p ())
a4e972e3 4284 dump_printf_loc (MSG_NOTE, vect_location,
4285 "got vectype for stmt: %G%T\n",
4286 stmt_info->stmt, STMT_VINFO_VECTYPE (stmt_info));
0bf5f81b 4287 }
91a74fc6 4288
4289 /* Adjust the minimal vectorization factor according to the
4290 vector type. */
4291 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
d75596cd 4292 *min_vf = upper_bound (*min_vf, vf);
16dfb112 4293
0bd6d857 4294 if (gatherscatter != SG_NONE)
16dfb112 4295 {
cf60da07 4296 gather_scatter_info gs_info;
0219dc42 4297 if (!vect_check_gather_scatter (stmt_info,
4298 as_a <loop_vec_info> (vinfo),
cf60da07 4299 &gs_info)
4300 || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset)))
ed9370cc 4301 return opt_result::failure_at
4302 (stmt_info->stmt,
4303 (gatherscatter == GATHER) ?
4304 "not vectorized: not suitable for gather load %G" :
4305 "not vectorized: not suitable for scatter store %G",
4306 stmt_info->stmt);
0bd6d857 4307 STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter;
f634c3e9 4308 }
fb85abff 4309 }
48e1416a 4310
58cfef6b 4311 /* We used to stop processing and prune the list here. Verify we no
4312 longer need to. */
4313 gcc_assert (i == datarefs.length ());
07e3bcbf 4314
ed9370cc 4315 return opt_result::success ();
fb85abff 4316}
4317
4318
4319/* Function vect_get_new_vect_var.
4320
 282bf14c 4321 Returns a name for a new variable. The current naming scheme prepends one
 48e1416a 4322 of the prefixes "vect", "stmp", "mask" or "vectp" (depending on the value
 4323 of VAR_KIND) used for vectorizer generated variables to NAME, if
 fb85abff 4324 provided. */
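/* For instance (illustrative only, PTR_TYPE standing for any pointer type):
   vect_get_new_vect_var (PTR_TYPE, vect_pointer_var, "x") creates a
   temporary whose name is based on "vectp_x", while passing a NULL name
   produces just the "vectp" prefix.  */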
4325
4326tree
4327vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
4328{
4329 const char *prefix;
4330 tree new_vect_var;
4331
4332 switch (var_kind)
4333 {
4334 case vect_simple_var:
0bf5f81b 4335 prefix = "vect";
fb85abff 4336 break;
4337 case vect_scalar_var:
0bf5f81b 4338 prefix = "stmp";
fb85abff 4339 break;
dab48979 4340 case vect_mask_var:
4341 prefix = "mask";
4342 break;
fb85abff 4343 case vect_pointer_var:
0bf5f81b 4344 prefix = "vectp";
fb85abff 4345 break;
4346 default:
4347 gcc_unreachable ();
4348 }
4349
4350 if (name)
4351 {
0bf5f81b 4352 char* tmp = concat (prefix, "_", name, NULL);
35244493 4353 new_vect_var = create_tmp_reg (type, tmp);
fb85abff 4354 free (tmp);
4355 }
4356 else
35244493 4357 new_vect_var = create_tmp_reg (type, prefix);
fb85abff 4358
4359 return new_vect_var;
4360}
4361
23ffec42 4362/* Like vect_get_new_vect_var but return an SSA name. */
4363
4364tree
4365vect_get_new_ssa_name (tree type, enum vect_var_kind var_kind, const char *name)
4366{
4367 const char *prefix;
4368 tree new_vect_var;
4369
4370 switch (var_kind)
4371 {
4372 case vect_simple_var:
4373 prefix = "vect";
4374 break;
4375 case vect_scalar_var:
4376 prefix = "stmp";
4377 break;
4378 case vect_pointer_var:
4379 prefix = "vectp";
4380 break;
4381 default:
4382 gcc_unreachable ();
4383 }
4384
4385 if (name)
4386 {
4387 char* tmp = concat (prefix, "_", name, NULL);
4388 new_vect_var = make_temp_ssa_name (type, NULL, tmp);
4389 free (tmp);
4390 }
4391 else
4392 new_vect_var = make_temp_ssa_name (type, NULL, prefix);
4393
4394 return new_vect_var;
4395}
4396
abc9513d 4397/* Duplicate ptr info and set alignment/misalignment on NAME from DR_INFO. */
4a2edd22 4398
4399static void
abc9513d 4400vect_duplicate_ssa_name_ptr_info (tree name, dr_vec_info *dr_info)
4a2edd22 4401{
abc9513d 4402 duplicate_ssa_name_ptr_info (name, DR_PTR_INFO (dr_info->dr));
4403 int misalign = DR_MISALIGNMENT (dr_info);
df8e9f7a 4404 if (misalign == DR_MISALIGNMENT_UNKNOWN)
4a2edd22 4405 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (name));
4406 else
aec313e5 4407 set_ptr_info_alignment (SSA_NAME_PTR_INFO (name),
e092c20e 4408 known_alignment (DR_TARGET_ALIGNMENT (dr_info)),
4409 misalign);
4a2edd22 4410}
fb85abff 4411
4412/* Function vect_create_addr_base_for_vector_ref.
4413
4414 Create an expression that computes the address of the first memory location
4415 that will be accessed for a data reference.
4416
4417 Input:
ecc42a77 4418 STMT_INFO: The statement containing the data reference.
fb85abff 4419 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
 4420 OFFSET: Optional. If supplied, it is added to the initial address.
4421 LOOP: Specify relative to which loop-nest should the address be computed.
4422 For example, when the dataref is in an inner-loop nested in an
4423 outer-loop that is now being vectorized, LOOP can be either the
282bf14c 4424 outer-loop, or the inner-loop. The first memory location accessed
fb85abff 4425 by the following dataref ('in' points to short):
4426
4427 for (i=0; i<N; i++)
4428 for (j=0; j<M; j++)
4429 s += in[i+j]
4430
4431 is as follows:
4432 if LOOP=i_loop: &in (relative to i_loop)
4433 if LOOP=j_loop: &in+i*2B (relative to j_loop)
1ec61bbd 4434 BYTE_OFFSET: Optional, defaulted to NULL. If supplied, it is added to the
 4435 initial address. Unlike OFFSET, which is a number of elements to
4436 be added, BYTE_OFFSET is measured in bytes.
fb85abff 4437
4438 Output:
48e1416a 4439 1. Return an SSA_NAME whose value is the address of the memory location of
fb85abff 4440 the first vector of the data reference.
4441 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
4442 these statement(s) which define the returned SSA_NAME.
4443
4444 FORNOW: We are only handling array accesses with step 1. */
4445
4446tree
ecc42a77 4447vect_create_addr_base_for_vector_ref (stmt_vec_info stmt_info,
fb85abff 4448 gimple_seq *new_stmt_list,
4449 tree offset,
1ec61bbd 4450 tree byte_offset)
fb85abff 4451{
abc9513d 4452 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
4453 struct data_reference *dr = dr_info->dr;
3c18ea71 4454 const char *base_name;
90d4c4af 4455 tree addr_base;
fb85abff 4456 tree dest;
4457 gimple_seq seq = NULL;
f083cd24 4458 tree vect_ptr_type;
fb85abff 4459 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
37545e54 4460 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
abc9513d 4461 innermost_loop_behavior *drb = vect_dr_behavior (dr_info);
fb85abff 4462
9e879814 4463 tree data_ref_base = unshare_expr (drb->base_address);
4464 tree base_offset = unshare_expr (drb->offset);
4465 tree init = unshare_expr (drb->init);
fb85abff 4466
37545e54 4467 if (loop_vinfo)
3c18ea71 4468 base_name = get_name (data_ref_base);
37545e54 4469 else
4470 {
4471 base_offset = ssize_int (0);
4472 init = ssize_int (0);
3c18ea71 4473 base_name = get_name (DR_REF (dr));
48e1416a 4474 }
37545e54 4475
fb85abff 4476 /* Create base_offset */
4477 base_offset = size_binop (PLUS_EXPR,
4478 fold_convert (sizetype, base_offset),
4479 fold_convert (sizetype, init));
fb85abff 4480
4481 if (offset)
4482 {
fb85abff 4483 offset = fold_build2 (MULT_EXPR, sizetype,
4484 fold_convert (sizetype, offset), step);
4485 base_offset = fold_build2 (PLUS_EXPR, sizetype,
4486 base_offset, offset);
fb85abff 4487 }
1ec61bbd 4488 if (byte_offset)
4489 {
4490 byte_offset = fold_convert (sizetype, byte_offset);
4491 base_offset = fold_build2 (PLUS_EXPR, sizetype,
4492 base_offset, byte_offset);
4493 }
fb85abff 4494
4495 /* base + base_offset */
37545e54 4496 if (loop_vinfo)
2cc66f2a 4497 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
37545e54 4498 else
4499 {
182cf5a9 4500 addr_base = build1 (ADDR_EXPR,
4501 build_pointer_type (TREE_TYPE (DR_REF (dr))),
4502 unshare_expr (DR_REF (dr)));
37545e54 4503 }
48e1416a 4504
fb85abff 4505 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
90d4c4af 4506 dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
8ee959f8 4507 addr_base = force_gimple_operand (addr_base, &seq, true, dest);
fb85abff 4508 gimple_seq_add_seq (new_stmt_list, seq);
4509
f544b9a4 4510 if (DR_PTR_INFO (dr)
8ee959f8 4511 && TREE_CODE (addr_base) == SSA_NAME
4512 && !SSA_NAME_PTR_INFO (addr_base))
1259ab70 4513 {
abc9513d 4514 vect_duplicate_ssa_name_ptr_info (addr_base, dr_info);
4a2edd22 4515 if (offset || byte_offset)
90d4c4af 4516 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
1259ab70 4517 }
f544b9a4 4518
6d8fb6cf 4519 if (dump_enabled_p ())
a4e972e3 4520 dump_printf_loc (MSG_NOTE, vect_location, "created %T\n", addr_base);
f083cd24 4521
90d4c4af 4522 return addr_base;
fb85abff 4523}
4524
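/* As an illustrative sketch only (the SSA names and the exact folding below
   are hypothetical, shown just to make the shape of the output concrete):
   for a dataref on 'short a[]' with OFFSET equal to 4 elements and no
   BYTE_OFFSET, the statements appended to NEW_STMT_LIST would compute
   something along the lines of

     base_off_1 = init + 4 * 2;   (OFFSET scaled by the 2-byte element size)
     addr_2 = &a + base_off_1;

   and addr_2 is the SSA_NAME returned to the caller.  */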
4525
4526/* Function vect_create_data_ref_ptr.
4527
bd5ba09f 4528 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
ecc42a77 4529 location accessed in the loop by STMT_INFO, along with the def-use update
bd5ba09f 4530 chain to appropriately advance the pointer through the loop iterations.
4531 Also set aliasing information for the pointer. This pointer is used by
4532 the callers to this function to create a memory reference expression for
4533 vector load/store access.
fb85abff 4534
4535 Input:
ecc42a77 4536 1. STMT_INFO: a stmt that references memory. Expected to be of the form
fb85abff 4537 GIMPLE_ASSIGN <name, data-ref> or
4538 GIMPLE_ASSIGN <data-ref, name>.
bd5ba09f 4539 2. AGGR_TYPE: the type of the reference, which should be either a vector
4540 or an array.
4541 3. AT_LOOP: the loop where the vector memref is to be created.
4542 4. OFFSET (optional): an offset to be added to the initial address accessed
ecc42a77 4543 by the data-ref in STMT_INFO.
bd5ba09f 4544 5. BSI: location where the new stmts are to be placed if there is no loop
4545 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
fb85abff 4546 pointing to the initial address.
1ec61bbd 4547 7. BYTE_OFFSET (optional, defaults to NULL): a byte offset to be added
ecc42a77 4548 to the initial address accessed by the data-ref in STMT_INFO. This is
1ec61bbd 4549 similar to OFFSET, but OFFSET is counted in elements, while BYTE_OFFSET
4550 in bytes.
1f9a3b5c 4551 8. IV_STEP (optional, defaults to NULL): the amount that should be added
4552 to the IV during each iteration of the loop. NULL says to move
4553 by one copy of AGGR_TYPE up or down, depending on the step of the
4554 data reference.
fb85abff 4555
4556 Output:
4557 1. Declare a new ptr to vector_type, and have it point to the base of the
 4558 data reference (initial address accessed by the data reference).
4559 For example, for vector of type V8HI, the following code is generated:
4560
bd5ba09f 4561 v8hi *ap;
4562 ap = (v8hi *)initial_address;
fb85abff 4563
4564 if OFFSET is not supplied:
4565 initial_address = &a[init];
4566 if OFFSET is supplied:
4567 initial_address = &a[init + OFFSET];
1ec61bbd 4568 if BYTE_OFFSET is supplied:
4569 initial_address = &a[init] + BYTE_OFFSET;
fb85abff 4570
4571 Return the initial_address in INITIAL_ADDRESS.
4572
4573 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
48e1416a 4574 update the pointer in each iteration of the loop.
fb85abff 4575
4576 Return the increment stmt that updates the pointer in PTR_INCR.
4577
3c8b7bc7 4578 3. Return the pointer. */
fb85abff 4579
4580tree
ecc42a77 4581vect_create_data_ref_ptr (stmt_vec_info stmt_info, tree aggr_type,
4582 struct loop *at_loop, tree offset,
4583 tree *initial_address, gimple_stmt_iterator *gsi,
3c8b7bc7 4584 gimple **ptr_incr, bool only_init,
ecc42a77 4585 tree byte_offset, tree iv_step)
fb85abff 4586{
3c18ea71 4587 const char *base_name;
fb85abff 4588 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
37545e54 4589 struct loop *loop = NULL;
4590 bool nested_in_vect_loop = false;
4591 struct loop *containing_loop = NULL;
bd5ba09f 4592 tree aggr_ptr_type;
4593 tree aggr_ptr;
fb85abff 4594 tree new_temp;
fb85abff 4595 gimple_seq new_stmt_list = NULL;
37545e54 4596 edge pe = NULL;
fb85abff 4597 basic_block new_bb;
bd5ba09f 4598 tree aggr_ptr_init;
abc9513d 4599 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
4600 struct data_reference *dr = dr_info->dr;
bd5ba09f 4601 tree aptr;
fb85abff 4602 gimple_stmt_iterator incr_gsi;
4603 bool insert_after;
4604 tree indx_before_incr, indx_after_incr;
42acab1c 4605 gimple *incr;
37545e54 4606 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
48e1416a 4607
1f9a3b5c 4608 gcc_assert (iv_step != NULL_TREE
4609 || TREE_CODE (aggr_type) == ARRAY_TYPE
bd5ba09f 4610 || TREE_CODE (aggr_type) == VECTOR_TYPE);
4611
37545e54 4612 if (loop_vinfo)
4613 {
4614 loop = LOOP_VINFO_LOOP (loop_vinfo);
a73182ff 4615 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
4616 containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
37545e54 4617 pe = loop_preheader_edge (loop);
4618 }
4619 else
4620 {
4621 gcc_assert (bb_vinfo);
4622 only_init = true;
4623 *ptr_incr = NULL;
4624 }
48e1416a 4625
fb85abff 4626 /* Create an expression for the first address accessed by this load
48e1416a 4627 in LOOP. */
3c18ea71 4628 base_name = get_name (DR_BASE_ADDRESS (dr));
fb85abff 4629
6d8fb6cf 4630 if (dump_enabled_p ())
fb85abff 4631 {
3c18ea71 4632 tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
7bd765d4 4633 dump_printf_loc (MSG_NOTE, vect_location,
a4e972e3 4634 "create %s-pointer variable to type: %T",
4635 get_tree_code_name (TREE_CODE (aggr_type)),
4636 aggr_type);
3c18ea71 4637 if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
7bd765d4 4638 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
19bacd59 4639 else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
4640 dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
3c18ea71 4641 else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
7bd765d4 4642 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
3c18ea71 4643 else
7bd765d4 4644 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
a4e972e3 4645 dump_printf (MSG_NOTE, "%T\n", DR_BASE_OBJECT (dr));
fb85abff 4646 }
4647
90d4c4af 4648 /* (1) Create the new aggregate-pointer variable.
4649 Vector and array types inherit the alias set of their component
bd5ba09f 4650 type by default so we need to use a ref-all pointer if the data
4651 reference does not conflict with the created aggregated data
4652 reference because it is not addressable. */
90d4c4af 4653 bool need_ref_all = false;
4654 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
a34701c9 4655 get_alias_set (DR_REF (dr))))
90d4c4af 4656 need_ref_all = true;
a34701c9 4657 /* Likewise for any of the data references in the stmt group. */
e1009321 4658 else if (DR_GROUP_SIZE (stmt_info) > 1)
fb85abff 4659 {
cd24aa3c 4660 stmt_vec_info sinfo = DR_GROUP_FIRST_ELEMENT (stmt_info);
dd277d48 4661 do
4662 {
90d4c4af 4663 struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
4664 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4665 get_alias_set (DR_REF (sdr))))
dd277d48 4666 {
90d4c4af 4667 need_ref_all = true;
dd277d48 4668 break;
4669 }
cd24aa3c 4670 sinfo = DR_GROUP_NEXT_ELEMENT (sinfo);
dd277d48 4671 }
cd24aa3c 4672 while (sinfo);
fb85abff 4673 }
90d4c4af 4674 aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4675 need_ref_all);
4676 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4677
fb85abff 4678
282bf14c 4679 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4680 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4681 def-use update cycles for the pointer: one relative to the outer-loop
4682 (LOOP), which is what steps (3) and (4) below do. The other is relative
4683 to the inner-loop (which is the inner-most loop containing the dataref),
 4684 and this is done by step (5) below.
fb85abff 4685
282bf14c 4686 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4687 inner-most loop, and so steps (3),(4) work the same, and step (5) is
4688 redundant. Steps (3),(4) create the following:
fb85abff 4689
4690 vp0 = &base_addr;
4691 LOOP: vp1 = phi(vp0,vp2)
48e1416a 4692 ...
fb85abff 4693 ...
4694 vp2 = vp1 + step
4695 goto LOOP
48e1416a 4696
282bf14c 4697 If there is an inner-loop nested in loop, then step (5) will also be
4698 applied, and an additional update in the inner-loop will be created:
fb85abff 4699
4700 vp0 = &base_addr;
4701 LOOP: vp1 = phi(vp0,vp2)
4702 ...
4703 inner: vp3 = phi(vp1,vp4)
4704 vp4 = vp3 + inner_step
4705 if () goto inner
4706 ...
4707 vp2 = vp1 + step
4708 if () goto LOOP */
4709
bd5ba09f 4710 /* (2) Calculate the initial address of the aggregate-pointer, and set
4711 the aggregate-pointer to point to it before the loop. */
fb85abff 4712
1ec61bbd 4713 /* Create: &(base[init_val+offset]) + byte_offset in the loop preheader. */
fb85abff 4714
a73182ff 4715 new_temp = vect_create_addr_base_for_vector_ref (stmt_info, &new_stmt_list,
9e879814 4716 offset, byte_offset);
fb85abff 4717 if (new_stmt_list)
4718 {
37545e54 4719 if (pe)
4720 {
4721 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
4722 gcc_assert (!new_bb);
4723 }
4724 else
bee862b6 4725 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
fb85abff 4726 }
4727
4728 *initial_address = new_temp;
8ee959f8 4729 aggr_ptr_init = new_temp;
fb85abff 4730
bd5ba09f 4731 /* (3) Handle the updating of the aggregate-pointer inside the loop.
282bf14c 4732 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4733 inner-loop nested in LOOP (during outer-loop vectorization). */
fb85abff 4734
37545e54 4735 /* No update in loop is required. */
48e1416a 4736 if (only_init && (!loop_vinfo || at_loop == loop))
bd5ba09f 4737 aptr = aggr_ptr_init;
fb85abff 4738 else
4739 {
3c8b7bc7 4740 /* Accesses to invariant addresses should be handled specially
4741 by the caller. */
4742 tree step = vect_dr_behavior (dr_info)->step;
4743 gcc_assert (!integer_zerop (step));
4744
1f9a3b5c 4745 if (iv_step == NULL_TREE)
4746 {
3c8b7bc7 4747 /* The step of the aggregate pointer is the type size,
4748 negated for downward accesses. */
1f9a3b5c 4749 iv_step = TYPE_SIZE_UNIT (aggr_type);
3c8b7bc7 4750 if (tree_int_cst_sgn (step) == -1)
1f9a3b5c 4751 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
4752 }
fb85abff 4753
4754 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4755
bd5ba09f 4756 create_iv (aggr_ptr_init,
8bbe6b75 4757 fold_convert (aggr_ptr_type, iv_step),
bd5ba09f 4758 aggr_ptr, loop, &incr_gsi, insert_after,
fb85abff 4759 &indx_before_incr, &indx_after_incr);
4760 incr = gsi_stmt (incr_gsi);
04b2391d 4761 loop_vinfo->add_stmt (incr);
fb85abff 4762
4763 /* Copy the points-to information if it exists. */
4764 if (DR_PTR_INFO (dr))
4765 {
abc9513d 4766 vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
4767 vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
fb85abff 4768 }
fb85abff 4769 if (ptr_incr)
4770 *ptr_incr = incr;
4771
bd5ba09f 4772 aptr = indx_before_incr;
fb85abff 4773 }
4774
4775 if (!nested_in_vect_loop || only_init)
bd5ba09f 4776 return aptr;
fb85abff 4777
4778
bd5ba09f 4779 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
282bf14c 4780 nested in LOOP, if one exists. */
fb85abff 4781
4782 gcc_assert (nested_in_vect_loop);
4783 if (!only_init)
4784 {
4785 standard_iv_increment_position (containing_loop, &incr_gsi,
4786 &insert_after);
bd5ba09f 4787 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
fb85abff 4788 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
4789 &indx_after_incr);
4790 incr = gsi_stmt (incr_gsi);
04b2391d 4791 loop_vinfo->add_stmt (incr);
fb85abff 4792
4793 /* Copy the points-to information if it exists. */
4794 if (DR_PTR_INFO (dr))
4795 {
abc9513d 4796 vect_duplicate_ssa_name_ptr_info (indx_before_incr, dr_info);
4797 vect_duplicate_ssa_name_ptr_info (indx_after_incr, dr_info);
fb85abff 4798 }
fb85abff 4799 if (ptr_incr)
4800 *ptr_incr = incr;
4801
48e1416a 4802 return indx_before_incr;
fb85abff 4803 }
4804 else
4805 gcc_unreachable ();
4806}
4807
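/* For illustration (a hypothetical case, not tied to a particular caller):
   when IV_STEP is passed as NULL_TREE and AGGR_TYPE is V4SI, the pointer IV
   created in step (3) above advances by TYPE_SIZE_UNIT (V4SI), i.e. 16 bytes
   per iteration, or by -16 bytes when the DR_STEP of the data reference is
   negative (a downward access).  */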
4808
4809/* Function bump_vector_ptr
4810
4811 Increment a pointer (to a vector type) by vector-size. If requested,
48e1416a 4812 i.e. if PTR-INCR is given, then also connect the new increment stmt
fb85abff 4813 to the existing def-use update-chain of the pointer, by modifying
4814 the PTR_INCR as illustrated below:
4815
4816 The pointer def-use update-chain before this function:
4817 DATAREF_PTR = phi (p_0, p_2)
4818 ....
48e1416a 4819 PTR_INCR: p_2 = DATAREF_PTR + step
fb85abff 4820
4821 The pointer def-use update-chain after this function:
4822 DATAREF_PTR = phi (p_0, p_2)
4823 ....
4824 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4825 ....
4826 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4827
4828 Input:
48e1416a 4829 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
fb85abff 4830 in the loop.
48e1416a 4831 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
fb85abff 4832 the loop. The increment amount across iterations is expected
48e1416a 4833 to be vector_size.
fb85abff 4834 BSI - location where the new update stmt is to be placed.
ecc42a77 4835 STMT_INFO - the original scalar memory-access stmt that is being vectorized.
fb85abff 4836 BUMP - optional. The offset by which to bump the pointer. If not given,
4837 the offset is assumed to be vector_size.
4838
4839 Output: Return NEW_DATAREF_PTR as illustrated above.
48e1416a 4840
fb85abff 4841*/
4842
4843tree
42acab1c 4844bump_vector_ptr (tree dataref_ptr, gimple *ptr_incr, gimple_stmt_iterator *gsi,
ecc42a77 4845 stmt_vec_info stmt_info, tree bump)
fb85abff 4846{
fb85abff 4847 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4848 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
fb85abff 4849 tree update = TYPE_SIZE_UNIT (vectype);
1a91d914 4850 gassign *incr_stmt;
fb85abff 4851 ssa_op_iter iter;
4852 use_operand_p use_p;
4853 tree new_dataref_ptr;
4854
4855 if (bump)
4856 update = bump;
48e1416a 4857
8ee959f8 4858 if (TREE_CODE (dataref_ptr) == SSA_NAME)
4859 new_dataref_ptr = copy_ssa_name (dataref_ptr);
4860 else
4861 new_dataref_ptr = make_ssa_name (TREE_TYPE (dataref_ptr));
e9cf809e 4862 incr_stmt = gimple_build_assign (new_dataref_ptr, POINTER_PLUS_EXPR,
4863 dataref_ptr, update);
a73182ff 4864 vect_finish_stmt_generation (stmt_info, incr_stmt, gsi);
fb85abff 4865
4866 /* Copy the points-to information if it exists. */
4867 if (DR_PTR_INFO (dr))
1259ab70 4868 {
4869 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
ceea063b 4870 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
1259ab70 4871 }
fb85abff 4872
4873 if (!ptr_incr)
4874 return new_dataref_ptr;
4875
4876 /* Update the vector-pointer's cross-iteration increment. */
4877 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4878 {
4879 tree use = USE_FROM_PTR (use_p);
4880
4881 if (use == dataref_ptr)
4882 SET_USE (use_p, new_dataref_ptr);
4883 else
1f9a3b5c 4884 gcc_assert (operand_equal_p (use, update, 0));
fb85abff 4885 }
4886
4887 return new_dataref_ptr;
4888}
4889
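/* A short usage sketch (hypothetical SSA names and call site): after a
   vectorized V8HI load through DATAREF_PTR, a caller that needs the next
   vector within the same iteration can do

     next_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt_info,
				 NULL_TREE);

   which bumps the pointer by TYPE_SIZE_UNIT of the vector type (16 bytes for
   V8HI) and redirects PTR_INCR so that the cross-iteration increment starts
   from next_ptr.  */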
4890
1c4c7e32 4891/* Copy memory reference info such as base/clique from the SRC reference
4892 to the DEST MEM_REF. */
4893
4894void
4895vect_copy_ref_info (tree dest, tree src)
4896{
4897 if (TREE_CODE (dest) != MEM_REF)
4898 return;
4899
4900 tree src_base = src;
4901 while (handled_component_p (src_base))
4902 src_base = TREE_OPERAND (src_base, 0);
4903 if (TREE_CODE (src_base) != MEM_REF
4904 && TREE_CODE (src_base) != TARGET_MEM_REF)
4905 return;
4906
4907 MR_DEPENDENCE_CLIQUE (dest) = MR_DEPENDENCE_CLIQUE (src_base);
4908 MR_DEPENDENCE_BASE (dest) = MR_DEPENDENCE_BASE (src_base);
4909}
4910
4911
fb85abff 4912/* Function vect_create_destination_var.
4913
4914 Create a new temporary of type VECTYPE. */
4915
4916tree
4917vect_create_destination_var (tree scalar_dest, tree vectype)
4918{
4919 tree vec_dest;
0bf5f81b 4920 const char *name;
4921 char *new_name;
fb85abff 4922 tree type;
4923 enum vect_var_kind kind;
4924
dab48979 4925 kind = vectype
4926 ? VECTOR_BOOLEAN_TYPE_P (vectype)
4927 ? vect_mask_var
4928 : vect_simple_var
4929 : vect_scalar_var;
fb85abff 4930 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4931
4932 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4933
0bf5f81b 4934 name = get_name (scalar_dest);
4935 if (name)
b33b6e58 4936 new_name = xasprintf ("%s_%u", name, SSA_NAME_VERSION (scalar_dest));
0bf5f81b 4937 else
b33b6e58 4938 new_name = xasprintf ("_%u", SSA_NAME_VERSION (scalar_dest));
fb85abff 4939 vec_dest = vect_get_new_vect_var (type, kind, new_name);
0bf5f81b 4940 free (new_name);
fb85abff 4941
4942 return vec_dest;
4943}
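/* For instance (purely illustrative): for a scalar destination named "sum"
   with SSA version 13, NEW_NAME above becomes "sum_13" ("_13" for an
   anonymous SSA name), and that string is passed to vect_get_new_vect_var
   to derive the name of the new vector temporary.  */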
4944
ee612634 4945/* Function vect_grouped_store_supported.
fb85abff 4946
42f6a6e8 4947 Returns TRUE if interleave high and interleave low permutations
4948 are supported, and FALSE otherwise. */
fb85abff 4949
4950bool
ee612634 4951vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
fb85abff 4952{
3754d046 4953 machine_mode mode = TYPE_MODE (vectype);
48e1416a 4954
d53391a8 4955 /* vect_permute_store_chain requires the group size to be equal to 3 or
4956 be a power of two. */
4957 if (count != 3 && exact_log2 (count) == -1)
481fc474 4958 {
6d8fb6cf 4959 if (dump_enabled_p ())
7bd765d4 4960 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
d53391a8 4961 "the size of the group of accesses"
 4962 " is not a power of 2 or not equal to 3\n");
481fc474 4963 return false;
4964 }
4965
42f6a6e8 4966 /* Check that the permutation is supported. */
8bec2124 4967 if (VECTOR_MODE_P (mode))
4968 {
ba7efd65 4969 unsigned int i;
d53391a8 4970 if (count == 3)
8bec2124 4971 {
d53391a8 4972 unsigned int j0 = 0, j1 = 0, j2 = 0;
4973 unsigned int i, j;
4974
ba7efd65 4975 unsigned int nelt;
4976 if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
4977 {
4978 if (dump_enabled_p ())
4979 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4980 "cannot handle groups of 3 stores for"
4981 " variable-length vectors\n");
4982 return false;
4983 }
4984
c3fa7fe9 4985 vec_perm_builder sel (nelt, nelt, 1);
4986 sel.quick_grow (nelt);
1957c019 4987 vec_perm_indices indices;
d53391a8 4988 for (j = 0; j < 3; j++)
4989 {
4990 int nelt0 = ((3 - j) * nelt) % 3;
4991 int nelt1 = ((3 - j) * nelt + 1) % 3;
4992 int nelt2 = ((3 - j) * nelt + 2) % 3;
4993 for (i = 0; i < nelt; i++)
4994 {
4995 if (3 * i + nelt0 < nelt)
4996 sel[3 * i + nelt0] = j0++;
4997 if (3 * i + nelt1 < nelt)
4998 sel[3 * i + nelt1] = nelt + j1++;
4999 if (3 * i + nelt2 < nelt)
5000 sel[3 * i + nelt2] = 0;
5001 }
1957c019 5002 indices.new_vector (sel, 2, nelt);
5003 if (!can_vec_perm_const_p (mode, indices))
d53391a8 5004 {
5005 if (dump_enabled_p ())
5006 dump_printf (MSG_MISSED_OPTIMIZATION,
97f7d65e 5007 "permutation op not supported by target.\n");
d53391a8 5008 return false;
5009 }
5010
5011 for (i = 0; i < nelt; i++)
5012 {
5013 if (3 * i + nelt0 < nelt)
5014 sel[3 * i + nelt0] = 3 * i + nelt0;
5015 if (3 * i + nelt1 < nelt)
5016 sel[3 * i + nelt1] = 3 * i + nelt1;
5017 if (3 * i + nelt2 < nelt)
5018 sel[3 * i + nelt2] = nelt + j2++;
5019 }
1957c019 5020 indices.new_vector (sel, 2, nelt);
5021 if (!can_vec_perm_const_p (mode, indices))
d53391a8 5022 {
5023 if (dump_enabled_p ())
5024 dump_printf (MSG_MISSED_OPTIMIZATION,
97f7d65e 5025 "permutation op not supported by target.\n");
d53391a8 5026 return false;
5027 }
5028 }
5029 return true;
8bec2124 5030 }
d53391a8 5031 else
8bec2124 5032 {
d53391a8 5033 /* If length is not equal to 3 then only power of 2 is supported. */
ac29ece2 5034 gcc_assert (pow2p_hwi (count));
ba7efd65 5035 poly_uint64 nelt = GET_MODE_NUNITS (mode);
d53391a8 5036
c3fa7fe9 5037 /* The encoding has 2 interleaved stepped patterns. */
5038 vec_perm_builder sel (nelt, 2, 3);
5039 sel.quick_grow (6);
5040 for (i = 0; i < 3; i++)
d53391a8 5041 {
5042 sel[i * 2] = i;
5043 sel[i * 2 + 1] = i + nelt;
5044 }
1957c019 5045 vec_perm_indices indices (sel, 2, nelt);
5046 if (can_vec_perm_const_p (mode, indices))
282dc861 5047 {
c3fa7fe9 5048 for (i = 0; i < 6; i++)
ba7efd65 5049 sel[i] += exact_div (nelt, 2);
1957c019 5050 indices.new_vector (sel, 2, nelt);
5051 if (can_vec_perm_const_p (mode, indices))
282dc861 5052 return true;
5053 }
8bec2124 5054 }
5055 }
fb85abff 5056
6d8fb6cf 5057 if (dump_enabled_p ())
7bd765d4 5058 dump_printf (MSG_MISSED_OPTIMIZATION,
12554a62 5059 "permutation op not supported by target.\n");
6620d7d7 5060 return false;
fb85abff 5061}
5062
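/* A worked example of the power-of-two check above (assuming V8HI, i.e.
   nelt == 8): the first selector tested is the interleave-high permutation
   {0, 8, 1, 9, 2, 10, 3, 11}; after nelt/2 is added to each element of the
   encoding, the second is the interleave-low permutation
   {4, 12, 5, 13, 6, 14, 7, 15}.  Both must be supported by the target for
   the grouped store to be vectorized this way.  */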
5063
2dd8e84c 5064/* Return TRUE if vec_{mask_}store_lanes is available for COUNT vectors of
5065 type VECTYPE. MASKED_P says whether the masked form is needed. */
94b7b4dd 5066
5067bool
2dd8e84c 5068vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
5069 bool masked_p)
94b7b4dd 5070{
2dd8e84c 5071 if (masked_p)
5072 return vect_lanes_optab_supported_p ("vec_mask_store_lanes",
5073 vec_mask_store_lanes_optab,
5074 vectype, count);
5075 else
5076 return vect_lanes_optab_supported_p ("vec_store_lanes",
5077 vec_store_lanes_optab,
5078 vectype, count);
94b7b4dd 5079}
5080
5081
fb85abff 5082/* Function vect_permute_store_chain.
5083
5084 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
d53391a8 5085 a power of 2 or equal to 3, generate interleave_high/low stmts to reorder
5086 the data correctly for the stores. Return the final references for stores
5087 in RESULT_CHAIN.
fb85abff 5088
5089 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
282bf14c 5090 The input is 4 vectors each containing 8 elements. We assign a number to
5091 each element, the input sequence is:
fb85abff 5092
5093 1st vec: 0 1 2 3 4 5 6 7
5094 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 5095 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 5096 4th vec: 24 25 26 27 28 29 30 31
5097
5098 The output sequence should be:
5099
5100 1st vec: 0 8 16 24 1 9 17 25
5101 2nd vec: 2 10 18 26 3 11 19 27
 5102 3rd vec: 4 12 20 28 5 13 21 29
5103 4th vec: 6 14 22 30 7 15 23 31
5104
5105 i.e., we interleave the contents of the four vectors in their order.
5106
282bf14c 5107 We use interleave_high/low instructions to create such output. The input of
fb85abff 5108 each interleave_high/low operation is two vectors:
48e1416a 5109 1st vec 2nd vec
5110 0 1 2 3 4 5 6 7
5111 the even elements of the result vector are obtained left-to-right from the
282bf14c 5112 high/low elements of the first vector. The odd elements of the result are
fb85abff 5113 obtained left-to-right from the high/low elements of the second vector.
5114 The output of interleave_high will be: 0 4 1 5
5115 and of interleave_low: 2 6 3 7
5116
48e1416a 5117
282bf14c 5118 The permutation is done in log LENGTH stages. In each stage interleave_high
48e1416a 5119 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
5120 where the first argument is taken from the first half of DR_CHAIN and the
 5121 second argument from its second half.
5122 In our example,
fb85abff 5123
5124 I1: interleave_high (1st vec, 3rd vec)
5125 I2: interleave_low (1st vec, 3rd vec)
5126 I3: interleave_high (2nd vec, 4th vec)
5127 I4: interleave_low (2nd vec, 4th vec)
5128
5129 The output for the first stage is:
5130
5131 I1: 0 16 1 17 2 18 3 19
5132 I2: 4 20 5 21 6 22 7 23
5133 I3: 8 24 9 25 10 26 11 27
5134 I4: 12 28 13 29 14 30 15 31
5135
5136 The output of the second stage, i.e. the final result is:
5137
5138 I1: 0 8 16 24 1 9 17 25
5139 I2: 2 10 18 26 3 11 19 27
 5140 I3: 4 12 20 28 5 13 21 29
5141 I4: 6 14 22 30 7 15 23 31. */
48e1416a 5142
481fc474 5143void
f1f41a6c 5144vect_permute_store_chain (vec<tree> dr_chain,
48e1416a 5145 unsigned int length,
ecc42a77 5146 stmt_vec_info stmt_info,
fb85abff 5147 gimple_stmt_iterator *gsi,
f1f41a6c 5148 vec<tree> *result_chain)
fb85abff 5149{
03d37e4e 5150 tree vect1, vect2, high, low;
42acab1c 5151 gimple *perm_stmt;
1c2fef9a 5152 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
8bec2124 5153 tree perm_mask_low, perm_mask_high;
d53391a8 5154 tree data_ref;
5155 tree perm3_mask_low, perm3_mask_high;
8b221927 5156 unsigned int i, j, n, log_length = exact_log2 (length);
282dc861 5157
f40aaf2d 5158 result_chain->quick_grow (length);
5159 memcpy (result_chain->address (), dr_chain.address (),
5160 length * sizeof (tree));
fb85abff 5161
d53391a8 5162 if (length == 3)
8bec2124 5163 {
8b221927 5164 /* vect_grouped_store_supported ensures that this is constant. */
f08ee65f 5165 unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
d53391a8 5166 unsigned int j0 = 0, j1 = 0, j2 = 0;
42f6a6e8 5167
c3fa7fe9 5168 vec_perm_builder sel (nelt, nelt, 1);
5169 sel.quick_grow (nelt);
1957c019 5170 vec_perm_indices indices;
d53391a8 5171 for (j = 0; j < 3; j++)
5172 {
5173 int nelt0 = ((3 - j) * nelt) % 3;
5174 int nelt1 = ((3 - j) * nelt + 1) % 3;
5175 int nelt2 = ((3 - j) * nelt + 2) % 3;
8bec2124 5176
d53391a8 5177 for (i = 0; i < nelt; i++)
5178 {
5179 if (3 * i + nelt0 < nelt)
5180 sel[3 * i + nelt0] = j0++;
5181 if (3 * i + nelt1 < nelt)
5182 sel[3 * i + nelt1] = nelt + j1++;
5183 if (3 * i + nelt2 < nelt)
5184 sel[3 * i + nelt2] = 0;
5185 }
1957c019 5186 indices.new_vector (sel, 2, nelt);
5187 perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
d53391a8 5188
5189 for (i = 0; i < nelt; i++)
5190 {
5191 if (3 * i + nelt0 < nelt)
5192 sel[3 * i + nelt0] = 3 * i + nelt0;
5193 if (3 * i + nelt1 < nelt)
5194 sel[3 * i + nelt1] = 3 * i + nelt1;
5195 if (3 * i + nelt2 < nelt)
5196 sel[3 * i + nelt2] = nelt + j2++;
5197 }
1957c019 5198 indices.new_vector (sel, 2, nelt);
5199 perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
d53391a8 5200
5201 vect1 = dr_chain[0];
5202 vect2 = dr_chain[1];
fb85abff 5203
5204 /* Create interleaving stmt:
d53391a8 5205 low = VEC_PERM_EXPR <vect1, vect2,
5206 {j, nelt, *, j + 1, nelt + j + 1, *,
5207 j + 2, nelt + j + 2, *, ...}> */
5208 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
e9cf809e 5209 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
5210 vect2, perm3_mask_low);
a73182ff 5211 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
fb85abff 5212
d53391a8 5213 vect1 = data_ref;
5214 vect2 = dr_chain[2];
fb85abff 5215 /* Create interleaving stmt:
d53391a8 5216 low = VEC_PERM_EXPR <vect1, vect2,
5217 {0, 1, nelt + j, 3, 4, nelt + j + 1,
5218 6, 7, nelt + j + 2, ...}> */
5219 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
e9cf809e 5220 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect1,
5221 vect2, perm3_mask_high);
a73182ff 5222 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
d53391a8 5223 (*result_chain)[j] = data_ref;
fb85abff 5224 }
d53391a8 5225 }
5226 else
5227 {
5228 /* If length is not equal to 3 then only power of 2 is supported. */
ac29ece2 5229 gcc_assert (pow2p_hwi (length));
d53391a8 5230
c3fa7fe9 5231 /* The encoding has 2 interleaved stepped patterns. */
f08ee65f 5232 poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
c3fa7fe9 5233 vec_perm_builder sel (nelt, 2, 3);
5234 sel.quick_grow (6);
5235 for (i = 0; i < 3; i++)
d53391a8 5236 {
5237 sel[i * 2] = i;
5238 sel[i * 2 + 1] = i + nelt;
5239 }
1957c019 5240 vec_perm_indices indices (sel, 2, nelt);
5241 perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
d53391a8 5242
c3fa7fe9 5243 for (i = 0; i < 6; i++)
f08ee65f 5244 sel[i] += exact_div (nelt, 2);
1957c019 5245 indices.new_vector (sel, 2, nelt);
5246 perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
d53391a8 5247
5248 for (i = 0, n = log_length; i < n; i++)
5249 {
5250 for (j = 0; j < length/2; j++)
5251 {
5252 vect1 = dr_chain[j];
5253 vect2 = dr_chain[j+length/2];
5254
5255 /* Create interleaving stmt:
5256 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1,
5257 ...}> */
5258 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
e9cf809e 5259 perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1,
5260 vect2, perm_mask_high);
a73182ff 5261 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
d53391a8 5262 (*result_chain)[2*j] = high;
5263
5264 /* Create interleaving stmt:
5265 low = VEC_PERM_EXPR <vect1, vect2,
5266 {nelt/2, nelt*3/2, nelt/2+1, nelt*3/2+1,
5267 ...}> */
5268 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
e9cf809e 5269 perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1,
5270 vect2, perm_mask_low);
a73182ff 5271 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
d53391a8 5272 (*result_chain)[2*j+1] = low;
5273 }
5274 memcpy (dr_chain.address (), result_chain->address (),
5275 length * sizeof (tree));
5276 }
fb85abff 5277 }
fb85abff 5278}
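/* A sketch of what the power-of-two branch above emits for LENGTH == 2 and
   V8HI (the SSA names are illustrative): the single stage generates

     vect_inter_high_1 = VEC_PERM_EXPR <v0, v1, {0, 8, 1, 9, 2, 10, 3, 11}>;
     vect_inter_low_2 = VEC_PERM_EXPR <v0, v1, {4, 12, 5, 13, 6, 14, 7, 15}>;

   and RESULT_CHAIN holds the high part followed by the low part.  */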
5279
5280/* Function vect_setup_realignment
48e1416a 5281
fb85abff 5282 This function is called when vectorizing an unaligned load using
5283 the dr_explicit_realign[_optimized] scheme.
5284 This function generates the following code at the loop prolog:
5285
5286 p = initial_addr;
5287 x msq_init = *(floor(p)); # prolog load
48e1416a 5288 realignment_token = call target_builtin;
fb85abff 5289 loop:
5290 x msq = phi (msq_init, ---)
5291
48e1416a 5292 The stmts marked with x are generated only for the case of
fb85abff 5293 dr_explicit_realign_optimized.
5294
48e1416a 5295 The code above sets up a new (vector) pointer, pointing to the first
ecc42a77 5296 location accessed by STMT_INFO, and a "floor-aligned" load using that
5297 pointer. It also generates code to compute the "realignment-token"
5298 (if the relevant target hook was defined), and creates a phi-node at the
5299 loop-header bb whose arguments are the result of the prolog-load (created
5300 by this function) and the result of a load that takes place in the loop
5301 (to be created by the caller to this function).
fb85abff 5302
5303 For the case of dr_explicit_realign_optimized:
48e1416a 5304 The caller to this function uses the phi-result (msq) to create the
fb85abff 5305 realignment code inside the loop, and sets up the missing phi argument,
5306 as follows:
48e1416a 5307 loop:
fb85abff 5308 msq = phi (msq_init, lsq)
5309 lsq = *(floor(p')); # load in loop
5310 result = realign_load (msq, lsq, realignment_token);
5311
5312 For the case of dr_explicit_realign:
5313 loop:
5314 msq = *(floor(p)); # load in loop
5315 p' = p + (VS-1);
5316 lsq = *(floor(p')); # load in loop
5317 result = realign_load (msq, lsq, realignment_token);
5318
5319 Input:
ecc42a77 5320 STMT_INFO - (scalar) load stmt to be vectorized. This load accesses
5321 a memory location that may be unaligned.
fb85abff 5322 BSI - place where new code is to be inserted.
5323 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
48e1416a 5324 is used.
5325
fb85abff 5326 Output:
5327 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
5328 target hook, if defined.
5329 Return value - the result of the loop-header phi node. */
5330
5331tree
ecc42a77 5332vect_setup_realignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
fb85abff 5333 tree *realignment_token,
5334 enum dr_alignment_support alignment_support_scheme,
5335 tree init_addr,
5336 struct loop **at_loop)
5337{
fb85abff 5338 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5339 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
abc9513d 5340 dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
5341 struct data_reference *dr = dr_info->dr;
ad074595 5342 struct loop *loop = NULL;
5343 edge pe = NULL;
a73182ff 5344 tree scalar_dest = gimple_assign_lhs (stmt_info->stmt);
fb85abff 5345 tree vec_dest;
42acab1c 5346 gimple *inc;
fb85abff 5347 tree ptr;
5348 tree data_ref;
fb85abff 5349 basic_block new_bb;
5350 tree msq_init = NULL_TREE;
5351 tree new_temp;
1a91d914 5352 gphi *phi_stmt;
fb85abff 5353 tree msq = NULL_TREE;
5354 gimple_seq stmts = NULL;
fb85abff 5355 bool compute_in_loop = false;
ad074595 5356 bool nested_in_vect_loop = false;
a73182ff 5357 struct loop *containing_loop = (gimple_bb (stmt_info->stmt))->loop_father;
ad074595 5358 struct loop *loop_for_initial_load = NULL;
5359
5360 if (loop_vinfo)
5361 {
5362 loop = LOOP_VINFO_LOOP (loop_vinfo);
a73182ff 5363 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt_info);
ad074595 5364 }
fb85abff 5365
5366 gcc_assert (alignment_support_scheme == dr_explicit_realign
5367 || alignment_support_scheme == dr_explicit_realign_optimized);
5368
5369 /* We need to generate three things:
5370 1. the misalignment computation
5371 2. the extra vector load (for the optimized realignment scheme).
5372 3. the phi node for the two vectors from which the realignment is
282bf14c 5373 done (for the optimized realignment scheme). */
fb85abff 5374
5375 /* 1. Determine where to generate the misalignment computation.
5376
5377 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
5378 calculation will be generated by this function, outside the loop (in the
5379 preheader). Otherwise, INIT_ADDR had already been computed for us by the
5380 caller, inside the loop.
5381
5382 Background: If the misalignment remains fixed throughout the iterations of
5383 the loop, then both realignment schemes are applicable, and also the
5384 misalignment computation can be done outside LOOP. This is because we are
5385 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
5386 are a multiple of VS (the Vector Size), and therefore the misalignment in
5387 different vectorized LOOP iterations is always the same.
5388 The problem arises only if the memory access is in an inner-loop nested
5389 inside LOOP, which is now being vectorized using outer-loop vectorization.
5390 This is the only case when the misalignment of the memory access may not
5391 remain fixed throughout the iterations of the inner-loop (as explained in
5392 detail in vect_supportable_dr_alignment). In this case, not only is the
5393 optimized realignment scheme not applicable, but also the misalignment
5394 computation (and generation of the realignment token that is passed to
5395 REALIGN_LOAD) have to be done inside the loop.
5396
5397 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
5398 or not, which in turn determines if the misalignment is computed inside
5399 the inner-loop, or outside LOOP. */
5400
ad074595 5401 if (init_addr != NULL_TREE || !loop_vinfo)
fb85abff 5402 {
5403 compute_in_loop = true;
5404 gcc_assert (alignment_support_scheme == dr_explicit_realign);
5405 }
5406
5407
5408 /* 2. Determine where to generate the extra vector load.
5409
5410 For the optimized realignment scheme, instead of generating two vector
5411 loads in each iteration, we generate a single extra vector load in the
5412 preheader of the loop, and in each iteration reuse the result of the
5413 vector load from the previous iteration. In case the memory access is in
5414 an inner-loop nested inside LOOP, which is now being vectorized using
5415 outer-loop vectorization, we need to determine whether this initial vector
5416 load should be generated at the preheader of the inner-loop, or can be
5417 generated at the preheader of LOOP. If the memory access has no evolution
5418 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
5419 to be generated inside LOOP (in the preheader of the inner-loop). */
5420
5421 if (nested_in_vect_loop)
5422 {
5423 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
5424 bool invariant_in_outerloop =
5425 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
5426 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
5427 }
5428 else
5429 loop_for_initial_load = loop;
5430 if (at_loop)
5431 *at_loop = loop_for_initial_load;
5432
ad074595 5433 if (loop_for_initial_load)
5434 pe = loop_preheader_edge (loop_for_initial_load);
5435
fb85abff 5436 /* 3. For the case of the optimized realignment, create the first vector
5437 load at the loop preheader. */
5438
5439 if (alignment_support_scheme == dr_explicit_realign_optimized)
5440 {
5441 /* Create msq_init = *(floor(p1)) in the loop preheader */
1a91d914 5442 gassign *new_stmt;
fb85abff 5443
5444 gcc_assert (!compute_in_loop);
fb85abff 5445 vec_dest = vect_create_destination_var (scalar_dest, vectype);
a73182ff 5446 ptr = vect_create_data_ref_ptr (stmt_info, vectype,
5447 loop_for_initial_load, NULL_TREE,
3c8b7bc7 5448 &init_addr, NULL, &inc, true);
23bab442 5449 if (TREE_CODE (ptr) == SSA_NAME)
5450 new_temp = copy_ssa_name (ptr);
5451 else
5452 new_temp = make_ssa_name (TREE_TYPE (ptr));
e092c20e 5453 poly_uint64 align = DR_TARGET_ALIGNMENT (dr_info);
5454 tree type = TREE_TYPE (ptr);
e9cf809e 5455 new_stmt = gimple_build_assign
5456 (new_temp, BIT_AND_EXPR, ptr,
e092c20e 5457 fold_build2 (MINUS_EXPR, type,
5458 build_int_cst (type, 0),
5459 build_int_cst (type, align)));
86638c2e 5460 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5461 gcc_assert (!new_bb);
2cb9ef39 5462 data_ref
5463 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
5464 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
1c4c7e32 5465 vect_copy_ref_info (data_ref, DR_REF (dr));
fb85abff 5466 new_stmt = gimple_build_assign (vec_dest, data_ref);
5467 new_temp = make_ssa_name (vec_dest, new_stmt);
5468 gimple_assign_set_lhs (new_stmt, new_temp);
ad074595 5469 if (pe)
5470 {
5471 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5472 gcc_assert (!new_bb);
5473 }
5474 else
5475 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5476
fb85abff 5477 msq_init = gimple_assign_lhs (new_stmt);
5478 }
5479
5480 /* 4. Create realignment token using a target builtin, if available.
5481 It is done either inside the containing loop, or before LOOP (as
5482 determined above). */
5483
5484 if (targetm.vectorize.builtin_mask_for_load)
5485 {
1a91d914 5486 gcall *new_stmt;
fb85abff 5487 tree builtin_decl;
5488
 5489 /* Compute INIT_ADDR - the initial address accessed by this memref. */
ad074595 5490 if (!init_addr)
fb85abff 5491 {
5492 /* Generate the INIT_ADDR computation outside LOOP. */
a73182ff 5493 init_addr = vect_create_addr_base_for_vector_ref (stmt_info, &stmts,
9e879814 5494 NULL_TREE);
ad074595 5495 if (loop)
5496 {
5497 pe = loop_preheader_edge (loop);
5498 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
5499 gcc_assert (!new_bb);
5500 }
5501 else
5502 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
fb85abff 5503 }
5504
5505 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
5506 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
5507 vec_dest =
5508 vect_create_destination_var (scalar_dest,
5509 gimple_call_return_type (new_stmt));
5510 new_temp = make_ssa_name (vec_dest, new_stmt);
5511 gimple_call_set_lhs (new_stmt, new_temp);
5512
5513 if (compute_in_loop)
5514 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5515 else
5516 {
5517 /* Generate the misalignment computation outside LOOP. */
5518 pe = loop_preheader_edge (loop);
5519 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
5520 gcc_assert (!new_bb);
5521 }
5522
5523 *realignment_token = gimple_call_lhs (new_stmt);
5524
5525 /* The result of the CALL_EXPR to this builtin is determined from
5526 the value of the parameter and no global variables are touched
5527 which makes the builtin a "const" function. Requiring the
5528 builtin to have the "const" attribute makes it unnecessary
5529 to call mark_call_clobbered. */
5530 gcc_assert (TREE_READONLY (builtin_decl));
5531 }
5532
5533 if (alignment_support_scheme == dr_explicit_realign)
5534 return msq;
5535
5536 gcc_assert (!compute_in_loop);
5537 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
5538
5539
5540 /* 5. Create msq = phi <msq_init, lsq> in loop */
5541
5542 pe = loop_preheader_edge (containing_loop);
5543 vec_dest = vect_create_destination_var (scalar_dest, vectype);
f9e245b2 5544 msq = make_ssa_name (vec_dest);
fb85abff 5545 phi_stmt = create_phi_node (msq, containing_loop->header);
60d535d2 5546 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
fb85abff 5547
5548 return msq;
5549}
5550
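/* To make the floor alignment used in step 3 above concrete (a worked
   example, assuming a 16-byte DR_TARGET_ALIGNMENT): the BIT_AND_EXPR masks
   the pointer with (0 - 16), i.e. ...fffffff0, clearing the low four address
   bits so that the prolog load always reads from the aligned address
   floor (p).  */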
5551
ee612634 5552/* Function vect_grouped_load_supported.
fb85abff 5553
bc691ae4 5554 COUNT is the size of the load group (the number of statements plus the
5555 number of gaps). SINGLE_ELEMENT_P is true if there is actually
5556 only one statement, with a gap of COUNT - 1.
5557
5558 Returns true if a suitable permute exists. */
fb85abff 5559
5560bool
bc691ae4 5561vect_grouped_load_supported (tree vectype, bool single_element_p,
5562 unsigned HOST_WIDE_INT count)
fb85abff 5563{
3754d046 5564 machine_mode mode = TYPE_MODE (vectype);
fb85abff 5565
bc691ae4 5566 /* If this is single-element interleaving with an element distance
5567 that leaves unused vector loads around punt - we at least create
5568 very sub-optimal code in that case (and blow up memory,
5569 see PR65518). */
f08ee65f 5570 if (single_element_p && maybe_gt (count, TYPE_VECTOR_SUBPARTS (vectype)))
bc691ae4 5571 {
5572 if (dump_enabled_p ())
5573 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5574 "single-element interleaving not supported "
5575 "for not adjacent vector loads\n");
5576 return false;
5577 }
5578
1e1bca71 5579 /* vect_permute_load_chain requires the group size to be equal to 3 or
5580 be a power of two. */
5581 if (count != 3 && exact_log2 (count) == -1)
481fc474 5582 {
6d8fb6cf 5583 if (dump_enabled_p ())
7bd765d4 5584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1e1bca71 5585 "the size of the group of accesses"
5586 " is not a power of 2 or not equal to 3\n");
481fc474 5587 return false;
5588 }
5589
42f6a6e8 5590 /* Check that the permutation is supported. */
5591 if (VECTOR_MODE_P (mode))
5592 {
ba7efd65 5593 unsigned int i, j;
1e1bca71 5594 if (count == 3)
42f6a6e8 5595 {
ba7efd65 5596 unsigned int nelt;
5597 if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
5598 {
5599 if (dump_enabled_p ())
5600 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5601 "cannot handle groups of 3 loads for"
5602 " variable-length vectors\n");
5603 return false;
5604 }
5605
c3fa7fe9 5606 vec_perm_builder sel (nelt, nelt, 1);
5607 sel.quick_grow (nelt);
1957c019 5608 vec_perm_indices indices;
1e1bca71 5609 unsigned int k;
5610 for (k = 0; k < 3; k++)
5611 {
5612 for (i = 0; i < nelt; i++)
5613 if (3 * i + k < 2 * nelt)
5614 sel[i] = 3 * i + k;
5615 else
5616 sel[i] = 0;
1957c019 5617 indices.new_vector (sel, 2, nelt);
5618 if (!can_vec_perm_const_p (mode, indices))
1e1bca71 5619 {
5620 if (dump_enabled_p ())
5621 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5622 "shuffle of 3 loads is not supported by"
5623 " target\n");
5c6f6a61 5624 return false;
1e1bca71 5625 }
5626 for (i = 0, j = 0; i < nelt; i++)
5627 if (3 * i + k < 2 * nelt)
5628 sel[i] = i;
5629 else
5630 sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
1957c019 5631 indices.new_vector (sel, 2, nelt);
5632 if (!can_vec_perm_const_p (mode, indices))
1e1bca71 5633 {
5634 if (dump_enabled_p ())
5635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5636 "shuffle of 3 loads is not supported by"
5637 " target\n");
5638 return false;
5639 }
5640 }
5641 return true;
5642 }
5643 else
5644 {
5645 /* If length is not equal to 3 then only power of 2 is supported. */
ac29ece2 5646 gcc_assert (pow2p_hwi (count));
ba7efd65 5647 poly_uint64 nelt = GET_MODE_NUNITS (mode);
1957c019 5648
c3fa7fe9 5649 /* The encoding has a single stepped pattern. */
5650 vec_perm_builder sel (nelt, 1, 3);
5651 sel.quick_grow (3);
5652 for (i = 0; i < 3; i++)
1e1bca71 5653 sel[i] = i * 2;
1957c019 5654 vec_perm_indices indices (sel, 2, nelt);
5655 if (can_vec_perm_const_p (mode, indices))
1e1bca71 5656 {
c3fa7fe9 5657 for (i = 0; i < 3; i++)
1e1bca71 5658 sel[i] = i * 2 + 1;
1957c019 5659 indices.new_vector (sel, 2, nelt);
5660 if (can_vec_perm_const_p (mode, indices))
1e1bca71 5661 return true;
5662 }
5663 }
42f6a6e8 5664 }
fb85abff 5665
6d8fb6cf 5666 if (dump_enabled_p ())
7bd765d4 5667 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1e1bca71 5668 "extract even/odd not supported by target\n");
6620d7d7 5669 return false;
fb85abff 5670}
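/* A worked example of the power-of-two case above (assuming V4SI, i.e.
   nelt == 4): the stepped encoding {0, 2, 4} expands to the extract-even
   selector {0, 2, 4, 6} and, after each element is incremented, to the
   extract-odd selector {1, 3, 5, 7}; both have to be supported for the
   grouped load to use this scheme.  */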
5671
2dd8e84c 5672/* Return TRUE if vec_{masked_}load_lanes is available for COUNT vectors of
5673 type VECTYPE. MASKED_P says whether the masked form is needed. */
94b7b4dd 5674
5675bool
2dd8e84c 5676vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count,
5677 bool masked_p)
94b7b4dd 5678{
2dd8e84c 5679 if (masked_p)
5680 return vect_lanes_optab_supported_p ("vec_mask_load_lanes",
5681 vec_mask_load_lanes_optab,
5682 vectype, count);
5683 else
5684 return vect_lanes_optab_supported_p ("vec_load_lanes",
5685 vec_load_lanes_optab,
5686 vectype, count);
94b7b4dd 5687}
fb85abff 5688
5689/* Function vect_permute_load_chain.
5690
5691 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
1e1bca71 5692 a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
5693 the input data correctly. Return the final references for loads in
5694 RESULT_CHAIN.
fb85abff 5695
5696 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
5697 The input is 4 vectors each containing 8 elements. We assign a number to each
5698 element, the input sequence is:
5699
5700 1st vec: 0 1 2 3 4 5 6 7
5701 2nd vec: 8 9 10 11 12 13 14 15
48e1416a 5702 3rd vec: 16 17 18 19 20 21 22 23
fb85abff 5703 4th vec: 24 25 26 27 28 29 30 31
5704
5705 The output sequence should be:
5706
5707 1st vec: 0 4 8 12 16 20 24 28
5708 2nd vec: 1 5 9 13 17 21 25 29
48e1416a 5709 3rd vec: 2 6 10 14 18 22 26 30
fb85abff 5710 4th vec: 3 7 11 15 19 23 27 31
5711
5712 i.e., the first output vector should contain the first elements of each
5713 interleaving group, etc.
5714
282bf14c 5715 We use extract_even/odd instructions to create such output. The input of
5716 each extract_even/odd operation is two vectors
48e1416a 5717 1st vec 2nd vec
5718 0 1 2 3 4 5 6 7
fb85abff 5719
282bf14c 5720 and the output is the vector of extracted even/odd elements. The output of
fb85abff 5721 extract_even will be: 0 2 4 6
5722 and of extract_odd: 1 3 5 7
5723
48e1416a 5724
282bf14c 5725 The permutation is done in log LENGTH stages. In each stage extract_even
5726 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
5727 their order. In our example,
fb85abff 5728
5729 E1: extract_even (1st vec, 2nd vec)
5730 E2: extract_odd (1st vec, 2nd vec)
5731 E3: extract_even (3rd vec, 4th vec)
5732 E4: extract_odd (3rd vec, 4th vec)
5733
5734 The output for the first stage will be:
5735
5736 E1: 0 2 4 6 8 10 12 14
5737 E2: 1 3 5 7 9 11 13 15
48e1416a 5738 E3: 16 18 20 22 24 26 28 30
fb85abff 5739 E4: 17 19 21 23 25 27 29 31
5740
5741 In order to proceed and create the correct sequence for the next stage (or
48e1416a 5742 for the correct output, if the second stage is the last one, as in our
5743 example), we first put the output of extract_even operation and then the
fb85abff 5744 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
5745 The input for the second stage is:
5746
5747 1st vec (E1): 0 2 4 6 8 10 12 14
48e1416a 5748 2nd vec (E3): 16 18 20 22 24 26 28 30
5749 3rd vec (E2): 1 3 5 7 9 11 13 15
fb85abff 5750 4th vec (E4): 17 19 21 23 25 27 29 31
5751
5752 The output of the second stage:
5753
5754 E1: 0 4 8 12 16 20 24 28
5755 E2: 2 6 10 14 18 22 26 30
5756 E3: 1 5 9 13 17 21 25 29
5757 E4: 3 7 11 15 19 23 27 31
5758
5759 And RESULT_CHAIN after reordering:
5760
5761 1st vec (E1): 0 4 8 12 16 20 24 28
5762 2nd vec (E3): 1 5 9 13 17 21 25 29
48e1416a 5763 3rd vec (E2): 2 6 10 14 18 22 26 30
fb85abff 5764 4th vec (E4): 3 7 11 15 19 23 27 31. */
5765
481fc474 5766static void
f1f41a6c 5767vect_permute_load_chain (vec<tree> dr_chain,
48e1416a 5768 unsigned int length,
ecc42a77 5769 stmt_vec_info stmt_info,
fb85abff 5770 gimple_stmt_iterator *gsi,
f1f41a6c 5771 vec<tree> *result_chain)
fb85abff 5772{
03d37e4e 5773 tree data_ref, first_vect, second_vect;
42f6a6e8 5774 tree perm_mask_even, perm_mask_odd;
1e1bca71 5775 tree perm3_mask_low, perm3_mask_high;
42acab1c 5776 gimple *perm_stmt;
1c2fef9a 5777 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
42f6a6e8 5778 unsigned int i, j, log_length = exact_log2 (length);
282dc861 5779
1648f21f 5780 result_chain->quick_grow (length);
5781 memcpy (result_chain->address (), dr_chain.address (),
5782 length * sizeof (tree));
42f6a6e8 5783
1e1bca71 5784 if (length == 3)
fb85abff 5785 {
8b221927 5786 /* vect_grouped_load_supported ensures that this is constant. */
f08ee65f 5787 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
1e1bca71 5788 unsigned int k;
fb85abff 5789
c3fa7fe9 5790 vec_perm_builder sel (nelt, nelt, 1);
5791 sel.quick_grow (nelt);
1957c019 5792 vec_perm_indices indices;
1e1bca71 5793 for (k = 0; k < 3; k++)
5794 {
5795 for (i = 0; i < nelt; i++)
5796 if (3 * i + k < 2 * nelt)
5797 sel[i] = 3 * i + k;
5798 else
5799 sel[i] = 0;
1957c019 5800 indices.new_vector (sel, 2, nelt);
5801 perm3_mask_low = vect_gen_perm_mask_checked (vectype, indices);
1e1bca71 5802
5803 for (i = 0, j = 0; i < nelt; i++)
5804 if (3 * i + k < 2 * nelt)
5805 sel[i] = i;
5806 else
5807 sel[i] = nelt + ((nelt + k) % 3) + 3 * (j++);
1957c019 5808 indices.new_vector (sel, 2, nelt);
5809 perm3_mask_high = vect_gen_perm_mask_checked (vectype, indices);
1e1bca71 5810
5811 first_vect = dr_chain[0];
5812 second_vect = dr_chain[1];
5813
5814 /* Create interleaving stmt (low part of):
5815 low = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
5816 ...}> */
321d85d9 5817 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_low");
e9cf809e 5818 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
5819 second_vect, perm3_mask_low);
a73182ff 5820 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
48e1416a 5821
1e1bca71 5822 /* Create interleaving stmt (high part of):
5823 high = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
5824 ...}> */
5825 first_vect = data_ref;
5826 second_vect = dr_chain[2];
321d85d9 5827 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3_high");
e9cf809e 5828 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, first_vect,
5829 second_vect, perm3_mask_high);
a73182ff 5830 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
1e1bca71 5831 (*result_chain)[k] = data_ref;
fb85abff 5832 }
fb85abff 5833 }
1e1bca71 5834 else
5835 {
5836 /* If length is not equal to 3 then only power of 2 is supported. */
ac29ece2 5837 gcc_assert (pow2p_hwi (length));
1e1bca71 5838
c3fa7fe9 5839 /* The encoding has a single stepped pattern. */
f08ee65f 5840 poly_uint64 nelt = TYPE_VECTOR_SUBPARTS (vectype);
c3fa7fe9 5841 vec_perm_builder sel (nelt, 1, 3);
5842 sel.quick_grow (3);
5843 for (i = 0; i < 3; ++i)
1e1bca71 5844 sel[i] = i * 2;
1957c019 5845 vec_perm_indices indices (sel, 2, nelt);
5846 perm_mask_even = vect_gen_perm_mask_checked (vectype, indices);
1e1bca71 5847
c3fa7fe9 5848 for (i = 0; i < 3; ++i)
1e1bca71 5849 sel[i] = i * 2 + 1;
1957c019 5850 indices.new_vector (sel, 2, nelt);
5851 perm_mask_odd = vect_gen_perm_mask_checked (vectype, indices);
fb85abff 5852
1e1bca71 5853 for (i = 0; i < log_length; i++)
5854 {
5855 for (j = 0; j < length; j += 2)
5856 {
5857 first_vect = dr_chain[j];
5858 second_vect = dr_chain[j+1];
5859
5860 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5861 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
e9cf809e 5862 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5863 first_vect, second_vect,
5864 perm_mask_even);
a73182ff 5865 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
1e1bca71 5866 (*result_chain)[j/2] = data_ref;
5867
5868 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5869 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
e9cf809e 5870 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
5871 first_vect, second_vect,
5872 perm_mask_odd);
a73182ff 5873 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
1e1bca71 5874 (*result_chain)[j/2+length/2] = data_ref;
5875 }
5876 memcpy (dr_chain.address (), result_chain->address (),
5877 length * sizeof (tree));
5878 }
5879 }
5880}
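/* A sketch of the code generated by the power-of-two branch above for
   LENGTH == 2 and V4SI (SSA names are illustrative):

     vect_perm_even_1 = VEC_PERM_EXPR <v0, v1, {0, 2, 4, 6}>;
     vect_perm_odd_2 = VEC_PERM_EXPR <v0, v1, {1, 3, 5, 7}>;

   so RESULT_CHAIN ends up with the even elements of the pair followed by
   the odd elements.  */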
fb85abff 5881
926f7a02 5882/* Function vect_shift_permute_load_chain.
5883
5884 Given a chain of loads in DR_CHAIN of LENGTH 2 or 3, generate
5885 sequence of stmts to reorder the input data accordingly.
5886 Return the final references for loads in RESULT_CHAIN.
 5887 Return true if successful, false otherwise.
5888
5889 E.g., LENGTH is 3 and the scalar type is short, i.e., VF is 8.
5890 The input is 3 vectors each containing 8 elements. We assign a
5891 number to each element, the input sequence is:
5892
5893 1st vec: 0 1 2 3 4 5 6 7
5894 2nd vec: 8 9 10 11 12 13 14 15
5895 3rd vec: 16 17 18 19 20 21 22 23
5896
5897 The output sequence should be:
5898
5899 1st vec: 0 3 6 9 12 15 18 21
5900 2nd vec: 1 4 7 10 13 16 19 22
5901 3rd vec: 2 5 8 11 14 17 20 23
5902
5903 We use 3 shuffle instructions and 3 * 3 - 1 shifts to create such output.
5904
 5905 First we shuffle all 3 vectors to get the correct element order:
5906
5907 1st vec: ( 0 3 6) ( 1 4 7) ( 2 5)
5908 2nd vec: ( 8 11 14) ( 9 12 15) (10 13)
5909 3rd vec: (16 19 22) (17 20 23) (18 21)
5910
 5911 Next we unite and shift the vectors 3 times:
5912
5913 1st step:
5914 shift right by 6 the concatenation of:
5915 "1st vec" and "2nd vec"
5916 ( 0 3 6) ( 1 4 7) |( 2 5) _ ( 8 11 14) ( 9 12 15)| (10 13)
5917 "2nd vec" and "3rd vec"
5918 ( 8 11 14) ( 9 12 15) |(10 13) _ (16 19 22) (17 20 23)| (18 21)
5919 "3rd vec" and "1st vec"
5920 (16 19 22) (17 20 23) |(18 21) _ ( 0 3 6) ( 1 4 7)| ( 2 5)
5921 | New vectors |
5922
5923 So that now new vectors are:
5924
5925 1st vec: ( 2 5) ( 8 11 14) ( 9 12 15)
5926 2nd vec: (10 13) (16 19 22) (17 20 23)
5927 3rd vec: (18 21) ( 0 3 6) ( 1 4 7)
5928
5929 2nd step:
5930 shift right by 5 the concatenation of:
5931 "1st vec" and "3rd vec"
5932 ( 2 5) ( 8 11 14) |( 9 12 15) _ (18 21) ( 0 3 6)| ( 1 4 7)
5933 "2nd vec" and "1st vec"
5934 (10 13) (16 19 22) |(17 20 23) _ ( 2 5) ( 8 11 14)| ( 9 12 15)
5935 "3rd vec" and "2nd vec"
5936 (18 21) ( 0 3 6) |( 1 4 7) _ (10 13) (16 19 22)| (17 20 23)
5937 | New vectors |
5938
5939 So that now new vectors are:
5940
5941 1st vec: ( 9 12 15) (18 21) ( 0 3 6)
5942 2nd vec: (17 20 23) ( 2 5) ( 8 11 14)
5943 3rd vec: ( 1 4 7) (10 13) (16 19 22) READY
5944
5945 3rd step:
5946 shift right by 5 the concatenation of:
5947 "1st vec" and "1st vec"
5948 ( 9 12 15) (18 21) |( 0 3 6) _ ( 9 12 15) (18 21)| ( 0 3 6)
5949 shift right by 3 the concatenation of:
5950 "2nd vec" and "2nd vec"
5951 (17 20 23) |( 2 5) ( 8 11 14) _ (17 20 23)| ( 2 5) ( 8 11 14)
5952 | New vectors |
5953
5954 So that now all vectors are READY:
5955 1st vec: ( 0 3 6) ( 9 12 15) (18 21)
5956 2nd vec: ( 2 5) ( 8 11 14) (17 20 23)
5957 3rd vec: ( 1 4 7) (10 13) (16 19 22)
5958
 5959 This algorithm is faster than the one in vect_permute_load_chain if:
 5960 1. "shift of a concatenation" is faster than general permutation.
 5961 This is usually so.
 5962 2. The TARGET machine can't execute vector instructions in parallel.
 5963 This is because each step of the algorithm depends on the previous one.
5964 The algorithm in vect_permute_load_chain is much more parallel.
5965
5966 The algorithm is applicable only for LOAD CHAIN LENGTH less than VF.
5967*/
5968
5969static bool
5970vect_shift_permute_load_chain (vec<tree> dr_chain,
5971 unsigned int length,
ecc42a77 5972 stmt_vec_info stmt_info,
926f7a02 5973 gimple_stmt_iterator *gsi,
5974 vec<tree> *result_chain)
5975{
5976 tree vect[3], vect_shift[3], data_ref, first_vect, second_vect;
5977 tree perm2_mask1, perm2_mask2, perm3_mask;
5978 tree select_mask, shift1_mask, shift2_mask, shift3_mask, shift4_mask;
42acab1c 5979 gimple *perm_stmt;
926f7a02 5980
1c2fef9a 5981 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
926f7a02 5982 unsigned int i;
926f7a02 5983 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5984
f08ee65f 5985 unsigned HOST_WIDE_INT nelt, vf;
5986 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
5987 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
d75596cd 5988 /* Not supported for variable-length vectors. */
5989 return false;
5990
1957c019 5991 vec_perm_builder sel (nelt, nelt, 1);
282dc861 5992 sel.quick_grow (nelt);
5993
926f7a02 5994 result_chain->quick_grow (length);
5995 memcpy (result_chain->address (), dr_chain.address (),
5996 length * sizeof (tree));
5997
d75596cd 5998 if (pow2p_hwi (length) && vf > 4)
926f7a02 5999 {
2cc1223e 6000 unsigned int j, log_length = exact_log2 (length);
926f7a02 6001 for (i = 0; i < nelt / 2; ++i)
6002 sel[i] = i * 2;
6003 for (i = 0; i < nelt / 2; ++i)
6004 sel[nelt / 2 + i] = i * 2 + 1;
1957c019 6005 vec_perm_indices indices (sel, 2, nelt);
6006 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6007 {
6008 if (dump_enabled_p ())
6009 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6010 "shuffle of 2 fields structure is not \
6011 supported by target\n");
6012 return false;
6013 }
1957c019 6014 perm2_mask1 = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6015
6016 for (i = 0; i < nelt / 2; ++i)
6017 sel[i] = i * 2 + 1;
6018 for (i = 0; i < nelt / 2; ++i)
6019 sel[nelt / 2 + i] = i * 2;
1957c019 6020 indices.new_vector (sel, 2, nelt);
6021 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6022 {
6023 if (dump_enabled_p ())
6024 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6025 "shuffle of 2 fields structure is not \
6026 supported by target\n");
6027 return false;
6028 }
1957c019 6029 perm2_mask2 = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6030
6031 /* Generating permutation constant to shift all elements.
6032 For vector length 8 it is {4 5 6 7 8 9 10 11}. */
6033 for (i = 0; i < nelt; i++)
6034 sel[i] = nelt / 2 + i;
1957c019 6035 indices.new_vector (sel, 2, nelt);
6036 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6037 {
6038 if (dump_enabled_p ())
6039 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6040 "shift permutation is not supported by target\n");
6041 return false;
6042 }
1957c019 6043 shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6044
6045      /* Generating permutation constant to select a vector from the two.
6046 For vector length 8 it is {0 1 2 3 12 13 14 15}. */
6047 for (i = 0; i < nelt / 2; i++)
6048 sel[i] = i;
6049 for (i = nelt / 2; i < nelt; i++)
6050 sel[i] = nelt + i;
1957c019 6051 indices.new_vector (sel, 2, nelt);
6052 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6053 {
6054 if (dump_enabled_p ())
6055 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6056 "select is not supported by target\n");
6057 return false;
6058 }
1957c019 6059 select_mask = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6060
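      /* Each iteration of the loop below combines the pairs (j, j + 1) of the
	 current chain: VECT[0] holds the even lanes of DR_CHAIN[j] followed by
	 its odd lanes, VECT[1] holds the odd lanes of DR_CHAIN[j + 1] followed
	 by its even lanes.  SHIFT1_MASK then gathers all the odd lanes into
	 (*result_chain)[j/2 + length/2] and SELECT_MASK gathers all the even
	 lanes into (*result_chain)[j/2].  */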
2cc1223e 6061 for (i = 0; i < log_length; i++)
6062 {
6063 for (j = 0; j < length; j += 2)
6064 {
6065 first_vect = dr_chain[j];
6066 second_vect = dr_chain[j + 1];
6067
6068 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
e9cf809e 6069 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6070 first_vect, first_vect,
6071 perm2_mask1);
a73182ff 6072 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
2cc1223e 6073 vect[0] = data_ref;
6074
6075 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle2");
e9cf809e 6076 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6077 second_vect, second_vect,
6078 perm2_mask2);
a73182ff 6079 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
2cc1223e 6080 vect[1] = data_ref;
926f7a02 6081
2cc1223e 6082 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift");
e9cf809e 6083 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6084 vect[0], vect[1], shift1_mask);
a73182ff 6085 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
2cc1223e 6086 (*result_chain)[j/2 + length/2] = data_ref;
6087
6088 data_ref = make_temp_ssa_name (vectype, NULL, "vect_select");
e9cf809e 6089 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6090 vect[0], vect[1], select_mask);
a73182ff 6091 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
2cc1223e 6092 (*result_chain)[j/2] = data_ref;
6093 }
6094 memcpy (dr_chain.address (), result_chain->address (),
6095 length * sizeof (tree));
6096 }
926f7a02 6097 return true;
6098 }
d75596cd 6099 if (length == 3 && vf > 2)
926f7a02 6100 {
6101 unsigned int k = 0, l = 0;
6102
6103      /* Generating permutation constant to get all elements in the right order.
6104 For vector length 8 it is {0 3 6 1 4 7 2 5}. */
6105 for (i = 0; i < nelt; i++)
6106 {
6107 if (3 * k + (l % 3) >= nelt)
6108 {
6109 k = 0;
6110 l += (3 - (nelt % 3));
6111 }
6112 sel[i] = 3 * k + (l % 3);
6113 k++;
6114 }
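      /* For nelt == 8 the loop above yields sel = {0, 3, 6, 1, 4, 7, 2, 5}:
	 K walks the successive elements of the current field (stride 3), and
	 whenever 3 * K + (L % 3) would run past NELT, K restarts at 0 and L
	 advances so that L % 3 selects the next field.  */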
1957c019 6115 vec_perm_indices indices (sel, 2, nelt);
6116 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6117 {
6118 if (dump_enabled_p ())
6119 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6120 "shuffle of 3 fields structure is not \
6121 supported by target\n");
6122 return false;
6123 }
1957c019 6124 perm3_mask = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6125
6126 /* Generating permutation constant to shift all elements.
6127 For vector length 8 it is {6 7 8 9 10 11 12 13}. */
6128 for (i = 0; i < nelt; i++)
6129 sel[i] = 2 * (nelt / 3) + (nelt % 3) + i;
1957c019 6130 indices.new_vector (sel, 2, nelt);
6131 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6132 {
6133 if (dump_enabled_p ())
6134 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6135 "shift permutation is not supported by target\n");
6136 return false;
6137 }
1957c019 6138 shift1_mask = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6139
6140 /* Generating permutation constant to shift all elements.
6141 For vector length 8 it is {5 6 7 8 9 10 11 12}. */
6142 for (i = 0; i < nelt; i++)
6143 sel[i] = 2 * (nelt / 3) + 1 + i;
1957c019 6144 indices.new_vector (sel, 2, nelt);
6145 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6146 {
6147 if (dump_enabled_p ())
6148 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6149 "shift permutation is not supported by target\n");
6150 return false;
6151 }
1957c019 6152 shift2_mask = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6153
6154 /* Generating permutation constant to shift all elements.
6155 For vector length 8 it is {3 4 5 6 7 8 9 10}. */
6156 for (i = 0; i < nelt; i++)
6157 sel[i] = (nelt / 3) + (nelt % 3) / 2 + i;
1957c019 6158 indices.new_vector (sel, 2, nelt);
6159 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6160 {
6161 if (dump_enabled_p ())
6162 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6163 "shift permutation is not supported by target\n");
6164 return false;
6165 }
1957c019 6166 shift3_mask = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6167
6168 /* Generating permutation constant to shift all elements.
6169 For vector length 8 it is {5 6 7 8 9 10 11 12}. */
6170 for (i = 0; i < nelt; i++)
6171 sel[i] = 2 * (nelt / 3) + (nelt % 3) / 2 + i;
1957c019 6172 indices.new_vector (sel, 2, nelt);
6173 if (!can_vec_perm_const_p (TYPE_MODE (vectype), indices))
926f7a02 6174 {
6175 if (dump_enabled_p ())
6176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6177 "shift permutation is not supported by target\n");
6178 return false;
6179 }
1957c019 6180 shift4_mask = vect_gen_perm_mask_checked (vectype, indices);
926f7a02 6181
6182 for (k = 0; k < 3; k++)
6183 {
321d85d9 6184 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shuffle3");
e9cf809e 6185 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6186 dr_chain[k], dr_chain[k],
6187 perm3_mask);
a73182ff 6188 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
926f7a02 6189 vect[k] = data_ref;
6190 }
6191
6192 for (k = 0; k < 3; k++)
6193 {
6194 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift1");
e9cf809e 6195 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6196 vect[k % 3], vect[(k + 1) % 3],
6197 shift1_mask);
a73182ff 6198 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
926f7a02 6199 vect_shift[k] = data_ref;
6200 }
6201
6202 for (k = 0; k < 3; k++)
6203 {
6204 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift2");
e9cf809e 6205 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR,
6206 vect_shift[(4 - k) % 3],
6207 vect_shift[(3 - k) % 3],
6208 shift2_mask);
a73182ff 6209 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
926f7a02 6210 vect[k] = data_ref;
6211 }
6212
6213 (*result_chain)[3 - (nelt % 3)] = vect[2];
6214
6215 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift3");
e9cf809e 6216 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[0],
6217 vect[0], shift3_mask);
a73182ff 6218 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
926f7a02 6219 (*result_chain)[nelt % 3] = data_ref;
6220
6221 data_ref = make_temp_ssa_name (vectype, NULL, "vect_shift4");
e9cf809e 6222 perm_stmt = gimple_build_assign (data_ref, VEC_PERM_EXPR, vect[1],
6223 vect[1], shift4_mask);
a73182ff 6224 vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
926f7a02 6225 (*result_chain)[0] = data_ref;
6226 return true;
6227 }
6228 return false;
6229}
6230
ee612634 6231/* Function vect_transform_grouped_load.
fb85abff 6232
6233 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
6234 to perform their permutation and ascribe the result vectorized statements to
6235 the scalar statements.
6236*/
6237
481fc474 6238void
ecc42a77 6239vect_transform_grouped_load (stmt_vec_info stmt_info, vec<tree> dr_chain,
6240 int size, gimple_stmt_iterator *gsi)
fb85abff 6241{
3754d046 6242 machine_mode mode;
1e094109 6243 vec<tree> result_chain = vNULL;
fb85abff 6244
48e1416a 6245 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
6246      RESULT_CHAIN is the output of vect_permute_load_chain; it contains the
fb85abff 6247      permuted vectors that are ready for vector computation.  */
f1f41a6c 6248 result_chain.create (size);
926f7a02 6249
6250   /* If the reassociation width for the vector type is 2 or greater, the target
6251      machine can execute 2 or more vector instructions in parallel.  Otherwise
6252      try to get the chain for the load group using vect_shift_permute_load_chain.  */
1c2fef9a 6253 mode = TYPE_MODE (STMT_VINFO_VECTYPE (stmt_info));
926f7a02 6254 if (targetm.sched.reassociation_width (VEC_PERM_EXPR, mode) > 1
ac29ece2 6255 || pow2p_hwi (size)
a73182ff 6256 || !vect_shift_permute_load_chain (dr_chain, size, stmt_info,
926f7a02 6257 gsi, &result_chain))
a73182ff 6258 vect_permute_load_chain (dr_chain, size, stmt_info, gsi, &result_chain);
6259 vect_record_grouped_load_vectors (stmt_info, result_chain);
f1f41a6c 6260 result_chain.release ();
94b7b4dd 6261}
6262
ee612634 6263/* RESULT_CHAIN contains the output of a group of grouped loads that were
ecc42a77 6264 generated as part of the vectorization of STMT_INFO. Assign the statement
94b7b4dd 6265 for each vector to the associated scalar statement. */
6266
6267void
ecc42a77 6268vect_record_grouped_load_vectors (stmt_vec_info stmt_info,
6269 vec<tree> result_chain)
94b7b4dd 6270{
aebdbd31 6271 vec_info *vinfo = stmt_info->vinfo;
cd24aa3c 6272 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
94b7b4dd 6273 unsigned int i, gap_count;
6274 tree tmp_data_ref;
fb85abff 6275
48e1416a 6276 /* Put a permuted data-ref in the VECTORIZED_STMT field.
6277      Since we scan the chain starting from its first node, their order
fb85abff 6278      corresponds to the order of data-refs in RESULT_CHAIN.  */
cd24aa3c 6279 stmt_vec_info next_stmt_info = first_stmt_info;
fb85abff 6280 gap_count = 1;
f1f41a6c 6281 FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
fb85abff 6282 {
cd24aa3c 6283 if (!next_stmt_info)
fb85abff 6284 break;
6285
282bf14c 6286 /* Skip the gaps. Loads created for the gaps will be removed by dead
6287 code elimination pass later. No need to check for the first stmt in
fb85abff 6288 the group, since it always exists.
e1009321 6289 DR_GROUP_GAP is the number of steps in elements from the previous
6290 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
282bf14c 6291 correspond to the gaps. */
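	  /* For example, for a group that reads only a[3*i] and a[3*i+2],
	     the second member has DR_GROUP_GAP == 2, so the vector load
	     created for the unused a[3*i+1] slot is skipped here and left
	     for dead code elimination.  */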
cd24aa3c 6292 if (next_stmt_info != first_stmt_info
6293 && gap_count < DR_GROUP_GAP (next_stmt_info))
fb85abff 6294 {
6295 gap_count++;
6296 continue;
6297 }
6298
cd24aa3c 6299 while (next_stmt_info)
fb85abff 6300 {
aebdbd31 6301 stmt_vec_info new_stmt_info = vinfo->lookup_def (tmp_data_ref);
fb85abff 6302 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
6303 copies, and we put the new vector statement in the first available
6304 RELATED_STMT. */
cd24aa3c 6305 if (!STMT_VINFO_VEC_STMT (next_stmt_info))
6306 STMT_VINFO_VEC_STMT (next_stmt_info) = new_stmt_info;
fb85abff 6307 else
6308 {
cd24aa3c 6309 if (!DR_GROUP_SAME_DR_STMT (next_stmt_info))
fb85abff 6310 {
435515db 6311 stmt_vec_info prev_stmt_info
cd24aa3c 6312 = STMT_VINFO_VEC_STMT (next_stmt_info);
aebdbd31 6313 stmt_vec_info rel_stmt_info
435515db 6314 = STMT_VINFO_RELATED_STMT (prev_stmt_info);
aebdbd31 6315 while (rel_stmt_info)
fb85abff 6316 {
435515db 6317 prev_stmt_info = rel_stmt_info;
aebdbd31 6318 rel_stmt_info = STMT_VINFO_RELATED_STMT (rel_stmt_info);
fb85abff 6319 }
6320
435515db 6321 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
fb85abff 6322 }
6323 }
6324
cd24aa3c 6325 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
fb85abff 6326 gap_count = 1;
cd24aa3c 6327 /* If NEXT_STMT_INFO accesses the same DR as the previous statement,
fb85abff 6328 put the same TMP_DATA_REF as its vectorized statement; otherwise
6329 get the next data-ref from RESULT_CHAIN. */
cd24aa3c 6330 if (!next_stmt_info || !DR_GROUP_SAME_DR_STMT (next_stmt_info))
fb85abff 6331 break;
6332 }
6333 }
fb85abff 6334}
6335
6336/* Function vect_force_dr_alignment_p.
6337
6338 Returns whether the alignment of a DECL can be forced to be aligned
6339    on an ALIGNMENT-bit boundary.  */
6340
48e1416a 6341bool
e092c20e 6342vect_can_force_dr_alignment_p (const_tree decl, poly_uint64 alignment)
fb85abff 6343{
53e9c5c4 6344 if (!VAR_P (decl))
fb85abff 6345 return false;
6346
331d5983 6347 if (decl_in_symtab_p (decl)
6348 && !symtab_node::get (decl)->can_increase_alignment_p ())
8cab13cf 6349 return false;
6350
fb85abff 6351 if (TREE_STATIC (decl))
c34f18f1 6352 return (known_le (alignment,
6353 (unsigned HOST_WIDE_INT) MAX_OFILE_ALIGNMENT));
fb85abff 6354 else
e092c20e 6355 return (known_le (alignment, (unsigned HOST_WIDE_INT) MAX_STACK_ALIGNMENT));
fb85abff 6356}
6357
fb85abff 6358
abc9513d 6359/* Return whether the data reference DR_INFO is supported with respect to its
0822b158 6360 alignment.
6361 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
6362    if it is aligned, i.e., check if it is possible to vectorize it with different
fb85abff 6363 alignment. */
6364
6365enum dr_alignment_support
abc9513d 6366vect_supportable_dr_alignment (dr_vec_info *dr_info,
0822b158 6367 bool check_aligned_accesses)
fb85abff 6368{
abc9513d 6369 data_reference *dr = dr_info->dr;
6370 stmt_vec_info stmt_info = dr_info->stmt;
fb85abff 6371 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3754d046 6372 machine_mode mode = TYPE_MODE (vectype);
37545e54 6373 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6374 struct loop *vect_loop = NULL;
6375 bool nested_in_vect_loop = false;
fb85abff 6376
abc9513d 6377 if (aligned_access_p (dr_info) && !check_aligned_accesses)
fb85abff 6378 return dr_aligned;
6379
c71d3c24 6380 /* For now assume all conditional loads/stores support unaligned
6381 access without any special code. */
0219dc42 6382 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
6383 if (gimple_call_internal_p (stmt)
6384 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD
6385 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE))
6386 return dr_unaligned_supported;
c71d3c24 6387
ad074595 6388 if (loop_vinfo)
6389 {
6390 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
0219dc42 6391 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt_info);
ad074595 6392 }
37545e54 6393
fb85abff 6394 /* Possibly unaligned access. */
6395
6396 /* We can choose between using the implicit realignment scheme (generating
6397 a misaligned_move stmt) and the explicit realignment scheme (generating
282bf14c 6398      aligned loads with a REALIGN_LOAD).  There are two variants of the
6399      explicit realignment scheme: optimized and unoptimized.
fb85abff 6400 We can optimize the realignment only if the step between consecutive
6401 vector loads is equal to the vector size. Since the vector memory
6402 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
6403 is guaranteed that the misalignment amount remains the same throughout the
6404 execution of the vectorized loop. Therefore, we can create the
6405 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
6406 at the loop preheader.
6407
6408 However, in the case of outer-loop vectorization, when vectorizing a
6409 memory access in the inner-loop nested within the LOOP that is now being
6410 vectorized, while it is guaranteed that the misalignment of the
6411 vectorized memory access will remain the same in different outer-loop
6412      iterations, it is *not* guaranteed that it will remain the same throughout
6413 the execution of the inner-loop. This is because the inner-loop advances
6414 with the original scalar step (and not in steps of VS). If the inner-loop
6415 step happens to be a multiple of VS, then the misalignment remains fixed
6416 and we can use the optimized realignment scheme. For example:
6417
6418 for (i=0; i<N; i++)
6419 for (j=0; j<M; j++)
6420 s += a[i+j];
6421
6422 When vectorizing the i-loop in the above example, the step between
6423 consecutive vector loads is 1, and so the misalignment does not remain
6424 fixed across the execution of the inner-loop, and the realignment cannot
6425 be optimized (as illustrated in the following pseudo vectorized loop):
6426
6427 for (i=0; i<N; i+=4)
6428 for (j=0; j<M; j++){
6429 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
6430 // when j is {0,1,2,3,4,5,6,7,...} respectively.
6431 // (assuming that we start from an aligned address).
6432 }
6433
6434 We therefore have to use the unoptimized realignment scheme:
6435
6436 for (i=0; i<N; i+=4)
6437 for (j=k; j<M; j+=4)
6438 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
6439 // that the misalignment of the initial address is
6440 // 0).
6441
6442 The loop can then be vectorized as follows:
6443
6444 for (k=0; k<4; k++){
6445 rt = get_realignment_token (&vp[k]);
6446 for (i=0; i<N; i+=4){
6447 v1 = vp[i+k];
6448 for (j=k; j<M; j+=4){
6449 v2 = vp[i+j+VS-1];
6450 va = REALIGN_LOAD <v1,v2,rt>;
6451 vs += va;
6452 v1 = v2;
6453 }
6454 }
6455 } */
6456
6457 if (DR_IS_READ (dr))
6458 {
c6b19c5f 6459 bool is_packed = false;
6460 tree type = (TREE_TYPE (DR_REF (dr)));
6461
d6bf3b14 6462 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
fb85abff 6463 && (!targetm.vectorize.builtin_mask_for_load
6464 || targetm.vectorize.builtin_mask_for_load ()))
6465 {
6466 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ca1a4077 6467
6468 /* If we are doing SLP then the accesses need not have the
6469	     same alignment; instead it depends on the SLP group size.  */
6470 if (loop_vinfo
6471 && STMT_SLP_TYPE (stmt_info)
d75596cd 6472 && !multiple_p (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
cd24aa3c 6473 * (DR_GROUP_SIZE
6474 (DR_GROUP_FIRST_ELEMENT (stmt_info))),
d75596cd 6475 TYPE_VECTOR_SUBPARTS (vectype)))
ca1a4077 6476 ;
6477 else if (!loop_vinfo
6478 || (nested_in_vect_loop
52acb7ae 6479 && maybe_ne (TREE_INT_CST_LOW (DR_STEP (dr)),
6480 GET_MODE_SIZE (TYPE_MODE (vectype)))))
fb85abff 6481 return dr_explicit_realign;
6482 else
6483 return dr_explicit_realign_optimized;
6484 }
abc9513d 6485 if (!known_alignment_for_access_p (dr_info))
cfa724cf 6486 is_packed = not_size_aligned (DR_REF (dr));
48e1416a 6487
33a82fb9 6488 if (targetm.vectorize.support_vector_misalignment
abc9513d 6489 (mode, type, DR_MISALIGNMENT (dr_info), is_packed))
fb85abff 6490 /* Can't software pipeline the loads, but can at least do them. */
6491 return dr_unaligned_supported;
6492 }
c6b19c5f 6493 else
6494 {
6495 bool is_packed = false;
6496 tree type = (TREE_TYPE (DR_REF (dr)));
fb85abff 6497
abc9513d 6498 if (!known_alignment_for_access_p (dr_info))
cfa724cf 6499 is_packed = not_size_aligned (DR_REF (dr));
48e1416a 6500
33a82fb9 6501 if (targetm.vectorize.support_vector_misalignment
abc9513d 6502 (mode, type, DR_MISALIGNMENT (dr_info), is_packed))
c6b19c5f 6503 return dr_unaligned_supported;
6504 }
48e1416a 6505
fb85abff 6506 /* Unsupported. */
6507 return dr_unaligned_unsupported;
6508}