/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "tm_p.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"
/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"

/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  limit_p = !targetm.array_mode_supported_p (mode, count);
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
                              MODE_INT, limit_p);

  if (array_mode == BLKmode)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]\n",
                         GET_MODE_NAME (mode), count);
      return false;
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot use %s<%s><%s>\n", name,
                         GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
                     GET_MODE_NAME (mode));

  return true;
}
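
/* For example, for a V4SI vector type (128 bits) and COUNT == 2, the
   check above looks for a 256-bit integer mode to serve as the array
   mode of the two-vector group; if no such mode exists (BLKmode), or the
   target has no handler for that (array mode, vector mode) pair,
   load/store-lanes cannot be used for the group.  */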


/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}


/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark for run-time aliasing test between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
      dump_printf (MSG_NOTE, "\n");
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not supported when optimizing"
                         " for size.\n");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not yet supported for outer-loops.\n");
      return false;
    }

  /* FORNOW: We don't support creating runtime alias tests for non-constant
     step.  */
  if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
      || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not yet supported for non-constant "
                         "step\n");
      return false;
    }

  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
  return true;
}
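
/* Note that a zero value of PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, tested
   above, disables versioning for alias altogether; otherwise each DDR pushed
   onto LOOP_VINFO_MAY_ALIAS_DDRS becomes one run-time overlap test guarding
   the vectorized version of the loop.  */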


/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When the dependence can instead be checked
   at run-time by versioning for alias, return FALSE.  Adjust *MAX_VF according
   to the data dependence.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      /* If user asserted safelen consecutive iterations can be
         executed concurrently, assume independence.  */
      if (loop->safelen >= 2)
        {
          if (loop->safelen < *max_vf)
            *max_vf = loop->safelen;
          return false;
        }

      if (STMT_VINFO_GATHER_P (stmtinfo_a)
          || STMT_VINFO_GATHER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "versioning for alias not supported for: "
                               "can't determine dependence between ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dra));
              dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (drb));
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "can't determine dependence between ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             DR_REF (drb));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      /* If user asserted safelen consecutive iterations can be
         executed concurrently, assume independence.  */
      if (loop->safelen >= 2)
        {
          if (loop->safelen < *max_vf)
            *max_vf = loop->safelen;
          return false;
        }

      if (STMT_VINFO_GATHER_P (stmtinfo_a)
          || STMT_VINFO_GATHER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "versioning for alias not supported for: "
                               "bad dist vector for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dra));
              dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (drb));
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "bad dist vector for ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance = %d.\n", dist);

      if (dist == 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "dependence distance == 0 between ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE, " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
              dump_printf (MSG_NOTE, "\n");
            }

          /* When we perform grouped accesses and perform implicit CSE
             by detecting equal accesses and doing disambiguation with
             runtime alias tests like for
                .. = a[i];
                .. = a[i+1];
                a[i] = ..;
                a[i+1] = ..;
                *p = ..;
                .. = a[i];
                .. = a[i+1];
             where we will end up loading { a[i], a[i+1] } once, make
             sure that inserting group loads before the first load and
             stores after the last store will do the right thing.  */
          if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
               && GROUP_SAME_DR_STMT (stmtinfo_a))
              || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)
                  && GROUP_SAME_DR_STMT (stmtinfo_b)))
            {
              gimple earlier_stmt;
              earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
              if (DR_IS_WRITE
                    (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "READ_WRITE dependence in interleaving."
                                     "\n");
                  return true;
                }
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "dependence distance negative.\n");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "adjusting maximal vectorization factor to %i\n",
                             *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "dependence distance >= VF.\n");
          continue;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized, possible dependence "
                           "between data-refs ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_NOTE, " and ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
          dump_printf (MSG_NOTE, "\n");
        }

      return true;
    }

  return false;
}
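
/* A worked example of the distance handling above: with a dependence
   distance of 3 and *MAX_VF initially 8, the first test lowers *MAX_VF
   to 3, and the following test then treats the distance as harmless
   (distance >= VF), so vectorization can still proceed with a factor
   of at most 3.  */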

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_ref_dependences ===\n");

  if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
                                &LOOP_VINFO_DDRS (loop_vinfo),
                                LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
    return false;

  FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
      return false;

  return true;
}


/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When the dependence can instead be checked
   at run-time by versioning for alias, return FALSE.  */

static bool
vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  /* We need to check dependences of statements marked as unvectorizable
     as well, they still can prohibit vectorization.  */

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb)
    return false;

  /* Read-read is OK.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    return false;

  /* If dra and drb are part of the same interleaving chain consider
     them independent.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
      && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
          == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      gimple earlier_stmt;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "can't determine dependence between ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check that it's not a load-after-store dependence.  */
      earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
        return true;

      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "determined dependence between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
      dump_printf (MSG_NOTE, "\n");
    }

  /* Do not vectorize basic blocks with write-write dependences.  */
  if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
    return true;

  /* Check dependence between DRA and DRB for basic block vectorization.
     If the accesses share same bases and offsets, we can compare their initial
     constant offsets to decide whether they differ or not.  In case of a read-
     write dependence we check that the load is before the store to ensure that
     vectorization will not change the order of the accesses.  */

  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}


/* Function vect_slp_analyze_data_ref_dependences.

   Examine all the data references in the basic-block, and make sure there
   do not exist any data dependences between them.  */

bool
vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
{
  struct data_dependence_relation *ddr;
  unsigned int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_slp_analyze_data_ref_dependences ===\n");

  if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
                                &BB_VINFO_DDRS (bb_vinfo),
                                vNULL, true))
    return false;

  FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
    if (vect_slp_analyze_data_ref_dependence (ddr))
      return false;

  return true;
}


/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_compute_data_ref_alignment:\n");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  /* Strided loads perform only component accesses, misalignment information
     is irrelevant for them.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    return true;

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop is
     evenly divisible by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "inner step divides the vector-size.\n");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "inner step doesn't divide the vector-size.\n");
          misalign = NULL_TREE;
        }
    }

  /* Similarly, if we're doing basic-block vectorization, we can only use
     base and misalignment information relative to an innermost loop if the
     misalignment stays the same throughout the execution of the loop.
     As above, this is the case if the stride of the dataref is evenly
     divisible by the vector size.  */
  if (!loop)
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "SLP: step doesn't divide the vector-size.\n");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown alignment for access: ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0)
      || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables here if
         flag_section_anchors is enabled as we already generated
         RTL for other functions.  Most global variables should
         have been aligned during the IPA increase_alignment pass.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "can't force alignment of ref: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
              dump_printf (MSG_NOTE, "\n");
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
          dump_printf (MSG_NOTE, "\n");
        }

      ((dataref_aux *)dr->aux)->base_decl = base;
      ((dataref_aux *)dr->aux)->base_misaligned = true;
    }

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!host_integerp (misalign, 1))
    {
      /* Negative or overflowed misalignment value.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unexpected misalign value\n");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
    }

  return true;
}


/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}


/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  vec<dr_p> same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
  if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.\n");
  SET_DR_MISALIGNMENT (dr, -1);
}
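
/* Example of the arithmetic above: for a forward access of 4-byte elements
   (DR_SIZE == 4), NPEEL == 3 and an old misalignment of 12 against a 16-byte
   vector alignment, the new misalignment is (12 + 3 * 4) & 15 == 8.  */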


/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (dump_enabled_p ())
            {
              if (DR_IS_READ (dr))
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported unaligned load.");
              else
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported unaligned "
                                 "store.");

              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dr));
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Vectorizing an unaligned access.\n");
    }
  return true;
}

/* Given a memory reference EXP return whether its alignment is less
   than its size.  */

static bool
not_size_aligned (tree exp)
{
  if (!host_integerp (TYPE_SIZE (TREE_TYPE (exp)), 1))
    return true;

  return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
          > get_object_alignment (exp));
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}), is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          dump_printf (MSG_NOTE,
                       ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = TREE_TYPE (DR_REF (dr));
      bool is_packed = not_size_aligned (DR_REF (dr));
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unknown misalignment, is_packed = %d\n", is_packed);
      if ((TYPE_USER_ALIGN (type) && !is_packed)
          || targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
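
/* E.g., with 4-byte elements a known misalignment of 8 bytes can be fixed by
   peeling two scalar iterations, whereas a misalignment of 6 bytes is
   rejected above because whole-iteration peeling can only remove multiples
   of the element size.  */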


/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost,
                           stmt_vector_for_cost *body_cost_vec)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;

  if (DR_IS_READ (dr))
    vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
                        NULL, body_cost_vec, false);
  else
    vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_data_access_cost: inside_cost = %d, "
                     "outside_cost = %d.\n", *inside_cost, *outside_cost);
}


/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  _vect_peel_info **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find (&elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find_slot (slot, INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && unlimited_cost_model ())
    slot->count += VECT_MAX_COST;
}


/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

int
vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
                                     _vect_peel_extended_info *max)
{
  vect_peel_info elem = *slot;

  if (elem->count > max->peel_info.count
      || (elem->count == max->peel_info.count
          && max->peel_info.npeel > elem->npeel))
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}


/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

int
vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
                                   _vect_peel_extended_info *min)
{
  vect_peel_info elem = *slot;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
  int single_iter_cost;

  prologue_cost_vec.create (2);
  body_cost_vec.create (2);
  epilogue_cost_vec.create (2);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
                                 &body_cost_vec);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
                                               &dummy, single_iter_cost,
                                               &prologue_cost_vec,
                                               &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->body_cost_vec.release ();
      min->body_cost_vec = body_cost_vec;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }
  else
    body_cost_vec.release ();

  return 1;
}


/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel,
                                       stmt_vector_for_cost *body_cost_vec)
{
   struct _vect_peel_extended_info res;

   res.peel_info.dr = NULL;
   res.body_cost_vec = stmt_vector_for_cost ();

   if (!unlimited_cost_model ())
     {
       res.inside_cost = INT_MAX;
       res.outside_cost = INT_MAX;
       LOOP_VINFO_PEELING_HTAB (loop_vinfo)
           .traverse <_vect_peel_extended_info *,
                      vect_peeling_hash_get_lowest_cost> (&res);
     }
   else
     {
       res.peel_info.count = 0;
       LOOP_VINFO_PEELING_HTAB (loop_vinfo)
           .traverse <_vect_peel_extended_info *,
                      vect_peeling_hash_get_most_frequent> (&res);
     }

   *npeel = res.peel_info.npeel;
   *body_cost_vec = res.body_cost_vec;
   return res.peel_info.dr;
}


/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){    # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){    # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){   # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){   # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){  # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){  # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     These loops are later passed to loop_transform to be vectorized.  The
     vectorizer will use the alignment information to guide the transformation
     (whether to generate regular loads/stores, or with special handling for
     misalignment).  */

1314bool
1315vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1316{
9771b263 1317 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
ebfd146a
IR
1318 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1319 enum dr_alignment_support supportable_dr_alignment;
720f5239 1320 struct data_reference *dr0 = NULL, *first_store = NULL;
ebfd146a 1321 struct data_reference *dr;
720f5239 1322 unsigned int i, j;
ebfd146a
IR
1323 bool do_peeling = false;
1324 bool do_versioning = false;
1325 bool stat;
1326 gimple stmt;
1327 stmt_vec_info stmt_info;
720f5239
IR
1328 unsigned int npeel = 0;
1329 bool all_misalignments_unknown = true;
1330 unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1331 unsigned possible_npeel_number = 1;
1332 tree vectype;
1333 unsigned int nelements, mis, same_align_drs_max = 0;
c3284718 1334 stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();
ebfd146a 1335
73fbfcad 1336 if (dump_enabled_p ())
78c60e3d 1337 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1338 "=== vect_enhance_data_refs_alignment ===\n");
ebfd146a
IR
1339
1340 /* While cost model enhancements are expected in the future, the high level
1341 view of the code at this time is as follows:
1342
673beced
RE
1343 A) If there is a misaligned access then see if peeling to align
1344 this access can make all data references satisfy
8f439681
RE
1345 vect_supportable_dr_alignment. If so, update data structures
1346 as needed and return true.
ebfd146a
IR
1347
1348 B) If peeling wasn't possible and there is a data reference with an
1349 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1350 then see if loop versioning checks can be used to make all data
1351 references satisfy vect_supportable_dr_alignment. If so, update
1352 data structures as needed and return true.
1353
1354 C) If neither peeling nor versioning were successful then return false if
1355 any data reference does not satisfy vect_supportable_dr_alignment.
1356
1357 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1358
1359 Note, Possibility 3 above (which is peeling and versioning together) is not
1360 being done at this time. */
1361
1362 /* (1) Peeling to force alignment. */
1363
1364 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1365 Considerations:
1366 + How many accesses will become aligned due to the peeling
1367 - How many accesses will become unaligned due to the peeling,
1368 and the cost of misaligned accesses.
b8698a0f 1369 - The cost of peeling (the extra runtime checks, the increase
720f5239 1370 in code size). */
ebfd146a 1371
9771b263 1372 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1373 {
1374 stmt = DR_STMT (dr);
1375 stmt_info = vinfo_for_stmt (stmt);
1376
38eec4c6 1377 if (!STMT_VINFO_RELEVANT_P (stmt_info))
39becbac
RG
1378 continue;
1379
ebfd146a
IR
1380 /* For interleaving, only the alignment of the first access
1381 matters. */
0d0293ac 1382 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
e14c1050 1383 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
ebfd146a
IR
1384 continue;
1385
39becbac
RG
1386 /* For invariant accesses there is nothing to enhance. */
1387 if (integer_zerop (DR_STEP (dr)))
1388 continue;
1389
319e6439
RG
1390 /* Strided loads perform only component accesses, alignment is
1391 irrelevant for them. */
1392 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1393 continue;
1394
720f5239
IR
1395 supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
1396 do_peeling = vector_alignment_reachable_p (dr);
1397 if (do_peeling)
ebfd146a 1398 {
720f5239
IR
1399 if (known_alignment_for_access_p (dr))
1400 {
1401 unsigned int npeel_tmp;
d8ba5b19
RG
1402 bool negative = tree_int_cst_compare (DR_STEP (dr),
1403 size_zero_node) < 0;
720f5239
IR
1404
1405 /* Save info about DR in the hash table. */
bf190e8d
LC
1406 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
1407 LOOP_VINFO_PEELING_HTAB (loop_vinfo).create (1);
720f5239
IR
1408
1409 vectype = STMT_VINFO_VECTYPE (stmt_info);
1410 nelements = TYPE_VECTOR_SUBPARTS (vectype);
1411 mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
1412 TREE_TYPE (DR_REF (dr))));
d8ba5b19 1413 npeel_tmp = (negative
8b8bba2d
RG
1414 ? (mis - nelements) : (nelements - mis))
1415 & (nelements - 1);
720f5239
IR
1416
1417 /* For multiple types, it is possible that the bigger type access
ff802fa1 1418 will have more than one peeling option. E.g., a loop with two
720f5239 1419 types: one of size (vector size / 4), and the other one of
ff802fa1 1420 size (vector size / 8). Vectorization factor will 8. If both
720f5239 1421 access are misaligned by 3, the first one needs one scalar
ff802fa1 1422 iteration to be aligned, and the second one needs 5. But the
720f5239
IR
1423 the first one will be aligned also by peeling 5 scalar
1424 iterations, and in that case both accesses will be aligned.
1425 Hence, except for the immediate peeling amount, we also want
1426 to try to add full vector size, while we don't exceed
1427 vectorization factor.
1428 We do this automtically for cost model, since we calculate cost
1429 for every peeling option. */
d6d11272 1430 if (unlimited_cost_model ())
720f5239
IR
1431 possible_npeel_number = vf /nelements;
1432
1433 /* Handle the aligned case. We may decide to align some other
1434 access, making DR unaligned. */
1435 if (DR_MISALIGNMENT (dr) == 0)
1436 {
1437 npeel_tmp = 0;
d6d11272 1438 if (unlimited_cost_model ())
720f5239
IR
1439 possible_npeel_number++;
1440 }
1441
1442 for (j = 0; j < possible_npeel_number; j++)
1443 {
1444 gcc_assert (npeel_tmp <= vf);
1445 vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
1446 npeel_tmp += nelements;
1447 }
1448
1449 all_misalignments_unknown = false;
1450 /* Data-ref that was chosen for the case that all the
1451 misalignments are unknown is not relevant anymore, since we
1452 have a data-ref with known alignment. */
1453 dr0 = NULL;
1454 }
1455 else
1456 {
4ba5ea11
RB
1457 /* If we don't know any misalignment values, we prefer
1458 peeling for data-ref that has the maximum number of data-refs
720f5239
IR
1459 with the same alignment, unless the target prefers to align
1460 stores over load. */
1461 if (all_misalignments_unknown)
1462 {
4ba5ea11
RB
1463 unsigned same_align_drs
1464 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
1465 if (!dr0
1466 || same_align_drs_max < same_align_drs)
720f5239 1467 {
4ba5ea11 1468 same_align_drs_max = same_align_drs;
720f5239
IR
1469 dr0 = dr;
1470 }
4ba5ea11
RB
1471 /* For data-refs with the same number of related
1472 accesses prefer the one where the misalign
1473 computation will be invariant in the outermost loop. */
1474 else if (same_align_drs_max == same_align_drs)
1475 {
1476 struct loop *ivloop0, *ivloop;
1477 ivloop0 = outermost_invariant_loop_for_expr
1478 (loop, DR_BASE_ADDRESS (dr0));
1479 ivloop = outermost_invariant_loop_for_expr
1480 (loop, DR_BASE_ADDRESS (dr));
1481 if ((ivloop && !ivloop0)
1482 || (ivloop && ivloop0
1483 && flow_loop_nested_p (ivloop, ivloop0)))
1484 dr0 = dr;
1485 }
720f5239 1486
b0af49c4 1487 if (!first_store && DR_IS_WRITE (dr))
720f5239
IR
1488 first_store = dr;
1489 }
1490
1491 /* If there are both known and unknown misaligned accesses in the
1492 loop, we choose peeling amount according to the known
1493 accesses. */
720f5239
IR
1494 if (!supportable_dr_alignment)
1495 {
1496 dr0 = dr;
b0af49c4 1497 if (!first_store && DR_IS_WRITE (dr))
720f5239
IR
1498 first_store = dr;
1499 }
1500 }
1501 }
1502 else
1503 {
1504 if (!aligned_access_p (dr))
1505 {
73fbfcad 1506 if (dump_enabled_p ())
e645e942
TJ
1507 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1508 "vector alignment may not be reachable\n");
720f5239
IR
1509 break;
1510 }
1511 }
ebfd146a
IR
1512 }
1513
afb119be
RB
1514 /* Check if we can possibly peel the loop. */
1515 if (!vect_can_advance_ivs_p (loop_vinfo)
ebfd146a
IR
1516 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1517 do_peeling = false;
1518
720f5239
IR
1519 if (do_peeling && all_misalignments_unknown
1520 && vect_supportable_dr_alignment (dr0, false))
1521 {
1522
1523 /* Check if the target requires to prefer stores over loads, i.e., if
1524 misaligned stores are more expensive than misaligned loads (taking
1525 drs with same alignment into account). */
1526 if (first_store && DR_IS_READ (dr0))
1527 {
1528 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1529 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1530 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1531 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
9771b263
DN
1532 stmt_vector_for_cost dummy;
1533 dummy.create (2);
92345349
BS
1534
1535 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1536 &dummy);
1537 vect_get_data_access_cost (first_store, &store_inside_cost,
1538 &store_outside_cost, &dummy);
720f5239 1539
9771b263 1540 dummy.release ();
720f5239
IR
1541
1542 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1543 aligning the load DR0). */
1544 load_inside_penalty = store_inside_cost;
1545 load_outside_penalty = store_outside_cost;
9771b263
DN
1546 for (i = 0;
1547 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1548 DR_STMT (first_store))).iterate (i, &dr);
720f5239
IR
1549 i++)
1550 if (DR_IS_READ (dr))
1551 {
1552 load_inside_penalty += load_inside_cost;
1553 load_outside_penalty += load_outside_cost;
1554 }
1555 else
1556 {
1557 load_inside_penalty += store_inside_cost;
1558 load_outside_penalty += store_outside_cost;
1559 }
1560
1561 /* Calculate the penalty for leaving DR0 unaligned (by
1562 aligning the FIRST_STORE). */
1563 store_inside_penalty = load_inside_cost;
1564 store_outside_penalty = load_outside_cost;
9771b263
DN
1565 for (i = 0;
1566 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1567 DR_STMT (dr0))).iterate (i, &dr);
720f5239
IR
1568 i++)
1569 if (DR_IS_READ (dr))
1570 {
1571 store_inside_penalty += load_inside_cost;
1572 store_outside_penalty += load_outside_cost;
1573 }
1574 else
1575 {
1576 store_inside_penalty += store_inside_cost;
1577 store_outside_penalty += store_outside_cost;
1578 }
1579
1580 if (load_inside_penalty > store_inside_penalty
1581 || (load_inside_penalty == store_inside_penalty
1582 && load_outside_penalty > store_outside_penalty))
1583 dr0 = first_store;
1584 }
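      /* Worked example (editorial illustration; the cost values are
	 hypothetical, not from any real target): suppose a misaligned load
	 has inside cost 1 and a misaligned store has inside cost 3, and
	 neither DR0 (a load) nor FIRST_STORE has any same-aligned
	 references.  Then
	    load_inside_penalty  = store_inside_cost = 3
	      (cost of leaving the store misaligned when aligning the load),
	    store_inside_penalty = load_inside_cost  = 1
	      (cost of leaving the load misaligned when aligning the store),
	 and since 3 > 1 the code above switches DR0 to FIRST_STORE, i.e. it
	 peels so that the store becomes aligned.  */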
1585
1586 /* In case there are only loads with different unknown misalignments, use
1587 peeling only if it may help to align other accesses in the loop. */
9771b263
DN
1588 if (!first_store
1589 && !STMT_VINFO_SAME_ALIGN_REFS (
1590 vinfo_for_stmt (DR_STMT (dr0))).length ()
720f5239
IR
1591 && vect_supportable_dr_alignment (dr0, false)
1592 != dr_unaligned_supported)
1593 do_peeling = false;
1594 }
1595
1596 if (do_peeling && !dr0)
1597 {
 1598 /* Peeling is possible, but no data access strictly requires alignment to
 1599 be supported. So we try to choose the best possible peeling. */
1600
1601 /* We should get here only if there are drs with known misalignment. */
1602 gcc_assert (!all_misalignments_unknown);
1603
1604 /* Choose the best peeling from the hash table. */
c3e7ee41 1605 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
92345349 1606 &body_cost_vec);
720f5239
IR
1607 if (!dr0 || !npeel)
1608 do_peeling = false;
1609 }
1610
ebfd146a
IR
1611 if (do_peeling)
1612 {
720f5239
IR
1613 stmt = DR_STMT (dr0);
1614 stmt_info = vinfo_for_stmt (stmt);
1615 vectype = STMT_VINFO_VECTYPE (stmt_info);
1616 nelements = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
1617
1618 if (known_alignment_for_access_p (dr0))
1619 {
d8ba5b19
RG
1620 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1621 size_zero_node) < 0;
720f5239
IR
1622 if (!npeel)
1623 {
1624 /* Since it's known at compile time, compute the number of
1625 iterations in the peeled loop (the peeling factor) for use in
1626 updating DR_MISALIGNMENT values. The peeling factor is the
1627 vectorization factor minus the misalignment as an element
1628 count. */
1629 mis = DR_MISALIGNMENT (dr0);
1630 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
8b8bba2d
RG
1631 npeel = ((negative ? mis - nelements : nelements - mis)
1632 & (nelements - 1));
720f5239 1633 }
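	  /* Worked example (editorial illustration): for a V4SI access
	     (nelements = 4, element size 4 bytes) with
	     DR_MISALIGNMENT (dr0) == 8 bytes and a positive step,
		mis   = 8 / 4 = 2 elements,
		npeel = (4 - 2) & (4 - 1) = 2,
	     i.e. peeling two scalar iterations makes DR0 16-byte aligned.  */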
ebfd146a 1634
b8698a0f 1635 /* For interleaved data access every iteration accesses all the
ebfd146a
IR
1636 members of the group, therefore we divide the number of iterations
1637 by the group size. */
b8698a0f 1638 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
0d0293ac 1639 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
e14c1050 1640 npeel /= GROUP_SIZE (stmt_info);
ebfd146a 1641
73fbfcad 1642 if (dump_enabled_p ())
78c60e3d 1643 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1644 "Try peeling by %d\n", npeel);
ebfd146a
IR
1645 }
1646
1647 /* Ensure that all data refs can be vectorized after the peel. */
9771b263 1648 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1649 {
1650 int save_misalignment;
1651
1652 if (dr == dr0)
1653 continue;
1654
1655 stmt = DR_STMT (dr);
1656 stmt_info = vinfo_for_stmt (stmt);
1657 /* For interleaving, only the alignment of the first access
1658 matters. */
0d0293ac 1659 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
e14c1050 1660 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
ebfd146a
IR
1661 continue;
1662
319e6439
RG
1663 /* Strided loads perform only component accesses, alignment is
1664 irrelevant for them. */
1665 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1666 continue;
1667
ebfd146a
IR
1668 save_misalignment = DR_MISALIGNMENT (dr);
1669 vect_update_misalignment_for_peel (dr, dr0, npeel);
720f5239 1670 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
ebfd146a 1671 SET_DR_MISALIGNMENT (dr, save_misalignment);
b8698a0f 1672
ebfd146a
IR
1673 if (!supportable_dr_alignment)
1674 {
1675 do_peeling = false;
1676 break;
1677 }
1678 }
1679
720f5239
IR
1680 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1681 {
1682 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1683 if (!stat)
1684 do_peeling = false;
1685 else
c7e62a26 1686 {
9771b263 1687 body_cost_vec.release ();
c7e62a26
RG
1688 return stat;
1689 }
720f5239
IR
1690 }
1691
4f17aa0b
XDL
1692 if (do_peeling)
1693 {
1694 unsigned max_allowed_peel
1695 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
1696 if (max_allowed_peel != (unsigned)-1)
1697 {
1698 unsigned max_peel = npeel;
1699 if (max_peel == 0)
1700 {
1701 gimple dr_stmt = DR_STMT (dr0);
1702 stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
1703 tree vtype = STMT_VINFO_VECTYPE (vinfo);
1704 max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
1705 }
1706 if (max_peel > max_allowed_peel)
1707 {
1708 do_peeling = false;
1709 if (dump_enabled_p ())
1710 dump_printf_loc (MSG_NOTE, vect_location,
1711 "Disable peeling, max peels reached: %d\n", max_peel);
1712 }
1713 }
1714 }
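  /* Worked example (editorial illustration): with an eight-element vector
     type and NPEEL still unknown (npeel == 0), MAX_PEEL is taken to be
     8 - 1 = 7.  If the user caps peeling, e.g. with something like
     --param vect-max-peeling-for-alignment=4 (parameter name assumed from
     PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT), then 7 > 4 and peeling is
     disabled.  */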
1715
ebfd146a
IR
1716 if (do_peeling)
1717 {
c3e7ee41 1718 stmt_info_for_cost *si;
92345349 1719 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
c3e7ee41 1720
ebfd146a
IR
1721 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1722 If the misalignment of DR_i is identical to that of dr0 then set
1723 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1724 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1725 by the peeling factor times the element size of DR_i (MOD the
1726 vectorization factor times the size). Otherwise, the
1727 misalignment of DR_i must be set to unknown. */
9771b263 1728 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1729 if (dr != dr0)
1730 vect_update_misalignment_for_peel (dr, dr0, npeel);
1731
1732 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
720f5239
IR
1733 if (npeel)
1734 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1735 else
1736 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
ebfd146a 1737 SET_DR_MISALIGNMENT (dr0, 0);
73fbfcad 1738 if (dump_enabled_p ())
78c60e3d
SS
1739 {
1740 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1741 "Alignment of access forced using peeling.\n");
78c60e3d 1742 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1743 "Peeling for alignment will be applied.\n");
78c60e3d 1744 }
c3e7ee41
BS
1745 /* We've delayed passing the inside-loop peeling costs to the
1746 target cost model until we were sure peeling would happen.
1747 Do so now. */
9771b263 1748 if (body_cost_vec.exists ())
c3e7ee41 1749 {
9771b263 1750 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
92345349
BS
1751 {
1752 struct _stmt_vec_info *stmt_info
1753 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1754 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1755 si->misalign, vect_body);
1756 }
9771b263 1757 body_cost_vec.release ();
c3e7ee41
BS
1758 }
1759
a70d6342 1760 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
ebfd146a
IR
1761 gcc_assert (stat);
1762 return stat;
1763 }
1764 }
1765
9771b263 1766 body_cost_vec.release ();
ebfd146a
IR
1767
1768 /* (2) Versioning to force alignment. */
1769
1770 /* Try versioning if:
d6d11272
XDL
1771 1) optimize loop for speed
1772 2) there is at least one unsupported misaligned data ref with an unknown
ebfd146a 1773 misalignment, and
d6d11272
XDL
1774 3) all misaligned data refs with a known misalignment are supported, and
1775 4) the number of runtime alignment checks is within reason. */
ebfd146a 1776
b8698a0f 1777 do_versioning =
d6d11272 1778 optimize_loop_nest_for_speed_p (loop)
ebfd146a
IR
1779 && (!loop->inner); /* FORNOW */
1780
1781 if (do_versioning)
1782 {
9771b263 1783 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1784 {
1785 stmt = DR_STMT (dr);
1786 stmt_info = vinfo_for_stmt (stmt);
1787
1788 /* For interleaving, only the alignment of the first access
1789 matters. */
1790 if (aligned_access_p (dr)
0d0293ac 1791 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
e14c1050 1792 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
ebfd146a
IR
1793 continue;
1794
319e6439
RG
1795 /* Strided loads perform only component accesses, alignment is
1796 irrelevant for them. */
1797 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1798 continue;
1799
720f5239 1800 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
ebfd146a
IR
1801
1802 if (!supportable_dr_alignment)
1803 {
1804 gimple stmt;
1805 int mask;
1806 tree vectype;
1807
1808 if (known_alignment_for_access_p (dr)
9771b263 1809 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
ebfd146a
IR
1810 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1811 {
1812 do_versioning = false;
1813 break;
1814 }
1815
1816 stmt = DR_STMT (dr);
1817 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1818 gcc_assert (vectype);
b8698a0f 1819
ebfd146a
IR
1820 /* The rightmost bits of an aligned address must be zeros.
1821 Construct the mask needed for this test. For example,
1822 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1823 mask must be 15 = 0xf. */
1824 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
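	      /* Editorial sketch (assumed, based on the runtime check built
		 by vect_create_cond_for_align_checks): the versioned loop is
		 guarded by a test of roughly the form
		    ((addr_1 | addr_2 | ... | addr_n) & mask) == 0,
		 branching to the vectorized copy when it holds and to the
		 scalar copy otherwise, where the addr_i are the addresses of
		 the may-misalign references and mask is 15 for 16-byte
		 vectors as in the example above.  */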
1825
1826 /* FORNOW: use the same mask to test all potentially unaligned
1827 references in the loop. The vectorizer currently supports
1828 a single vector size, see the reference to
1829 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1830 vectorization factor is computed. */
1831 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1832 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1833 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
9771b263
DN
1834 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
1835 DR_STMT (dr));
ebfd146a
IR
1836 }
1837 }
b8698a0f 1838
ebfd146a 1839 /* Versioning requires at least one misaligned data reference. */
e9dbe7bb 1840 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
ebfd146a
IR
1841 do_versioning = false;
1842 else if (!do_versioning)
9771b263 1843 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
ebfd146a
IR
1844 }
1845
1846 if (do_versioning)
1847 {
9771b263 1848 vec<gimple> may_misalign_stmts
ebfd146a
IR
1849 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1850 gimple stmt;
1851
1852 /* It can now be assumed that the data references in the statements
1853 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1854 of the loop being vectorized. */
9771b263 1855 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
ebfd146a
IR
1856 {
1857 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1858 dr = STMT_VINFO_DATA_REF (stmt_info);
1859 SET_DR_MISALIGNMENT (dr, 0);
73fbfcad 1860 if (dump_enabled_p ())
e645e942
TJ
1861 dump_printf_loc (MSG_NOTE, vect_location,
1862 "Alignment of access forced using versioning.\n");
ebfd146a
IR
1863 }
1864
73fbfcad 1865 if (dump_enabled_p ())
e645e942
TJ
1866 dump_printf_loc (MSG_NOTE, vect_location,
1867 "Versioning for alignment will be applied.\n");
ebfd146a
IR
1868
1869 /* Peeling and versioning can't be done together at this time. */
1870 gcc_assert (! (do_peeling && do_versioning));
1871
a70d6342 1872 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
ebfd146a
IR
1873 gcc_assert (stat);
1874 return stat;
1875 }
1876
1877 /* This point is reached if neither peeling nor versioning is being done. */
1878 gcc_assert (! (do_peeling || do_versioning));
1879
a70d6342 1880 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
ebfd146a
IR
1881 return stat;
1882}
1883
1884
777e1f09
RG
1885/* Function vect_find_same_alignment_drs.
1886
1887 Update group and alignment relations according to the chosen
1888 vectorization factor. */
1889
1890static void
1891vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1892 loop_vec_info loop_vinfo)
1893{
1894 unsigned int i;
1895 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1896 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1897 struct data_reference *dra = DDR_A (ddr);
1898 struct data_reference *drb = DDR_B (ddr);
1899 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1900 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1901 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1902 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1903 lambda_vector dist_v;
1904 unsigned int loop_depth;
1905
1906 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1907 return;
1908
720f5239 1909 if (dra == drb)
777e1f09
RG
1910 return;
1911
1912 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1913 return;
1914
1915 /* Loop-based vectorization and known data dependence. */
1916 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1917 return;
1918
46241ea9
RG
1919 /* Data-dependence analysis reports a distance vector of zero
1920 for data-references that overlap only in the first iteration
1921 but have different sign step (see PR45764).
1922 So as a sanity check require equal DR_STEP. */
1923 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1924 return;
1925
777e1f09 1926 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
9771b263 1927 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
777e1f09
RG
1928 {
1929 int dist = dist_v[loop_depth];
1930
73fbfcad 1931 if (dump_enabled_p ())
78c60e3d 1932 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1933 "dependence distance = %d.\n", dist);
777e1f09
RG
1934
1935 /* Same loop iteration. */
1936 if (dist == 0
1937 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1938 {
1939 /* Two references with distance zero have the same alignment. */
9771b263
DN
1940 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
1941 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
73fbfcad 1942 if (dump_enabled_p ())
777e1f09 1943 {
e645e942
TJ
1944 dump_printf_loc (MSG_NOTE, vect_location,
1945 "accesses have the same alignment.\n");
78c60e3d 1946 dump_printf (MSG_NOTE,
e645e942 1947 "dependence distance modulo vf == 0 between ");
78c60e3d
SS
1948 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
1949 dump_printf (MSG_NOTE, " and ");
1950 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
e645e942 1951 dump_printf (MSG_NOTE, "\n");
777e1f09
RG
1952 }
1953 }
1954 }
1955}
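
/* Worked example (editorial illustration): in

     for (i = 0; i < n; i++)
       a[i] = a[i + 4] + x;

   with 4-byte elements and a vectorization factor of 4, the dependence
   distance between the two references to A is 4; 4 % 4 == 0 and both
   references have the same size, so each is pushed onto the other's
   STMT_VINFO_SAME_ALIGN_REFS list.  */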
1956
1957
ebfd146a
IR
1958/* Function vect_analyze_data_refs_alignment
1959
1960 Analyze the alignment of the data-references in the loop.
1961 Return FALSE if a data reference is found that cannot be vectorized. */
1962
1963bool
b8698a0f 1964vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
a70d6342 1965 bb_vec_info bb_vinfo)
ebfd146a 1966{
73fbfcad 1967 if (dump_enabled_p ())
78c60e3d 1968 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1969 "=== vect_analyze_data_refs_alignment ===\n");
ebfd146a 1970
777e1f09
RG
1971 /* Mark groups of data references with same alignment using
1972 data dependence information. */
1973 if (loop_vinfo)
1974 {
9771b263 1975 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
777e1f09
RG
1976 struct data_dependence_relation *ddr;
1977 unsigned int i;
1978
9771b263 1979 FOR_EACH_VEC_ELT (ddrs, i, ddr)
777e1f09
RG
1980 vect_find_same_alignment_drs (ddr, loop_vinfo);
1981 }
1982
a70d6342 1983 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
ebfd146a 1984 {
73fbfcad 1985 if (dump_enabled_p ())
e645e942
TJ
1986 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1987 "not vectorized: can't calculate alignment "
1988 "for data ref.\n");
ebfd146a
IR
1989 return false;
1990 }
1991
1992 return true;
1993}
1994
1995
0d0293ac
MM
1996/* Analyze groups of accesses: check that DR belongs to a group of
1997 accesses of legal size, step, etc. Detect gaps, single element
1998 interleaving, and other special cases. Set grouped access info.
ebfd146a
IR
1999 Collect groups of strided stores for further use in SLP analysis. */
2000
2001static bool
2002vect_analyze_group_access (struct data_reference *dr)
2003{
2004 tree step = DR_STEP (dr);
2005 tree scalar_type = TREE_TYPE (DR_REF (dr));
2006 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2007 gimple stmt = DR_STMT (dr);
2008 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2009 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 2010 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
ebfd146a 2011 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
0d0293ac 2012 HOST_WIDE_INT groupsize, last_accessed_element = 1;
ebfd146a 2013 bool slp_impossible = false;
deaf836c
IR
2014 struct loop *loop = NULL;
2015
2016 if (loop_vinfo)
2017 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 2018
0d0293ac
MM
2019 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2020 size of the interleaving group (including gaps). */
08940f33 2021 groupsize = absu_hwi (dr_step) / type_size;
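  /* Worked example (editorial illustration): for a 4-byte element accessed
     as a[4*i], DR_STEP is 16 bytes, so GROUPSIZE = 16 / 4 = 4, i.e. each
     iteration "owns" a slot of four elements even if only one of them is
     actually touched.  */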
ebfd146a
IR
2022
 2023 /* A non-consecutive access is possible only if it is part of an interleaving group. */
e14c1050 2024 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a
IR
2025 {
 2026 /* Check if this DR is a part of interleaving, and is a single
2027 element of the group that is accessed in the loop. */
b8698a0f 2028
ebfd146a
IR
2029 /* Gaps are supported only for loads. STEP must be a multiple of the type
2030 size. The size of the group must be a power of 2. */
2031 if (DR_IS_READ (dr)
2032 && (dr_step % type_size) == 0
0d0293ac
MM
2033 && groupsize > 0
2034 && exact_log2 (groupsize) != -1)
ebfd146a 2035 {
e14c1050 2036 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
0d0293ac 2037 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
73fbfcad 2038 if (dump_enabled_p ())
ebfd146a 2039 {
e645e942
TJ
2040 dump_printf_loc (MSG_NOTE, vect_location,
2041 "Detected single element interleaving ");
78c60e3d
SS
2042 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2043 dump_printf (MSG_NOTE, " step ");
2044 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
e645e942 2045 dump_printf (MSG_NOTE, "\n");
ebfd146a 2046 }
48df3fa6
IR
2047
2048 if (loop_vinfo)
2049 {
73fbfcad 2050 if (dump_enabled_p ())
78c60e3d 2051 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
2052 "Data access with gaps requires scalar "
2053 "epilogue loop\n");
deaf836c
IR
2054 if (loop->inner)
2055 {
73fbfcad 2056 if (dump_enabled_p ())
78c60e3d
SS
2057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2058 "Peeling for outer loop is not"
e645e942 2059 " supported\n");
deaf836c
IR
2060 return false;
2061 }
2062
2063 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
48df3fa6
IR
2064 }
2065
ebfd146a
IR
2066 return true;
2067 }
4b5caab7 2068
73fbfcad 2069 if (dump_enabled_p ())
4b5caab7 2070 {
78c60e3d 2071 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
2072 "not consecutive access ");
2073 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2074 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4b5caab7
IR
2075 }
2076
2077 if (bb_vinfo)
2078 {
2079 /* Mark the statement as unvectorizable. */
2080 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2081 return true;
2082 }
78c60e3d 2083
ebfd146a
IR
2084 return false;
2085 }
2086
e14c1050 2087 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
ebfd146a
IR
2088 {
2089 /* First stmt in the interleaving chain. Check the chain. */
e14c1050 2090 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
ebfd146a 2091 struct data_reference *data_ref = dr;
df398a37 2092 unsigned int count = 1;
ebfd146a
IR
2093 tree prev_init = DR_INIT (data_ref);
2094 gimple prev = stmt;
08940f33
RB
2095 HOST_WIDE_INT diff, gaps = 0;
2096 unsigned HOST_WIDE_INT count_in_bytes;
ebfd146a
IR
2097
2098 while (next)
2099 {
ff802fa1
IR
2100 /* Skip same data-refs. In case that two or more stmts share
2101 data-ref (supported only for loads), we vectorize only the first
2102 stmt, and the rest get their vectorized loads from the first
2103 one. */
ebfd146a
IR
2104 if (!tree_int_cst_compare (DR_INIT (data_ref),
2105 DR_INIT (STMT_VINFO_DATA_REF (
2106 vinfo_for_stmt (next)))))
2107 {
b0af49c4 2108 if (DR_IS_WRITE (data_ref))
ebfd146a 2109 {
73fbfcad 2110 if (dump_enabled_p ())
e645e942
TJ
2111 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2112 "Two store stmts share the same dr.\n");
ebfd146a
IR
2113 return false;
2114 }
2115
ebfd146a 2116 /* For load use the same data-ref load. */
e14c1050 2117 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
ebfd146a
IR
2118
2119 prev = next;
e14c1050 2120 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
ebfd146a
IR
2121 continue;
2122 }
48df3fa6 2123
ebfd146a 2124 prev = next;
08940f33 2125 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
ebfd146a 2126
08940f33
RB
2127 /* All group members have the same STEP by construction. */
2128 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
ebfd146a 2129
ebfd146a
IR
2130 /* Check that the distance between two accesses is equal to the type
2131 size. Otherwise, we have gaps. */
2132 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2133 - TREE_INT_CST_LOW (prev_init)) / type_size;
2134 if (diff != 1)
2135 {
2136 /* FORNOW: SLP of accesses with gaps is not supported. */
2137 slp_impossible = true;
b0af49c4 2138 if (DR_IS_WRITE (data_ref))
ebfd146a 2139 {
73fbfcad 2140 if (dump_enabled_p ())
e645e942
TJ
2141 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2142 "interleaved store with gaps\n");
ebfd146a
IR
2143 return false;
2144 }
4da39468
IR
2145
2146 gaps += diff - 1;
ebfd146a
IR
2147 }
2148
48df3fa6
IR
2149 last_accessed_element += diff;
2150
ebfd146a 2151 /* Store the gap from the previous member of the group. If there is no
e14c1050
IR
2152 gap in the access, GROUP_GAP is always 1. */
2153 GROUP_GAP (vinfo_for_stmt (next)) = diff;
ebfd146a
IR
2154
2155 prev_init = DR_INIT (data_ref);
e14c1050 2156 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
ebfd146a
IR
2157 /* Count the number of data-refs in the chain. */
2158 count++;
2159 }
2160
 2161 /* COUNT is the number of accesses found; we multiply it by the size of
2162 the type to get COUNT_IN_BYTES. */
2163 count_in_bytes = type_size * count;
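      /* Worked example (editorial illustration): for two 4-byte loads
	 a[4*i] and a[4*i + 2] in the same chain, GROUPSIZE is 4, COUNT is 2,
	 COUNT_IN_BYTES is 8 and GAPS is 1 (the missing a[4*i + 1]).  The
	 step (16) is not smaller than 8 + 1*4, so the group passes the size
	 check below; since 16 != 8 there is also a gap at the end of the
	 group and, the refs being loads, GROUP_GAP of the first stmt becomes
	 4 - 2 = 2 while SLP is marked impossible.  */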
2164
b8698a0f 2165 /* Check that the size of the interleaving (including gaps) is not
a70d6342 2166 greater than STEP. */
08940f33
RB
2167 if (dr_step != 0
2168 && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
ebfd146a 2169 {
73fbfcad 2170 if (dump_enabled_p ())
ebfd146a 2171 {
e645e942 2172 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2173 "interleaving size is greater than step for ");
e645e942
TJ
2174 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2175 DR_REF (dr));
2176 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
2177 }
2178 return false;
2179 }
2180
2181 /* Check that the size of the interleaving is equal to STEP for stores,
2182 i.e., that there are no gaps. */
08940f33
RB
2183 if (dr_step != 0
2184 && absu_hwi (dr_step) != count_in_bytes)
ebfd146a
IR
2185 {
2186 if (DR_IS_READ (dr))
2187 {
2188 slp_impossible = true;
2189 /* There is a gap after the last load in the group. This gap is a
0d0293ac
MM
2190 difference between the groupsize and the number of elements.
2191 When there is no gap, this difference should be 0. */
2192 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
ebfd146a
IR
2193 }
2194 else
2195 {
73fbfcad 2196 if (dump_enabled_p ())
e645e942
TJ
2197 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2198 "interleaved store with gaps\n");
ebfd146a
IR
2199 return false;
2200 }
2201 }
2202
2203 /* Check that STEP is a multiple of type size. */
08940f33
RB
2204 if (dr_step != 0
2205 && (dr_step % type_size) != 0)
ebfd146a 2206 {
73fbfcad 2207 if (dump_enabled_p ())
ebfd146a 2208 {
78c60e3d
SS
2209 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2210 "step is not a multiple of type size: step ");
2211 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2212 dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2213 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2214 TYPE_SIZE_UNIT (scalar_type));
e645e942 2215 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
2216 }
2217 return false;
2218 }
2219
0d0293ac
MM
2220 if (groupsize == 0)
2221 groupsize = count;
b8698a0f 2222
0d0293ac 2223 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
73fbfcad 2224 if (dump_enabled_p ())
e645e942
TJ
2225 dump_printf_loc (MSG_NOTE, vect_location,
2226 "Detected interleaving of size %d\n", (int)groupsize);
ebfd146a 2227
b8698a0f 2228 /* SLP: create an SLP data structure for every interleaving group of
ebfd146a 2229 stores for further analysis in vect_analyze_slp. */
b0af49c4 2230 if (DR_IS_WRITE (dr) && !slp_impossible)
a70d6342
IR
2231 {
2232 if (loop_vinfo)
9771b263 2233 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
a70d6342 2234 if (bb_vinfo)
9771b263 2235 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
a70d6342 2236 }
48df3fa6
IR
2237
2238 /* There is a gap in the end of the group. */
0d0293ac 2239 if (groupsize - last_accessed_element > 0 && loop_vinfo)
48df3fa6 2240 {
73fbfcad 2241 if (dump_enabled_p ())
78c60e3d 2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
2243 "Data access with gaps requires scalar "
2244 "epilogue loop\n");
deaf836c
IR
2245 if (loop->inner)
2246 {
73fbfcad 2247 if (dump_enabled_p ())
e645e942
TJ
2248 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2249 "Peeling for outer loop is not supported\n");
deaf836c
IR
2250 return false;
2251 }
2252
2253 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
48df3fa6 2254 }
ebfd146a
IR
2255 }
2256
2257 return true;
2258}
2259
2260
2261/* Analyze the access pattern of the data-reference DR.
2262 In case of non-consecutive accesses call vect_analyze_group_access() to
0d0293ac 2263 analyze groups of accesses. */
ebfd146a
IR
2264
2265static bool
2266vect_analyze_data_ref_access (struct data_reference *dr)
2267{
2268 tree step = DR_STEP (dr);
2269 tree scalar_type = TREE_TYPE (DR_REF (dr));
2270 gimple stmt = DR_STMT (dr);
2271 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2272 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 2273 struct loop *loop = NULL;
ebfd146a 2274
a70d6342
IR
2275 if (loop_vinfo)
2276 loop = LOOP_VINFO_LOOP (loop_vinfo);
b8698a0f 2277
a70d6342 2278 if (loop_vinfo && !step)
ebfd146a 2279 {
73fbfcad 2280 if (dump_enabled_p ())
e645e942
TJ
2281 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2282 "bad data-ref access in loop\n");
ebfd146a
IR
2283 return false;
2284 }
2285
6e8dad05 2286 /* Allow invariant loads in not nested loops. */
319e6439 2287 if (loop_vinfo && integer_zerop (step))
39becbac
RG
2288 {
2289 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
6e8dad05
RB
2290 if (nested_in_vect_loop_p (loop, stmt))
2291 {
2292 if (dump_enabled_p ())
2293 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2294 "zero step in inner loop of nest\n");
6e8dad05
RB
2295 return false;
2296 }
39becbac
RG
2297 return DR_IS_READ (dr);
2298 }
ebfd146a 2299
a70d6342 2300 if (loop && nested_in_vect_loop_p (loop, stmt))
ebfd146a
IR
2301 {
2302 /* Interleaved accesses are not yet supported within outer-loop
2303 vectorization for references in the inner-loop. */
e14c1050 2304 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
ebfd146a
IR
2305
2306 /* For the rest of the analysis we use the outer-loop step. */
2307 step = STMT_VINFO_DR_STEP (stmt_info);
319e6439 2308 if (integer_zerop (step))
ebfd146a 2309 {
73fbfcad 2310 if (dump_enabled_p ())
78c60e3d 2311 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2312 "zero step in outer loop.\n");
ebfd146a 2313 if (DR_IS_READ (dr))
b8698a0f 2314 return true;
ebfd146a
IR
2315 else
2316 return false;
2317 }
2318 }
2319
2320 /* Consecutive? */
319e6439 2321 if (TREE_CODE (step) == INTEGER_CST)
ebfd146a 2322 {
319e6439
RG
2323 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2324 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2325 || (dr_step < 0
2326 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2327 {
2328 /* Mark that it is not interleaving. */
2329 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2330 return true;
2331 }
ebfd146a
IR
2332 }
2333
a70d6342 2334 if (loop && nested_in_vect_loop_p (loop, stmt))
ebfd146a 2335 {
73fbfcad 2336 if (dump_enabled_p ())
78c60e3d 2337 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2338 "grouped access in outer loop.\n");
ebfd146a
IR
2339 return false;
2340 }
2341
319e6439
RG
2342 /* Assume this is a DR handled by non-constant strided load case. */
2343 if (TREE_CODE (step) != INTEGER_CST)
2344 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2345
ebfd146a
IR
 2346 /* Not a consecutive access - check if it's part of an interleaving group. */
2347 return vect_analyze_group_access (dr);
2348}
2349
839c74bc
CH
2350
2351
2352/* A helper function used in the comparator function to sort data
2353 references. T1 and T2 are two data references to be compared.
2354 The function returns -1, 0, or 1. */
2355
2356static int
2357compare_tree (tree t1, tree t2)
2358{
2359 int i, cmp;
2360 enum tree_code code;
2361 char tclass;
2362
2363 if (t1 == t2)
2364 return 0;
2365 if (t1 == NULL)
2366 return -1;
2367 if (t2 == NULL)
2368 return 1;
2369
2370
2371 if (TREE_CODE (t1) != TREE_CODE (t2))
2372 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
2373
2374 code = TREE_CODE (t1);
2375 switch (code)
2376 {
2377 /* For const values, we can just use hash values for comparisons. */
2378 case INTEGER_CST:
2379 case REAL_CST:
2380 case FIXED_CST:
2381 case STRING_CST:
2382 case COMPLEX_CST:
2383 case VECTOR_CST:
2384 {
2385 hashval_t h1 = iterative_hash_expr (t1, 0);
2386 hashval_t h2 = iterative_hash_expr (t2, 0);
2387 if (h1 != h2)
2388 return h1 < h2 ? -1 : 1;
2389 break;
2390 }
2391
2392 case SSA_NAME:
2393 cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
2394 if (cmp != 0)
2395 return cmp;
2396
2397 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
2398 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
2399 break;
2400
2401 default:
2402 tclass = TREE_CODE_CLASS (code);
2403
2404 /* For var-decl, we could compare their UIDs. */
2405 if (tclass == tcc_declaration)
2406 {
2407 if (DECL_UID (t1) != DECL_UID (t2))
2408 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
2409 break;
2410 }
2411
2412 /* For expressions with operands, compare their operands recursively. */
2413 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
2414 {
2415 cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
2416 if (cmp != 0)
2417 return cmp;
2418 }
2419 }
2420
2421 return 0;
2422}
2423
2424
5abe1e05
RB
 2425/* Compare two data-references DRA and DRB to order them into chunks
 2426 suitable for building interleaving groups. */
2427
2428static int
2429dr_group_sort_cmp (const void *dra_, const void *drb_)
2430{
2431 data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2432 data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
5abe1e05
RB
2433 int cmp;
2434
2435 /* Stabilize sort. */
2436 if (dra == drb)
2437 return 0;
2438
2439 /* Ordering of DRs according to base. */
2440 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
2441 {
839c74bc
CH
2442 cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb));
2443 if (cmp != 0)
2444 return cmp;
5abe1e05
RB
2445 }
2446
2447 /* And according to DR_OFFSET. */
2448 if (!dr_equal_offsets_p (dra, drb))
2449 {
839c74bc
CH
2450 cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2451 if (cmp != 0)
2452 return cmp;
5abe1e05
RB
2453 }
2454
2455 /* Put reads before writes. */
2456 if (DR_IS_READ (dra) != DR_IS_READ (drb))
2457 return DR_IS_READ (dra) ? -1 : 1;
2458
2459 /* Then sort after access size. */
2460 if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2461 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
2462 {
839c74bc
CH
2463 cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2464 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2465 if (cmp != 0)
2466 return cmp;
5abe1e05
RB
2467 }
2468
2469 /* And after step. */
2470 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2471 {
839c74bc
CH
2472 cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
2473 if (cmp != 0)
2474 return cmp;
5abe1e05
RB
2475 }
2476
2477 /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2478 cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
2479 if (cmp == 0)
2480 return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2481 return cmp;
2482}
ebfd146a
IR
2483
2484/* Function vect_analyze_data_ref_accesses.
2485
2486 Analyze the access pattern of all the data references in the loop.
2487
2488 FORNOW: the only access pattern that is considered vectorizable is a
2489 simple step 1 (consecutive) access.
2490
2491 FORNOW: handle only arrays and pointer accesses. */
2492
2493bool
a70d6342 2494vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a
IR
2495{
2496 unsigned int i;
9771b263 2497 vec<data_reference_p> datarefs;
ebfd146a
IR
2498 struct data_reference *dr;
2499
73fbfcad 2500 if (dump_enabled_p ())
78c60e3d 2501 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2502 "=== vect_analyze_data_ref_accesses ===\n");
ebfd146a 2503
a70d6342
IR
2504 if (loop_vinfo)
2505 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2506 else
2507 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2508
5abe1e05
RB
2509 if (datarefs.is_empty ())
2510 return true;
2511
2512 /* Sort the array of datarefs to make building the interleaving chains
2513 linear. */
c3284718 2514 qsort (datarefs.address (), datarefs.length (),
5abe1e05
RB
2515 sizeof (data_reference_p), dr_group_sort_cmp);
2516
2517 /* Build the interleaving chains. */
2518 for (i = 0; i < datarefs.length () - 1;)
2519 {
2520 data_reference_p dra = datarefs[i];
2521 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2522 stmt_vec_info lastinfo = NULL;
2523 for (i = i + 1; i < datarefs.length (); ++i)
2524 {
2525 data_reference_p drb = datarefs[i];
2526 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2527
2528 /* ??? Imperfect sorting (non-compatible types, non-modulo
 2529 accesses, same accesses) can lead to a group being artificially
 2530 split here as we don't just skip over those. If it really
 2531 matters we can push those to a worklist and re-iterate
 2532 over them. Then we can just skip ahead to the next DR here. */
2533
2534 /* Check that the data-refs have same first location (except init)
2535 and they are both either store or load (not load and store). */
2536 if (DR_IS_READ (dra) != DR_IS_READ (drb)
2537 || !operand_equal_p (DR_BASE_ADDRESS (dra),
2538 DR_BASE_ADDRESS (drb), 0)
2539 || !dr_equal_offsets_p (dra, drb))
2540 break;
2541
2542 /* Check that the data-refs have the same constant size and step. */
2543 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2544 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
2545 if (!host_integerp (sza, 1)
2546 || !host_integerp (szb, 1)
2547 || !tree_int_cst_equal (sza, szb)
2548 || !host_integerp (DR_STEP (dra), 0)
2549 || !host_integerp (DR_STEP (drb), 0)
2550 || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
2551 break;
2552
2553 /* Do not place the same access in the interleaving chain twice. */
2554 if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
2555 break;
2556
2557 /* Check the types are compatible.
2558 ??? We don't distinguish this during sorting. */
2559 if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2560 TREE_TYPE (DR_REF (drb))))
2561 break;
2562
2563 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
2564 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2565 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
2566 gcc_assert (init_a < init_b);
2567
2568 /* If init_b == init_a + the size of the type * k, we have an
2569 interleaving, and DRA is accessed before DRB. */
2570 HOST_WIDE_INT type_size_a = TREE_INT_CST_LOW (sza);
2571 if ((init_b - init_a) % type_size_a != 0)
2572 break;
2573
2574 /* The step (if not zero) is greater than the difference between
2575 data-refs' inits. This splits groups into suitable sizes. */
2576 HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (dra));
2577 if (step != 0 && step <= (init_b - init_a))
2578 break;
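	  /* Worked example (editorial illustration): for 4-byte stores to
	     a[2*i] and a[2*i + 1], both DRs share base, offset and step (8),
	     INIT_A = 0 and INIT_B = 4, so (4 - 0) % 4 == 0 and 8 > 4, and
	     the two DRs are chained into one group.  For a[i] and a[i + 10]
	     (step 4, inits 0 and 40) the condition 4 <= 40 holds, the loop
	     breaks above, and the accesses stay in separate groups.  */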
2579
2580 if (dump_enabled_p ())
2581 {
2582 dump_printf_loc (MSG_NOTE, vect_location,
2583 "Detected interleaving ");
2584 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2585 dump_printf (MSG_NOTE, " and ");
2586 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
e645e942 2587 dump_printf (MSG_NOTE, "\n");
5abe1e05
RB
2588 }
2589
2590 /* Link the found element into the group list. */
2591 if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
2592 {
2593 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
2594 lastinfo = stmtinfo_a;
2595 }
2596 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
2597 GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
2598 lastinfo = stmtinfo_b;
2599 }
2600 }
2601
9771b263 2602 FOR_EACH_VEC_ELT (datarefs, i, dr)
4b5caab7
IR
2603 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2604 && !vect_analyze_data_ref_access (dr))
ebfd146a 2605 {
73fbfcad 2606 if (dump_enabled_p ())
e645e942
TJ
2607 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2608 "not vectorized: complicated access pattern.\n");
4b5caab7
IR
2609
2610 if (bb_vinfo)
2611 {
2612 /* Mark the statement as not vectorizable. */
2613 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2614 continue;
2615 }
2616 else
2617 return false;
ebfd146a
IR
2618 }
2619
2620 return true;
2621}
2622
a05a89fa
CH
2623
2624/* Operator == between two dr_addr_with_seg_len objects.
2625
 2626 This equality operator is used to make sure two data refs
 2627 are the same, so that we can consider combining the
 2628 aliasing checks of those two pairs of data-dependent data
 2629 refs. */
2630
2631static bool
2632operator == (const dr_addr_with_seg_len& d1,
2633 const dr_addr_with_seg_len& d2)
2634{
2635 return operand_equal_p (d1.basic_addr, d2.basic_addr, 0)
2636 && compare_tree (d1.offset, d2.offset) == 0
2637 && compare_tree (d1.seg_len, d2.seg_len) == 0;
2638}
2639
2640/* Function comp_dr_addr_with_seg_len_pair.
2641
2642 Comparison function for sorting objects of dr_addr_with_seg_len_pair_t
2643 so that we can combine aliasing checks in one scan. */
2644
2645static int
2646comp_dr_addr_with_seg_len_pair (const void *p1_, const void *p2_)
2647{
2648 const dr_addr_with_seg_len_pair_t* p1 =
2649 (const dr_addr_with_seg_len_pair_t *) p1_;
2650 const dr_addr_with_seg_len_pair_t* p2 =
2651 (const dr_addr_with_seg_len_pair_t *) p2_;
2652
2653 const dr_addr_with_seg_len &p11 = p1->first,
2654 &p12 = p1->second,
2655 &p21 = p2->first,
2656 &p22 = p2->second;
2657
2658 int comp_res = compare_tree (p11.basic_addr, p21.basic_addr);
2659 if (comp_res != 0)
2660 return comp_res;
2661
2662 comp_res = compare_tree (p12.basic_addr, p22.basic_addr);
2663 if (comp_res != 0)
2664 return comp_res;
2665
2666 if (TREE_CODE (p11.offset) != INTEGER_CST
2667 || TREE_CODE (p21.offset) != INTEGER_CST)
2668 {
2669 comp_res = compare_tree (p11.offset, p21.offset);
2670 if (comp_res != 0)
2671 return comp_res;
2672 }
1e563667 2673 else if (tree_int_cst_compare (p11.offset, p21.offset) < 0)
a05a89fa 2674 return -1;
1e563667 2675 else if (tree_int_cst_compare (p11.offset, p21.offset) > 0)
a05a89fa
CH
2676 return 1;
2677 if (TREE_CODE (p12.offset) != INTEGER_CST
2678 || TREE_CODE (p22.offset) != INTEGER_CST)
2679 {
2680 comp_res = compare_tree (p12.offset, p22.offset);
2681 if (comp_res != 0)
2682 return comp_res;
2683 }
1e563667 2684 else if (tree_int_cst_compare (p12.offset, p22.offset) < 0)
a05a89fa 2685 return -1;
1e563667 2686 else if (tree_int_cst_compare (p12.offset, p22.offset) > 0)
a05a89fa
CH
2687 return 1;
2688
2689 return 0;
2690}
2691
2692template <class T> static void
2693swap (T& a, T& b)
2694{
2695 T c (a);
2696 a = b;
2697 b = c;
2698}
2699
2700/* Function vect_vfa_segment_size.
2701
2702 Create an expression that computes the size of segment
2703 that will be accessed for a data reference. The functions takes into
2704 account that realignment loads may access one more vector.
2705
2706 Input:
2707 DR: The data reference.
2708 LENGTH_FACTOR: segment length to consider.
2709
2710 Return an expression whose value is the size of segment which will be
2711 accessed by DR. */
2712
2713static tree
2714vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2715{
2716 tree segment_length;
2717
2718 if (integer_zerop (DR_STEP (dr)))
2719 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2720 else
2721 segment_length = size_binop (MULT_EXPR,
2722 fold_convert (sizetype, DR_STEP (dr)),
2723 fold_convert (sizetype, length_factor));
2724
2725 if (vect_supportable_dr_alignment (dr, false)
2726 == dr_explicit_realign_optimized)
2727 {
2728 tree vector_size = TYPE_SIZE_UNIT
2729 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2730
2731 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2732 }
2733 return segment_length;
2734}
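
/* Worked example (editorial illustration): for a 4-byte access with
   DR_STEP == 4 and LENGTH_FACTOR equal to a vectorization factor of 4, the
   segment length is 4 * 4 = 16 bytes; if the access uses the optimized
   explicit-realignment scheme, one extra vector (e.g. 16 more bytes) is
   added, because such loads may read one vector beyond the segment.  */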
2735
ebfd146a
IR
2736/* Function vect_prune_runtime_alias_test_list.
2737
2738 Prune a list of ddrs to be tested at run-time by versioning for alias.
a05a89fa 2739 Merge several alias checks into one if possible.
ebfd146a
IR
 2740 Return FALSE if the resulting list of ddrs is longer than allowed by
2741 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2742
2743bool
2744vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2745{
a05a89fa 2746 vec<ddr_p> may_alias_ddrs =
ebfd146a 2747 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
a05a89fa
CH
2748 vec<dr_addr_with_seg_len_pair_t>& comp_alias_ddrs =
2749 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2750 int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2751 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2752
2753 ddr_p ddr;
2754 unsigned int i;
2755 tree length_factor;
ebfd146a 2756
73fbfcad 2757 if (dump_enabled_p ())
78c60e3d 2758 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2759 "=== vect_prune_runtime_alias_test_list ===\n");
ebfd146a 2760
a05a89fa
CH
2761 if (may_alias_ddrs.is_empty ())
2762 return true;
2763
2764 /* Basically, for each pair of dependent data refs store_ptr_0
2765 and load_ptr_0, we create an expression:
2766
2767 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2768 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2769
2770 for aliasing checks. However, in some cases we can decrease
2771 the number of checks by combining two checks into one. For
2772 example, suppose we have another pair of data refs store_ptr_0
2773 and load_ptr_1, and if the following condition is satisfied:
2774
2775 load_ptr_0 < load_ptr_1 &&
2776 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
2777
 2778 (this condition means, in each iteration of the vectorized loop,
2779 the accessed memory of store_ptr_0 cannot be between the memory
2780 of load_ptr_0 and load_ptr_1.)
2781
2782 we then can use only the following expression to finish the
 2783 aliasing checks between store_ptr_0 & load_ptr_0 and
2784 store_ptr_0 & load_ptr_1:
2785
2786 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2787 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
2788
2789 Note that we only consider that load_ptr_0 and load_ptr_1 have the
2790 same basic address. */
2791
2792 comp_alias_ddrs.create (may_alias_ddrs.length ());
2793
2794 /* First, we collect all data ref pairs for aliasing checks. */
2795 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
ebfd146a 2796 {
a05a89fa
CH
2797 struct data_reference *dr_a, *dr_b;
2798 gimple dr_group_first_a, dr_group_first_b;
2799 tree segment_length_a, segment_length_b;
2800 gimple stmt_a, stmt_b;
2801
2802 dr_a = DDR_A (ddr);
2803 stmt_a = DR_STMT (DDR_A (ddr));
2804 dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2805 if (dr_group_first_a)
2806 {
2807 stmt_a = dr_group_first_a;
2808 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2809 }
ebfd146a 2810
a05a89fa
CH
2811 dr_b = DDR_B (ddr);
2812 stmt_b = DR_STMT (DDR_B (ddr));
2813 dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2814 if (dr_group_first_b)
2815 {
2816 stmt_b = dr_group_first_b;
2817 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2818 }
ebfd146a 2819
a05a89fa
CH
2820 if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2821 length_factor = scalar_loop_iters;
2822 else
2823 length_factor = size_int (vect_factor);
2824 segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2825 segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2826
2827 dr_addr_with_seg_len_pair_t dr_with_seg_len_pair
2828 (dr_addr_with_seg_len
2829 (dr_a, DR_BASE_ADDRESS (dr_a),
2830 size_binop (PLUS_EXPR, DR_OFFSET (dr_a), DR_INIT (dr_a)),
2831 segment_length_a),
2832 dr_addr_with_seg_len
2833 (dr_b, DR_BASE_ADDRESS (dr_b),
2834 size_binop (PLUS_EXPR, DR_OFFSET (dr_b), DR_INIT (dr_b)),
2835 segment_length_b));
2836
2837 if (compare_tree (dr_with_seg_len_pair.first.basic_addr,
2838 dr_with_seg_len_pair.second.basic_addr) > 0)
2839 swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
2840
2841 comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
2842 }
2843
2844 /* Second, we sort the collected data ref pairs so that we can scan
2845 them once to combine all possible aliasing checks. */
2846 comp_alias_ddrs.qsort (comp_dr_addr_with_seg_len_pair);
ebfd146a 2847
a05a89fa
CH
2848 /* Third, we scan the sorted dr pairs and check if we can combine
2849 alias checks of two neighbouring dr pairs. */
2850 for (size_t i = 1; i < comp_alias_ddrs.length (); ++i)
2851 {
2852 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
2853 dr_addr_with_seg_len *dr_a1 = &comp_alias_ddrs[i-1].first,
2854 *dr_b1 = &comp_alias_ddrs[i-1].second,
2855 *dr_a2 = &comp_alias_ddrs[i].first,
2856 *dr_b2 = &comp_alias_ddrs[i].second;
2857
2858 /* Remove duplicate data ref pairs. */
2859 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
2860 {
2861 if (dump_enabled_p ())
ebfd146a 2862 {
a05a89fa
CH
2863 dump_printf_loc (MSG_NOTE, vect_location,
2864 "found equal ranges ");
2865 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2866 DR_REF (dr_a1->dr));
2867 dump_printf (MSG_NOTE, ", ");
2868 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2869 DR_REF (dr_b1->dr));
2870 dump_printf (MSG_NOTE, " and ");
2871 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2872 DR_REF (dr_a2->dr));
2873 dump_printf (MSG_NOTE, ", ");
2874 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2875 DR_REF (dr_b2->dr));
2876 dump_printf (MSG_NOTE, "\n");
ebfd146a 2877 }
a05a89fa
CH
2878
2879 comp_alias_ddrs.ordered_remove (i--);
2880 continue;
ebfd146a 2881 }
b8698a0f 2882
a05a89fa
CH
2883 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
2884 {
2885 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
2886 and DR_A1 and DR_A2 are two consecutive memrefs. */
2887 if (*dr_a1 == *dr_a2)
2888 {
2889 swap (dr_a1, dr_b1);
2890 swap (dr_a2, dr_b2);
2891 }
2892
2893 if (!operand_equal_p (dr_a1->basic_addr, dr_a2->basic_addr, 0)
2894 || !host_integerp (dr_a1->offset, 0)
2895 || !host_integerp (dr_a2->offset, 0))
2896 continue;
2897
2898 HOST_WIDE_INT diff = TREE_INT_CST_LOW (dr_a2->offset) -
2899 TREE_INT_CST_LOW (dr_a1->offset);
2900
2901
2902 /* Now we check if the following condition is satisfied:
2903
2904 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
2905
2906 where DIFF = DR_A2->OFFSET - DR_A1->OFFSET. However,
2907 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
2908 have to make a best estimation. We can get the minimum value
2909 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
2910 then either of the following two conditions can guarantee the
2911 one above:
2912
2913 1: DIFF <= MIN_SEG_LEN_B
2914 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
2915
2916 */
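
	  /* Worked example (editorial illustration): if DR_A1 and DR_A2 are
	     the 4-byte loads a[i] and a[i + 1], both checked against the
	     same store, then DIFF = 4; with a vectorization factor of 4 both
	     segment lengths are 16 and MIN_SEG_LEN_B = 16, so condition 1
	     holds (4 <= 16), the two checks are merged, DR_A1's segment
	     length becomes 16 + 4 = 20 and the second pair is removed.  */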
2917
2918 HOST_WIDE_INT
2919 min_seg_len_b = (TREE_CODE (dr_b1->seg_len) == INTEGER_CST) ?
2920 TREE_INT_CST_LOW (dr_b1->seg_len) :
2921 vect_factor;
2922
2923 if (diff <= min_seg_len_b
2924 || (TREE_CODE (dr_a1->seg_len) == INTEGER_CST
2925 && diff - (HOST_WIDE_INT) TREE_INT_CST_LOW (dr_a1->seg_len) <
2926 min_seg_len_b))
2927 {
2928 dr_a1->seg_len = size_binop (PLUS_EXPR,
2929 dr_a2->seg_len, size_int (diff));
2930 comp_alias_ddrs.ordered_remove (i--);
2931 }
2932 }
ebfd146a
IR
2933 }
2934
a05a89fa
CH
2935 if ((int) comp_alias_ddrs.length () >
2936 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
ebfd146a 2937 {
73fbfcad 2938 if (dump_enabled_p ())
ebfd146a 2939 {
e645e942
TJ
2940 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2941 "disable versioning for alias - max number of "
2942 "generated checks exceeded.\n");
ebfd146a
IR
2943 }
2944
ebfd146a
IR
2945 return false;
2946 }
2947
2948 return true;
2949}
2950
aec7ae7d
JJ
2951/* Check whether a non-affine read in stmt is suitable for gather load
2952 and if so, return a builtin decl for that operation. */
2953
2954tree
2955vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2956 tree *offp, int *scalep)
2957{
2958 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2959 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2960 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2961 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2962 tree offtype = NULL_TREE;
2963 tree decl, base, off;
2964 enum machine_mode pmode;
2965 int punsignedp, pvolatilep;
2966
2967 /* The gather builtins need address of the form
2968 loop_invariant + vector * {1, 2, 4, 8}
2969 or
2970 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2971 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2972 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2973 multiplications and additions in it. To get a vector, we need
2974 a single SSA_NAME that will be defined in the loop and will
2975 contain everything that is not loop invariant and that can be
 2976 vectorized. The following code attempts to find such a preexisting
2977 SSA_NAME OFF and put the loop invariants into a tree BASE
2978 that can be gimplified before the loop. */
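  /* Worked example (editorial illustration): for a read like a[b[i]] with
     4-byte elements, where A is loop invariant and b[i] varies in the loop,
     the code below is expected to end up with BASE = &a (plus any constant
     byte offset), OFF = the SSA_NAME holding the (possibly sign-extended)
     value of b[i], and SCALE = 4, matching the
     loop_invariant + vector * {1, 2, 4, 8} form described above.  */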
2979 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2980 &pmode, &punsignedp, &pvolatilep, false);
2981 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2982
2983 if (TREE_CODE (base) == MEM_REF)
2984 {
2985 if (!integer_zerop (TREE_OPERAND (base, 1)))
2986 {
2987 if (off == NULL_TREE)
2988 {
2989 double_int moff = mem_ref_offset (base);
2990 off = double_int_to_tree (sizetype, moff);
2991 }
2992 else
2993 off = size_binop (PLUS_EXPR, off,
2994 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2995 }
2996 base = TREE_OPERAND (base, 0);
2997 }
2998 else
2999 base = build_fold_addr_expr (base);
3000
3001 if (off == NULL_TREE)
3002 off = size_zero_node;
3003
3004 /* If base is not loop invariant, either off is 0, then we start with just
3005 the constant offset in the loop invariant BASE and continue with base
3006 as OFF, otherwise give up.
3007 We could handle that case by gimplifying the addition of base + off
3008 into some SSA_NAME and use that as off, but for now punt. */
3009 if (!expr_invariant_in_loop_p (loop, base))
3010 {
3011 if (!integer_zerop (off))
3012 return NULL_TREE;
3013 off = base;
3014 base = size_int (pbitpos / BITS_PER_UNIT);
3015 }
3016 /* Otherwise put base + constant offset into the loop invariant BASE
3017 and continue with OFF. */
3018 else
3019 {
3020 base = fold_convert (sizetype, base);
3021 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
3022 }
3023
3024 /* OFF at this point may be either a SSA_NAME or some tree expression
3025 from get_inner_reference. Try to peel off loop invariants from it
3026 into BASE as long as possible. */
3027 STRIP_NOPS (off);
3028 while (offtype == NULL_TREE)
3029 {
3030 enum tree_code code;
3031 tree op0, op1, add = NULL_TREE;
3032
3033 if (TREE_CODE (off) == SSA_NAME)
3034 {
3035 gimple def_stmt = SSA_NAME_DEF_STMT (off);
3036
3037 if (expr_invariant_in_loop_p (loop, off))
3038 return NULL_TREE;
3039
3040 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3041 break;
3042
3043 op0 = gimple_assign_rhs1 (def_stmt);
3044 code = gimple_assign_rhs_code (def_stmt);
3045 op1 = gimple_assign_rhs2 (def_stmt);
3046 }
3047 else
3048 {
3049 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
3050 return NULL_TREE;
3051 code = TREE_CODE (off);
3052 extract_ops_from_tree (off, &code, &op0, &op1);
3053 }
3054 switch (code)
3055 {
3056 case POINTER_PLUS_EXPR:
3057 case PLUS_EXPR:
3058 if (expr_invariant_in_loop_p (loop, op0))
3059 {
3060 add = op0;
3061 off = op1;
3062 do_add:
3063 add = fold_convert (sizetype, add);
3064 if (scale != 1)
3065 add = size_binop (MULT_EXPR, add, size_int (scale));
3066 base = size_binop (PLUS_EXPR, base, add);
3067 continue;
3068 }
3069 if (expr_invariant_in_loop_p (loop, op1))
3070 {
3071 add = op1;
3072 off = op0;
3073 goto do_add;
3074 }
3075 break;
3076 case MINUS_EXPR:
3077 if (expr_invariant_in_loop_p (loop, op1))
3078 {
3079 add = fold_convert (sizetype, op1);
3080 add = size_binop (MINUS_EXPR, size_zero_node, add);
3081 off = op0;
3082 goto do_add;
3083 }
3084 break;
3085 case MULT_EXPR:
3086 if (scale == 1 && host_integerp (op1, 0))
3087 {
3088 scale = tree_low_cst (op1, 0);
3089 off = op0;
3090 continue;
3091 }
3092 break;
3093 case SSA_NAME:
3094 off = op0;
3095 continue;
3096 CASE_CONVERT:
3097 if (!POINTER_TYPE_P (TREE_TYPE (op0))
3098 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3099 break;
3100 if (TYPE_PRECISION (TREE_TYPE (op0))
3101 == TYPE_PRECISION (TREE_TYPE (off)))
3102 {
3103 off = op0;
3104 continue;
3105 }
3106 if (TYPE_PRECISION (TREE_TYPE (op0))
3107 < TYPE_PRECISION (TREE_TYPE (off)))
3108 {
3109 off = op0;
3110 offtype = TREE_TYPE (off);
3111 STRIP_NOPS (off);
3112 continue;
3113 }
3114 break;
3115 default:
3116 break;
3117 }
3118 break;
3119 }
3120
3121 /* If at the end OFF still isn't a SSA_NAME or isn't
3122 defined in the loop, punt. */
3123 if (TREE_CODE (off) != SSA_NAME
3124 || expr_invariant_in_loop_p (loop, off))
3125 return NULL_TREE;
3126
3127 if (offtype == NULL_TREE)
3128 offtype = TREE_TYPE (off);
3129
3130 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
3131 offtype, scale);
3132 if (decl == NULL_TREE)
3133 return NULL_TREE;
3134
3135 if (basep)
3136 *basep = base;
3137 if (offp)
3138 *offp = off;
3139 if (scalep)
3140 *scalep = scale;
3141 return decl;
3142}
3143
ebfd146a
IR
3144/* Function vect_analyze_data_refs.
3145
a70d6342 3146 Find all the data references in the loop or basic block.
ebfd146a
IR
3147
3148 The general structure of the analysis of data refs in the vectorizer is as
3149 follows:
b8698a0f 3150 1- vect_analyze_data_refs(loop/bb): call
a70d6342
IR
3151 compute_data_dependences_for_loop/bb to find and analyze all data-refs
3152 in the loop/bb and their dependences.
ebfd146a
IR
3153 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3154 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3155 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3156
3157*/
3158
3159bool
777e1f09
RG
3160vect_analyze_data_refs (loop_vec_info loop_vinfo,
3161 bb_vec_info bb_vinfo,
3162 int *min_vf)
ebfd146a 3163{
a70d6342
IR
3164 struct loop *loop = NULL;
3165 basic_block bb = NULL;
ebfd146a 3166 unsigned int i;
9771b263 3167 vec<data_reference_p> datarefs;
ebfd146a
IR
3168 struct data_reference *dr;
3169 tree scalar_type;
3170
73fbfcad 3171 if (dump_enabled_p ())
78c60e3d
SS
3172 dump_printf_loc (MSG_NOTE, vect_location,
3173 "=== vect_analyze_data_refs ===\n");
b8698a0f 3174
a70d6342
IR
3175 if (loop_vinfo)
3176 {
3177 loop = LOOP_VINFO_LOOP (loop_vinfo);
fcac74a1
RB
3178 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))
3179 || find_data_references_in_loop
3180 (loop, &LOOP_VINFO_DATAREFS (loop_vinfo)))
22a8be9e 3181 {
73fbfcad 3182 if (dump_enabled_p ())
e645e942
TJ
3183 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3184 "not vectorized: loop contains function calls"
3185 " or data references that cannot be analyzed\n");
22a8be9e
SP
3186 return false;
3187 }
3188
a70d6342
IR
3189 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3190 }
3191 else
3192 {
1aedeafe
RG
3193 gimple_stmt_iterator gsi;
3194
a70d6342 3195 bb = BB_VINFO_BB (bb_vinfo);
1aedeafe
RG
3196 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3197 {
3198 gimple stmt = gsi_stmt (gsi);
3199 if (!find_data_references_in_stmt (NULL, stmt,
3200 &BB_VINFO_DATAREFS (bb_vinfo)))
3201 {
3202 /* Mark the rest of the basic-block as unvectorizable. */
3203 for (; !gsi_end_p (gsi); gsi_next (&gsi))
d4d5e146
RG
3204 {
3205 stmt = gsi_stmt (gsi);
3206 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3207 }
1aedeafe
RG
3208 break;
3209 }
3210 }
22a8be9e 3211
a70d6342
IR
3212 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3213 }
ebfd146a 3214
ff802fa1
IR
3215 /* Go through the data-refs, check that the analysis succeeded. Update
3216 pointer from stmt_vec_info struct to DR and vectype. */
ebfd146a 3217
9771b263 3218 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
3219 {
3220 gimple stmt;
3221 stmt_vec_info stmt_info;
b8698a0f 3222 tree base, offset, init;
aec7ae7d 3223 bool gather = false;
74bf76ed 3224 bool simd_lane_access = false;
777e1f09 3225 int vf;
b8698a0f 3226
fbd7e877 3227again:
ebfd146a
IR
3228 if (!dr || !DR_REF (dr))
3229 {
73fbfcad 3230 if (dump_enabled_p ())
78c60e3d 3231 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3232 "not vectorized: unhandled data-ref\n");
ebfd146a
IR
3233 return false;
3234 }
3235
3236 stmt = DR_STMT (dr);
3237 stmt_info = vinfo_for_stmt (stmt);
3238
fbd7e877
RB
3239 /* Discard clobbers from the dataref vector. We will remove
3240 clobber stmts during vectorization. */
3241 if (gimple_clobber_p (stmt))
3242 {
3243 if (i == datarefs.length () - 1)
3244 {
3245 datarefs.pop ();
3246 break;
3247 }
3248 datarefs[i] = datarefs.pop ();
3249 goto again;
3250 }
3251
ebfd146a
IR
3252 /* Check that analysis of the data-ref succeeded. */
3253 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
aec7ae7d 3254 || !DR_STEP (dr))
ebfd146a 3255 {
74bf76ed
JJ
3256 bool maybe_gather
3257 = DR_IS_READ (dr)
aec7ae7d 3258 && !TREE_THIS_VOLATILE (DR_REF (dr))
74bf76ed
JJ
3259 && targetm.vectorize.builtin_gather != NULL;
3260 bool maybe_simd_lane_access
3261 = loop_vinfo && loop->simduid;
3262
3263 /* If the target supports vector gather loads, or if this might be
3264 a SIMD lane access, see whether they can be used. */
3265 if (loop_vinfo
3266 && (maybe_gather || maybe_simd_lane_access)
aec7ae7d
JJ
3267 && !nested_in_vect_loop_p (loop, stmt))
3268 {
3269 struct data_reference *newdr
3270 = create_data_ref (NULL, loop_containing_stmt (stmt),
3271 DR_REF (dr), stmt, true);
3272 gcc_assert (newdr != NULL && DR_REF (newdr));
3273 if (DR_BASE_ADDRESS (newdr)
3274 && DR_OFFSET (newdr)
3275 && DR_INIT (newdr)
3276 && DR_STEP (newdr)
3277 && integer_zerop (DR_STEP (newdr)))
3278 {
74bf76ed
JJ
3279 if (maybe_simd_lane_access)
3280 {
3281 tree off = DR_OFFSET (newdr);
3282 STRIP_NOPS (off);
3283 if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
3284 && TREE_CODE (off) == MULT_EXPR
3285 && host_integerp (TREE_OPERAND (off, 1), 1))
3286 {
3287 tree step = TREE_OPERAND (off, 1);
3288 off = TREE_OPERAND (off, 0);
3289 STRIP_NOPS (off);
3290 if (CONVERT_EXPR_P (off)
3291 && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
3292 0)))
3293 < TYPE_PRECISION (TREE_TYPE (off)))
3294 off = TREE_OPERAND (off, 0);
3295 if (TREE_CODE (off) == SSA_NAME)
3296 {
3297 gimple def = SSA_NAME_DEF_STMT (off);
3298 tree reft = TREE_TYPE (DR_REF (newdr));
3299 if (gimple_call_internal_p (def)
3300 && gimple_call_internal_fn (def)
3301 == IFN_GOMP_SIMD_LANE)
3302 {
3303 tree arg = gimple_call_arg (def, 0);
3304 gcc_assert (TREE_CODE (arg) == SSA_NAME);
3305 arg = SSA_NAME_VAR (arg);
3306 if (arg == loop->simduid
3307 /* For now. */
3308 && tree_int_cst_equal
3309 (TYPE_SIZE_UNIT (reft),
3310 step))
3311 {
3312 DR_OFFSET (newdr) = ssize_int (0);
3313 DR_STEP (newdr) = step;
995a1b4a
JJ
3314 DR_ALIGNED_TO (newdr)
3315 = size_int (BIGGEST_ALIGNMENT);
74bf76ed
JJ
3316 dr = newdr;
3317 simd_lane_access = true;
3318 }
3319 }
3320 }
3321 }
3322 }
3323 if (!simd_lane_access && maybe_gather)
3324 {
3325 dr = newdr;
3326 gather = true;
3327 }
aec7ae7d 3328 }
74bf76ed 3329 if (!gather && !simd_lane_access)
aec7ae7d
JJ
3330 free_data_ref (newdr);
3331 }
4b5caab7 3332
74bf76ed 3333 if (!gather && !simd_lane_access)
aec7ae7d 3334 {
73fbfcad 3335 if (dump_enabled_p ())
aec7ae7d 3336 {
e645e942 3337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
3338 "not vectorized: data ref analysis "
3339 "failed ");
3340 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3341 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
aec7ae7d 3342 }
ba65ae42 3343
aec7ae7d 3344 if (bb_vinfo)
fcac74a1 3345 break;
aec7ae7d
JJ
3346
3347 return false;
3348 }
ebfd146a
IR
3349 }
3350
3351 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3352 {
73fbfcad 3353 if (dump_enabled_p ())
78c60e3d
SS
3354 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3355 "not vectorized: base addr of dr is a "
e645e942 3356 "constant\n");
ba65ae42
IR
3357
3358 if (bb_vinfo)
fcac74a1 3359 break;
ba65ae42 3360
74bf76ed 3361 if (gather || simd_lane_access)
aec7ae7d
JJ
3362 free_data_ref (dr);
3363 return false;
ebfd146a
IR
3364 }
3365
8f7de592
IR
3366 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3367 {
73fbfcad 3368 if (dump_enabled_p ())
8f7de592 3369 {
78c60e3d
SS
3370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3371 "not vectorized: volatile type ");
3372 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3373 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8f7de592 3374 }
ba65ae42
IR
3375
3376 if (bb_vinfo)
fcac74a1 3377 break;
ba65ae42 3378
8f7de592
IR
3379 return false;
3380 }
3381
822ba6d7 3382 if (stmt_can_throw_internal (stmt))
5a2c1986 3383 {
73fbfcad 3384 if (dump_enabled_p ())
5a2c1986 3385 {
78c60e3d
SS
3386 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3387 "not vectorized: statement can throw an "
3388 "exception ");
3389 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3390 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5a2c1986 3391 }
ba65ae42
IR
3392
3393 if (bb_vinfo)
fcac74a1 3394 break;
ba65ae42 3395
74bf76ed 3396 if (gather || simd_lane_access)
aec7ae7d 3397 free_data_ref (dr);
5a2c1986
IR
3398 return false;
3399 }
3400
508ef0c6
RG
3401 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3402 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3403 {
73fbfcad 3404 if (dump_enabled_p ())
508ef0c6 3405 {
78c60e3d
SS
3406 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3407 "not vectorized: statement is bitfield "
3408 "access ");
3409 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3410 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
508ef0c6
RG
3411 }
3412
3413 if (bb_vinfo)
fcac74a1 3414 break;
508ef0c6 3415
74bf76ed 3416 if (gather || simd_lane_access)
508ef0c6
RG
3417 free_data_ref (dr);
3418 return false;
3419 }
3420
3421 base = unshare_expr (DR_BASE_ADDRESS (dr));
3422 offset = unshare_expr (DR_OFFSET (dr));
3423 init = unshare_expr (DR_INIT (dr));
3424
9c239085
JJ
3425 if (is_gimple_call (stmt))
3426 {
73fbfcad 3427 if (dump_enabled_p ())
9c239085 3428 {
78c60e3d 3429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3430 "not vectorized: dr in a call ");
78c60e3d 3431 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3432 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
9c239085
JJ
3433 }
3434
3435 if (bb_vinfo)
fcac74a1 3436 break;
9c239085 3437
74bf76ed 3438 if (gather || simd_lane_access)
9c239085
JJ
3439 free_data_ref (dr);
3440 return false;
3441 }
3442
ebfd146a 3443 /* Update DR field in stmt_vec_info struct. */
ebfd146a
IR
3444
3445 /* If the dataref is in an inner-loop of the loop that is considered
3446 for vectorization, we also want to analyze the access relative to
b8698a0f 3447 the outer-loop (DR contains information only relative to the
ebfd146a
IR
3448 inner-most enclosing loop). We do that by building a reference to the
3449 first location accessed by the inner-loop, and analyzing it relative to
b8698a0f
L
3450 the outer-loop. */
3451 if (loop && nested_in_vect_loop_p (loop, stmt))
ebfd146a
IR
3452 {
3453 tree outer_step, outer_base, outer_init;
3454 HOST_WIDE_INT pbitsize, pbitpos;
3455 tree poffset;
3456 enum machine_mode pmode;
3457 int punsignedp, pvolatilep;
3458 affine_iv base_iv, offset_iv;
3459 tree dinit;
3460
b8698a0f 3461 /* Build a reference to the first location accessed by the
ff802fa1 3462 inner-loop: *(BASE+INIT). (The first location is actually
ebfd146a
IR
3463 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3464 tree inner_base = build_fold_indirect_ref
5d49b6a7 3465 (fold_build_pointer_plus (base, init));
ebfd146a 3466
73fbfcad 3467 if (dump_enabled_p ())
ebfd146a 3468 {
78c60e3d
SS
3469 dump_printf_loc (MSG_NOTE, vect_location,
3470 "analyze in outer-loop: ");
3471 dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
e645e942 3472 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
3473 }
3474
b8698a0f 3475 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
ebfd146a
IR
3476 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3477 gcc_assert (outer_base != NULL_TREE);
3478
3479 if (pbitpos % BITS_PER_UNIT != 0)
3480 {
73fbfcad 3481 if (dump_enabled_p ())
78c60e3d
SS
3482 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3483 "failed: bit offset alignment.\n");
ebfd146a
IR
3484 return false;
3485 }
3486
3487 outer_base = build_fold_addr_expr (outer_base);
b8698a0f 3488 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
ebfd146a
IR
3489 &base_iv, false))
3490 {
73fbfcad 3491 if (dump_enabled_p ())
e645e942 3492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3493 "failed: evolution of base is not affine.\n");
ebfd146a
IR
3494 return false;
3495 }
3496
3497 if (offset)
3498 {
3499 if (poffset)
b8698a0f 3500 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
ebfd146a
IR
3501 poffset);
3502 else
3503 poffset = offset;
3504 }
3505
3506 if (!poffset)
3507 {
3508 offset_iv.base = ssize_int (0);
3509 offset_iv.step = ssize_int (0);
3510 }
b8698a0f 3511 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
ebfd146a
IR
3512 &offset_iv, false))
3513 {
73fbfcad 3514 if (dump_enabled_p ())
e645e942 3515 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3516 "evolution of offset is not affine.\n");
ebfd146a
IR
3517 return false;
3518 }
3519
3520 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3521 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3522 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3523 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3524 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3525
3526 outer_step = size_binop (PLUS_EXPR,
3527 fold_convert (ssizetype, base_iv.step),
3528 fold_convert (ssizetype, offset_iv.step));
3529
3530 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3531 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
b8698a0f 3532 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
ebfd146a 3533 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
b8698a0f 3534 STMT_VINFO_DR_OFFSET (stmt_info) =
ebfd146a 3535 fold_convert (ssizetype, offset_iv.base);
b8698a0f 3536 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
ebfd146a
IR
3537 size_int (highest_pow2_factor (offset_iv.base));
3538
73fbfcad 3539 if (dump_enabled_p ())
ebfd146a 3540 {
78c60e3d
SS
3541 dump_printf_loc (MSG_NOTE, vect_location,
3542 "\touter base_address: ");
3543 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3544 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3545 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3546 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3547 STMT_VINFO_DR_OFFSET (stmt_info));
3548 dump_printf (MSG_NOTE,
3549 "\n\touter constant offset from base address: ");
3550 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3551 STMT_VINFO_DR_INIT (stmt_info));
3552 dump_printf (MSG_NOTE, "\n\touter step: ");
3553 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3554 STMT_VINFO_DR_STEP (stmt_info));
3555 dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3556 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3557 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
e645e942 3558 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
3559 }
3560 }
3561
3562 if (STMT_VINFO_DATA_REF (stmt_info))
3563 {
73fbfcad 3564 if (dump_enabled_p ())
ebfd146a 3565 {
78c60e3d
SS
3566 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3567 "not vectorized: more than one data ref "
3568 "in stmt: ");
3569 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3570 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 3571 }
ba65ae42
IR
3572
3573 if (bb_vinfo)
fcac74a1 3574 break;
ba65ae42 3575
74bf76ed 3576 if (gather || simd_lane_access)
aec7ae7d 3577 free_data_ref (dr);
ebfd146a
IR
3578 return false;
3579 }
8644a673 3580
ebfd146a 3581 STMT_VINFO_DATA_REF (stmt_info) = dr;
74bf76ed
JJ
3582 if (simd_lane_access)
3583 {
3584 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
3585 datarefs[i] = dr;
3586 }
b8698a0f 3587
ebfd146a
IR
3588 /* Set vectype for STMT. */
3589 scalar_type = TREE_TYPE (DR_REF (dr));
3590 STMT_VINFO_VECTYPE (stmt_info) =
3591 get_vectype_for_scalar_type (scalar_type);
b8698a0f 3592 if (!STMT_VINFO_VECTYPE (stmt_info))
ebfd146a 3593 {
73fbfcad 3594 if (dump_enabled_p ())
ebfd146a 3595 {
e645e942 3596 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
3597 "not vectorized: no vectype for stmt: ");
3598 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3599 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3600 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3601 scalar_type);
e645e942 3602 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 3603 }
4b5caab7
IR
3604
3605 if (bb_vinfo)
fcac74a1 3606 break;
aec7ae7d 3607
74bf76ed 3608 if (gather || simd_lane_access)
aec7ae7d
JJ
3609 {
3610 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3611 free_data_ref (dr);
3612 }
3613 return false;
ebfd146a 3614 }
451dabda
RB
3615 else
3616 {
3617 if (dump_enabled_p ())
3618 {
3619 dump_printf_loc (MSG_NOTE, vect_location,
3620 "got vectype for stmt: ");
3621 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3622 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3623 STMT_VINFO_VECTYPE (stmt_info));
e645e942 3624 dump_printf (MSG_NOTE, "\n");
451dabda
RB
3625 }
3626 }
777e1f09
RG
3627
3628 /* Adjust the minimal vectorization factor according to the
3629 vector type. */
3630 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3631 if (vf > *min_vf)
3632 *min_vf = vf;
aec7ae7d
JJ
3633
3634 if (gather)
3635 {
aec7ae7d 3636 tree off;
aec7ae7d 3637
7d75abc8
MM
3638 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3639 if (gather
3640 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3641 gather = false;
319e6439 3642 if (!gather)
aec7ae7d 3643 {
6f723d33
JJ
3644 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3645 free_data_ref (dr);
73fbfcad 3646 if (dump_enabled_p ())
aec7ae7d 3647 {
78c60e3d
SS
3648 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3649 "not vectorized: not suitable for gather "
3650 "load ");
3651 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3652 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
aec7ae7d
JJ
3653 }
3654 return false;
3655 }
3656
9771b263 3657 datarefs[i] = dr;
319e6439
RG
3658 STMT_VINFO_GATHER_P (stmt_info) = true;
3659 }
3660 else if (loop_vinfo
3661 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3662 {
51a905b2
RB
3663 if (nested_in_vect_loop_p (loop, stmt)
3664 || !DR_IS_READ (dr))
319e6439 3665 {
73fbfcad 3666 if (dump_enabled_p ())
319e6439 3667 {
78c60e3d
SS
3668 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3669 "not vectorized: not suitable for strided "
3670 "load ");
3671 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3672 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
319e6439
RG
3673 }
3674 return false;
3675 }
3676 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
aec7ae7d 3677 }
ebfd146a 3678 }
b8698a0f 3679
fcac74a1
RB
3680 /* If we stopped analysis at the first dataref we could not analyze
3681 when trying to vectorize a basic-block mark the rest of the datarefs
3682 as not vectorizable and truncate the vector of datarefs. That
3683 avoids spending useless time in analyzing their dependence. */
3684 if (i != datarefs.length ())
3685 {
3686 gcc_assert (bb_vinfo != NULL);
3687 for (unsigned j = i; j < datarefs.length (); ++j)
3688 {
3689 data_reference_p dr = datarefs[j];
3690 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3691 free_data_ref (dr);
3692 }
3693 datarefs.truncate (i);
3694 }
3695
ebfd146a
IR
3696 return true;
3697}
3698
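/* Illustrative sketch only -- hypothetical example loops, not GCC code,
   kept under #if 0.  They show the two shapes of data reference that
   the analysis above distinguishes: `a[i]' has a constant DR_STEP and
   is handled as an ordinary access, while `a[idx[i]]' has no analyzable
   step and can only be vectorized as a gather load, provided the target
   implements the builtin_gather hook.  */
#if 0
void
linear_access (double *restrict out, const double *restrict a, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[i];            /* constant step: sizeof (double)  */
}

void
gather_access (double *restrict out, const double *restrict a,
               const int *restrict idx, int n)
{
  for (int i = 0; i < n; i++)
    out[i] = a[idx[i]];       /* per-iteration step -> gather  */
}
#endif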
3699
3700/* Function vect_get_new_vect_var.
3701
ff802fa1 3702 Returns a name for a new variable. The current naming scheme appends the
b8698a0f
L
3703 prefix "vect", "stmp" or "vectp" (depending on the value of VAR_KIND) to
3704 vectorizer-generated variables, prepending the prefix to NAME if
ebfd146a
IR
3705 provided. */
3706
3707tree
3708vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3709{
3710 const char *prefix;
3711 tree new_vect_var;
3712
3713 switch (var_kind)
3714 {
3715 case vect_simple_var:
451dabda 3716 prefix = "vect";
ebfd146a
IR
3717 break;
3718 case vect_scalar_var:
451dabda 3719 prefix = "stmp";
ebfd146a
IR
3720 break;
3721 case vect_pointer_var:
451dabda 3722 prefix = "vectp";
ebfd146a
IR
3723 break;
3724 default:
3725 gcc_unreachable ();
3726 }
3727
3728 if (name)
3729 {
451dabda 3730 char* tmp = concat (prefix, "_", name, NULL);
65876d24 3731 new_vect_var = create_tmp_reg (type, tmp);
ebfd146a
IR
3732 free (tmp);
3733 }
3734 else
65876d24 3735 new_vect_var = create_tmp_reg (type, prefix);
ebfd146a
IR
3736
3737 return new_vect_var;
3738}
3739
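/* Illustrative sketch only -- a hypothetical standalone program, kept
   under #if 0.  It mimics the naming scheme above: the VAR_KIND picks
   the prefix "vect", "stmp" or "vectp", and NAME, when given, is glued
   on with an underscore.  */
#if 0
#include <stdio.h>

int
main (void)
{
  const char *prefix = "vectp";   /* vect_pointer_var  */
  const char *name = "in";
  char buf[64];
  snprintf (buf, sizeof buf, "%s_%s", prefix, name);
  printf ("%s\n", buf);           /* prints: vectp_in  */
  return 0;
}
#endif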
3740
3741/* Function vect_create_addr_base_for_vector_ref.
3742
3743 Create an expression that computes the address of the first memory location
3744 that will be accessed for a data reference.
3745
3746 Input:
3747 STMT: The statement containing the data reference.
3748 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3749 OFFSET: Optional. If supplied, it is added to the initial address.
3750 LOOP: Specify relative to which loop-nest should the address be computed.
3751 For example, when the dataref is in an inner-loop nested in an
3752 outer-loop that is now being vectorized, LOOP can be either the
ff802fa1 3753 outer-loop, or the inner-loop. The first memory location accessed
ebfd146a
IR
3754 by the following dataref ('in' points to short):
3755
3756 for (i=0; i<N; i++)
3757 for (j=0; j<M; j++)
3758 s += in[i+j]
3759
3760 is as follows:
3761 if LOOP=i_loop: &in (relative to i_loop)
3762 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3763
3764 Output:
b8698a0f 3765 1. Return an SSA_NAME whose value is the address of the memory location of
ebfd146a
IR
3766 the first vector of the data reference.
3767 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3768 these statement(s) which define the returned SSA_NAME.
3769
3770 FORNOW: We are only handling array accesses with step 1. */
3771
3772tree
3773vect_create_addr_base_for_vector_ref (gimple stmt,
3774 gimple_seq *new_stmt_list,
3775 tree offset,
3776 struct loop *loop)
3777{
3778 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3779 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4bdd44c4 3780 tree data_ref_base;
595c2679 3781 const char *base_name;
4bdd44c4 3782 tree addr_base;
ebfd146a
IR
3783 tree dest;
3784 gimple_seq seq = NULL;
4bdd44c4
RB
3785 tree base_offset;
3786 tree init;
8644a673 3787 tree vect_ptr_type;
ebfd146a 3788 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
a70d6342 3789 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 3790
a70d6342 3791 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
ebfd146a 3792 {
a70d6342 3793 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 3794
a70d6342 3795 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
ebfd146a
IR
3796
3797 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3798 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3799 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3800 }
4bdd44c4
RB
3801 else
3802 {
3803 data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3804 base_offset = unshare_expr (DR_OFFSET (dr));
3805 init = unshare_expr (DR_INIT (dr));
3806 }
ebfd146a 3807
a70d6342 3808 if (loop_vinfo)
595c2679 3809 base_name = get_name (data_ref_base);
a70d6342
IR
3810 else
3811 {
3812 base_offset = ssize_int (0);
3813 init = ssize_int (0);
595c2679 3814 base_name = get_name (DR_REF (dr));
b8698a0f 3815 }
a70d6342 3816
ebfd146a
IR
3817 /* Create base_offset */
3818 base_offset = size_binop (PLUS_EXPR,
3819 fold_convert (sizetype, base_offset),
3820 fold_convert (sizetype, init));
ebfd146a
IR
3821
3822 if (offset)
3823 {
ebfd146a
IR
3824 offset = fold_build2 (MULT_EXPR, sizetype,
3825 fold_convert (sizetype, offset), step);
3826 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3827 base_offset, offset);
ebfd146a
IR
3828 }
3829
3830 /* base + base_offset */
a70d6342 3831 if (loop_vinfo)
5d49b6a7 3832 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
a70d6342
IR
3833 else
3834 {
70f34814
RG
3835 addr_base = build1 (ADDR_EXPR,
3836 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3837 unshare_expr (DR_REF (dr)));
a70d6342 3838 }
b8698a0f 3839
ebfd146a 3840 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
4bdd44c4
RB
3841 addr_base = fold_convert (vect_ptr_type, addr_base);
3842 dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
3843 addr_base = force_gimple_operand (addr_base, &seq, false, dest);
ebfd146a
IR
3844 gimple_seq_add_seq (new_stmt_list, seq);
3845
17fc049f 3846 if (DR_PTR_INFO (dr)
4bdd44c4 3847 && TREE_CODE (addr_base) == SSA_NAME)
128aaeed 3848 {
4bdd44c4 3849 duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
128aaeed 3850 if (offset)
4bdd44c4 3851 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
128aaeed 3852 }
17fc049f 3853
73fbfcad 3854 if (dump_enabled_p ())
ebfd146a 3855 {
78c60e3d 3856 dump_printf_loc (MSG_NOTE, vect_location, "created ");
4bdd44c4 3857 dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
e645e942 3858 dump_printf (MSG_NOTE, "\n");
ebfd146a 3859 }
8644a673 3860
4bdd44c4 3861 return addr_base;
ebfd146a
IR
3862}
3863
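/* Illustrative sketch only -- hypothetical code under #if 0, not the
   vectorizer's own arithmetic.  It restates what the function above
   computes, in plain byte arithmetic: the first address is
   BASE + (OFFSET + INIT), and an optional extra OFFSET is scaled by
   the element size (STEP) first.  For the `in[i+j]' example in the
   comment above (with `in' pointing to short), the address relative
   to the j-loop is (char *) in + i * sizeof (short).  */
#if 0
#include <stddef.h>

static char *
first_access_addr (char *base, size_t offset, size_t init,
                   size_t extra_offset, size_t step)
{
  return base + offset + init + extra_offset * step;
}
#endif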
3864
3865/* Function vect_create_data_ref_ptr.
3866
920e8172
RS
3867 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3868 location accessed in the loop by STMT, along with the def-use update
3869 chain to appropriately advance the pointer through the loop iterations.
3870 Also set aliasing information for the pointer. This pointer is used by
3871 the callers to this function to create a memory reference expression for
3872 vector load/store access.
ebfd146a
IR
3873
3874 Input:
3875 1. STMT: a stmt that references memory. Expected to be of the form
3876 GIMPLE_ASSIGN <name, data-ref> or
3877 GIMPLE_ASSIGN <data-ref, name>.
920e8172
RS
3878 2. AGGR_TYPE: the type of the reference, which should be either a vector
3879 or an array.
3880 3. AT_LOOP: the loop where the vector memref is to be created.
3881 4. OFFSET (optional): an offset to be added to the initial address accessed
ebfd146a 3882 by the data-ref in STMT.
920e8172
RS
3883 5. BSI: location where the new stmts are to be placed if there is no loop
3884 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
ebfd146a 3885 pointing to the initial address.
ebfd146a
IR
3886
3887 Output:
3888 1. Declare a new ptr to vector_type, and have it point to the base of the
3889 data reference (initial address accessed by the data reference).
3890 For example, for vector of type V8HI, the following code is generated:
3891
920e8172
RS
3892 v8hi *ap;
3893 ap = (v8hi *)initial_address;
ebfd146a
IR
3894
3895 if OFFSET is not supplied:
3896 initial_address = &a[init];
3897 if OFFSET is supplied:
3898 initial_address = &a[init + OFFSET];
3899
3900 Return the initial_address in INITIAL_ADDRESS.
3901
3902 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
b8698a0f 3903 update the pointer in each iteration of the loop.
ebfd146a
IR
3904
3905 Return the increment stmt that updates the pointer in PTR_INCR.
3906
b8698a0f 3907 3. Set INV_P to true if the access pattern of the data reference in the
ff802fa1 3908 vectorized loop is invariant. Set it to false otherwise.
ebfd146a
IR
3909
3910 4. Return the pointer. */
3911
3912tree
920e8172
RS
3913vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3914 tree offset, tree *initial_address,
3915 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3916 bool only_init, bool *inv_p)
ebfd146a 3917{
595c2679 3918 const char *base_name;
ebfd146a
IR
3919 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3920 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342
IR
3921 struct loop *loop = NULL;
3922 bool nested_in_vect_loop = false;
3923 struct loop *containing_loop = NULL;
920e8172
RS
3924 tree aggr_ptr_type;
3925 tree aggr_ptr;
ebfd146a
IR
3926 tree new_temp;
3927 gimple vec_stmt;
3928 gimple_seq new_stmt_list = NULL;
a70d6342 3929 edge pe = NULL;
ebfd146a 3930 basic_block new_bb;
920e8172 3931 tree aggr_ptr_init;
ebfd146a 3932 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
920e8172 3933 tree aptr;
ebfd146a
IR
3934 gimple_stmt_iterator incr_gsi;
3935 bool insert_after;
3936 tree indx_before_incr, indx_after_incr;
3937 gimple incr;
3938 tree step;
a70d6342 3939 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 3940
920e8172
RS
3941 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3942 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3943
a70d6342
IR
3944 if (loop_vinfo)
3945 {
3946 loop = LOOP_VINFO_LOOP (loop_vinfo);
3947 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3948 containing_loop = (gimple_bb (stmt))->loop_father;
3949 pe = loop_preheader_edge (loop);
3950 }
3951 else
3952 {
3953 gcc_assert (bb_vinfo);
3954 only_init = true;
3955 *ptr_incr = NULL;
3956 }
b8698a0f 3957
ebfd146a
IR
3958 /* Check the step (evolution) of the load in LOOP, and record
3959 whether it's invariant. */
3960 if (nested_in_vect_loop)
3961 step = STMT_VINFO_DR_STEP (stmt_info);
3962 else
3963 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
b8698a0f 3964
08940f33 3965 if (integer_zerop (step))
ebfd146a
IR
3966 *inv_p = true;
3967 else
3968 *inv_p = false;
3969
3970 /* Create an expression for the first address accessed by this load
b8698a0f 3971 in LOOP. */
595c2679 3972 base_name = get_name (DR_BASE_ADDRESS (dr));
ebfd146a 3973
73fbfcad 3974 if (dump_enabled_p ())
ebfd146a 3975 {
595c2679 3976 tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
78c60e3d
SS
3977 dump_printf_loc (MSG_NOTE, vect_location,
3978 "create %s-pointer variable to type: ",
5806f481 3979 get_tree_code_name (TREE_CODE (aggr_type)));
78c60e3d 3980 dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
595c2679 3981 if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
78c60e3d 3982 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
38000232
MG
3983 else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
3984 dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
595c2679 3985 else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
78c60e3d 3986 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
595c2679 3987 else
78c60e3d 3988 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
595c2679 3989 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
e645e942 3990 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
3991 }
3992
4bdd44c4
RB
3993 /* (1) Create the new aggregate-pointer variable.
3994 Vector and array types inherit the alias set of their component
920e8172
RS
3995 type by default so we need to use a ref-all pointer if the data
3996 reference does not conflict with the created aggregated data
3997 reference because it is not addressable. */
4bdd44c4
RB
3998 bool need_ref_all = false;
3999 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
3f49ba3f 4000 get_alias_set (DR_REF (dr))))
4bdd44c4 4001 need_ref_all = true;
3f49ba3f 4002 /* Likewise for any of the data references in the stmt group. */
e14c1050 4003 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
ebfd146a 4004 {
e14c1050 4005 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
5006671f
RG
4006 do
4007 {
4bdd44c4
RB
4008 stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
4009 struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
4010 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
4011 get_alias_set (DR_REF (sdr))))
5006671f 4012 {
4bdd44c4 4013 need_ref_all = true;
5006671f
RG
4014 break;
4015 }
4bdd44c4 4016 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
5006671f
RG
4017 }
4018 while (orig_stmt);
ebfd146a 4019 }
4bdd44c4
RB
4020 aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4021 need_ref_all);
4022 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4023
ebfd146a 4024
ff802fa1
IR
4025 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4026 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4027 def-use update cycles for the pointer: one relative to the outer-loop
4028 (LOOP), which is what steps (3) and (4) below do. The other is relative
4029 to the inner-loop (which is the inner-most loop containing the dataref),
4030 and this is done by step (5) below.
ebfd146a 4031
ff802fa1
IR
4032 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4033 inner-most loop, and so steps (3),(4) work the same, and step (5) is
4034 redundant. Steps (3),(4) create the following:
ebfd146a
IR
4035
4036 vp0 = &base_addr;
4037 LOOP: vp1 = phi(vp0,vp2)
b8698a0f 4038 ...
ebfd146a
IR
4039 ...
4040 vp2 = vp1 + step
4041 goto LOOP
b8698a0f 4042
ff802fa1
IR
4043 If there is an inner-loop nested in loop, then step (5) will also be
4044 applied, and an additional update in the inner-loop will be created:
ebfd146a
IR
4045
4046 vp0 = &base_addr;
4047 LOOP: vp1 = phi(vp0,vp2)
4048 ...
4049 inner: vp3 = phi(vp1,vp4)
4050 vp4 = vp3 + inner_step
4051 if () goto inner
4052 ...
4053 vp2 = vp1 + step
4054 if () goto LOOP */
4055
920e8172
RS
4056 /* (2) Calculate the initial address of the aggregate-pointer, and set
4057 the aggregate-pointer to point to it before the loop. */
ebfd146a
IR
4058
4059 /* Create: &(base[init_val+offset]) in the loop preheader. */
4060
4061 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
4062 offset, loop);
ebfd146a
IR
4063 if (new_stmt_list)
4064 {
a70d6342
IR
4065 if (pe)
4066 {
4067 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
4068 gcc_assert (!new_bb);
4069 }
4070 else
1b29f05e 4071 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
ebfd146a
IR
4072 }
4073
4074 *initial_address = new_temp;
4075
920e8172 4076 /* Create: p = (aggr_type *) initial_base */
17fc049f 4077 if (TREE_CODE (new_temp) != SSA_NAME
920e8172 4078 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
a70d6342 4079 {
920e8172
RS
4080 vec_stmt = gimple_build_assign (aggr_ptr,
4081 fold_convert (aggr_ptr_type, new_temp));
4082 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
17fc049f
RG
4083 /* Copy the points-to information if it exists. */
4084 if (DR_PTR_INFO (dr))
920e8172
RS
4085 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
4086 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
17fc049f
RG
4087 if (pe)
4088 {
4089 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
4090 gcc_assert (!new_bb);
4091 }
4092 else
1b29f05e 4093 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
a70d6342
IR
4094 }
4095 else
920e8172 4096 aggr_ptr_init = new_temp;
ebfd146a 4097
920e8172 4098 /* (3) Handle the updating of the aggregate-pointer inside the loop.
ff802fa1
IR
4099 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4100 inner-loop nested in LOOP (during outer-loop vectorization). */
ebfd146a 4101
a70d6342 4102 /* No update in loop is required. */
b8698a0f 4103 if (only_init && (!loop_vinfo || at_loop == loop))
920e8172 4104 aptr = aggr_ptr_init;
ebfd146a
IR
4105 else
4106 {
920e8172 4107 /* The step of the aggregate pointer is the type size. */
08940f33 4108 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
b8698a0f 4109 /* One exception to the above is when the scalar step of the load in
ebfd146a
IR
4110 LOOP is zero. In this case the step here is also zero. */
4111 if (*inv_p)
08940f33
RB
4112 iv_step = size_zero_node;
4113 else if (tree_int_cst_sgn (step) == -1)
4114 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
ebfd146a
IR
4115
4116 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4117
920e8172 4118 create_iv (aggr_ptr_init,
08940f33 4119 fold_convert (aggr_ptr_type, iv_step),
920e8172 4120 aggr_ptr, loop, &incr_gsi, insert_after,
ebfd146a
IR
4121 &indx_before_incr, &indx_after_incr);
4122 incr = gsi_stmt (incr_gsi);
a70d6342 4123 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
ebfd146a
IR
4124
4125 /* Copy the points-to information if it exists. */
4126 if (DR_PTR_INFO (dr))
4127 {
4128 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4129 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4130 }
ebfd146a
IR
4131 if (ptr_incr)
4132 *ptr_incr = incr;
4133
920e8172 4134 aptr = indx_before_incr;
ebfd146a
IR
4135 }
4136
4137 if (!nested_in_vect_loop || only_init)
920e8172 4138 return aptr;
ebfd146a
IR
4139
4140
920e8172 4141 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
ff802fa1 4142 nested in LOOP, if it exists. */
ebfd146a
IR
4143
4144 gcc_assert (nested_in_vect_loop);
4145 if (!only_init)
4146 {
4147 standard_iv_increment_position (containing_loop, &incr_gsi,
4148 &insert_after);
920e8172 4149 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
ebfd146a
IR
4150 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
4151 &indx_after_incr);
4152 incr = gsi_stmt (incr_gsi);
a70d6342 4153 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
ebfd146a
IR
4154
4155 /* Copy the points-to information if it exists. */
4156 if (DR_PTR_INFO (dr))
4157 {
4158 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4159 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4160 }
ebfd146a
IR
4161 if (ptr_incr)
4162 *ptr_incr = incr;
4163
b8698a0f 4164 return indx_before_incr;
ebfd146a
IR
4165 }
4166 else
4167 gcc_unreachable ();
4168}
4169
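/* Illustrative sketch only -- hypothetical code under #if 0.  The
   def-use cycle built by steps (3)/(4) above, written as plain C:
   the aggregate pointer is set to the first address before the loop
   and advanced by one vector (here, 4 floats) per iteration.  */
#if 0
void
pointer_iv (float *restrict dst, const float *restrict base, int nvecs)
{
  const float *vp = base;             /* vp0 = &base_addr        */
  for (int i = 0; i < nvecs; i++)     /* vp1 = phi (vp0, vp2)    */
    {
      for (int j = 0; j < 4; j++)
        dst[4 * i + j] = vp[j];       /* use the current vector  */
      vp += 4;                        /* vp2 = vp1 + step        */
    }
}
#endif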
4170
4171/* Function bump_vector_ptr
4172
4173 Increment a pointer (to a vector type) by vector-size. If requested,
b8698a0f 4174 i.e. if PTR-INCR is given, then also connect the new increment stmt
ebfd146a
IR
4175 to the existing def-use update-chain of the pointer, by modifying
4176 the PTR_INCR as illustrated below:
4177
4178 The pointer def-use update-chain before this function:
4179 DATAREF_PTR = phi (p_0, p_2)
4180 ....
b8698a0f 4181 PTR_INCR: p_2 = DATAREF_PTR + step
ebfd146a
IR
4182
4183 The pointer def-use update-chain after this function:
4184 DATAREF_PTR = phi (p_0, p_2)
4185 ....
4186 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4187 ....
4188 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4189
4190 Input:
b8698a0f 4191 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
ebfd146a 4192 in the loop.
b8698a0f 4193 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
ebfd146a 4194 the loop. The increment amount across iterations is expected
b8698a0f 4195 to be vector_size.
ebfd146a
IR
4196 BSI - location where the new update stmt is to be placed.
4197 STMT - the original scalar memory-access stmt that is being vectorized.
4198 BUMP - optional. The offset by which to bump the pointer. If not given,
4199 the offset is assumed to be vector_size.
4200
4201 Output: Return NEW_DATAREF_PTR as illustrated above.
b8698a0f 4202
ebfd146a
IR
4203*/
4204
4205tree
4206bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4207 gimple stmt, tree bump)
4208{
4209 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4210 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4211 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
4212 tree update = TYPE_SIZE_UNIT (vectype);
4213 gimple incr_stmt;
4214 ssa_op_iter iter;
4215 use_operand_p use_p;
4216 tree new_dataref_ptr;
4217
4218 if (bump)
4219 update = bump;
b8698a0f 4220
070ecdfd
RG
4221 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4222 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
ebfd146a 4223 dataref_ptr, update);
ebfd146a
IR
4224 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4225
4226 /* Copy the points-to information if it exists. */
4227 if (DR_PTR_INFO (dr))
128aaeed
RB
4228 {
4229 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
644ffefd 4230 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
128aaeed 4231 }
ebfd146a
IR
4232
4233 if (!ptr_incr)
4234 return new_dataref_ptr;
4235
4236 /* Update the vector-pointer's cross-iteration increment. */
4237 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4238 {
4239 tree use = USE_FROM_PTR (use_p);
4240
4241 if (use == dataref_ptr)
4242 SET_USE (use_p, new_dataref_ptr);
4243 else
4244 gcc_assert (tree_int_cst_compare (use, update) == 0);
4245 }
4246
4247 return new_dataref_ptr;
4248}
4249
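/* Illustrative sketch only -- hypothetical code under #if 0.  It shows
   the effect of bump_vector_ptr in plain C: when one scalar access is
   vectorized into two vector accesses per iteration, the pointer is
   bumped by one vector between them and the cross-iteration increment
   is applied to the bumped pointer, matching the before/after def-use
   chains in the comment above.  */
#if 0
void
two_vectors_per_iter (float *restrict dst, const float *restrict base,
                      int niters)
{
  const float *p = base;
  for (int i = 0; i < niters; i++)
    {
      const float *p_lo = p;          /* DATAREF_PTR                  */
      const float *p_hi = p_lo + 4;   /* NEW_DATAREF_PTR = ptr + BUMP */
      for (int j = 0; j < 4; j++)
        {
          dst[8 * i + j] = p_lo[j];
          dst[8 * i + 4 + j] = p_hi[j];
        }
      p = p_hi + 4;                   /* PTR_INCR now adds the step to
                                         the bumped pointer           */
    }
}
#endif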
4250
4251/* Function vect_create_destination_var.
4252
4253 Create a new temporary of type VECTYPE. */
4254
4255tree
4256vect_create_destination_var (tree scalar_dest, tree vectype)
4257{
4258 tree vec_dest;
451dabda
RB
4259 const char *name;
4260 char *new_name;
ebfd146a
IR
4261 tree type;
4262 enum vect_var_kind kind;
4263
4264 kind = vectype ? vect_simple_var : vect_scalar_var;
4265 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4266
4267 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4268
451dabda
RB
4269 name = get_name (scalar_dest);
4270 if (name)
4271 asprintf (&new_name, "%s_%u", name, SSA_NAME_VERSION (scalar_dest));
4272 else
4273 asprintf (&new_name, "_%u", SSA_NAME_VERSION (scalar_dest));
ebfd146a 4274 vec_dest = vect_get_new_vect_var (type, kind, new_name);
451dabda 4275 free (new_name);
ebfd146a
IR
4276
4277 return vec_dest;
4278}
4279
0d0293ac 4280/* Function vect_grouped_store_supported.
ebfd146a 4281
e2c83630
RH
4282 Returns TRUE if interleave high and interleave low permutations
4283 are supported, and FALSE otherwise. */
ebfd146a
IR
4284
4285bool
0d0293ac 4286vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
ebfd146a 4287{
e2c83630 4288 enum machine_mode mode = TYPE_MODE (vectype);
b8698a0f 4289
b602d918
RS
4290 /* vect_permute_store_chain requires the group size to be a power of two. */
4291 if (exact_log2 (count) == -1)
4292 {
73fbfcad 4293 if (dump_enabled_p ())
78c60e3d
SS
4294 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4295 "the size of the group of accesses"
e645e942 4296 " is not a power of 2\n");
b602d918
RS
4297 return false;
4298 }
4299
e2c83630 4300 /* Check that the permutation is supported. */
3fcc1b55
JJ
4301 if (VECTOR_MODE_P (mode))
4302 {
4303 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4304 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4305 for (i = 0; i < nelt / 2; i++)
4306 {
4307 sel[i * 2] = i;
4308 sel[i * 2 + 1] = i + nelt;
4309 }
4310 if (can_vec_perm_p (mode, false, sel))
4311 {
4312 for (i = 0; i < nelt; i++)
4313 sel[i] += nelt / 2;
4314 if (can_vec_perm_p (mode, false, sel))
4315 return true;
4316 }
4317 }
ebfd146a 4318
73fbfcad 4319 if (dump_enabled_p ())
78c60e3d 4320 dump_printf (MSG_MISSED_OPTIMIZATION,
e645e942 4321 "interleave op not supported by target.\n");
a6b3dfde 4322 return false;
ebfd146a
IR
4323}
4324
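/* Illustrative sketch only -- a hypothetical standalone program under
   #if 0.  It builds the two VEC_PERM_EXPR selectors that
   vect_grouped_store_supported asks the target about and applies them
   by hand for NELT == 8.  Element SEL[i] selects V1[SEL[i]] when
   SEL[i] < NELT and V2[SEL[i] - NELT] otherwise, which is the
   semantics can_vec_perm_p is queried for.  */
#if 0
#include <stdio.h>

#define NELT 8

static void
vec_perm (const int *v1, const int *v2, const unsigned char *sel, int *out)
{
  for (int i = 0; i < NELT; i++)
    out[i] = sel[i] < NELT ? v1[sel[i]] : v2[sel[i] - NELT];
}

int
main (void)
{
  int v1[NELT] = { 0, 1, 2, 3, 4, 5, 6, 7 };
  int v2[NELT] = { 8, 9, 10, 11, 12, 13, 14, 15 };
  unsigned char sel[NELT];
  int high[NELT], low[NELT];

  for (int i = 0; i < NELT / 2; i++)
    {
      sel[i * 2] = i;                 /* interleave-high selector  */
      sel[i * 2 + 1] = i + NELT;
    }
  vec_perm (v1, v2, sel, high);

  for (int i = 0; i < NELT; i++)
    sel[i] += NELT / 2;               /* interleave-low selector   */
  vec_perm (v1, v2, sel, low);

  for (int i = 0; i < NELT; i++)
    printf ("%d ", high[i]);          /* 0 8 1 9 2 10 3 11   */
  printf ("\n");
  for (int i = 0; i < NELT; i++)
    printf ("%d ", low[i]);           /* 4 12 5 13 6 14 7 15 */
  printf ("\n");
  return 0;
}
#endif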
4325
272c6793
RS
4326/* Return TRUE if vec_store_lanes is available for COUNT vectors of
4327 type VECTYPE. */
4328
4329bool
4330vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4331{
4332 return vect_lanes_optab_supported_p ("vec_store_lanes",
4333 vec_store_lanes_optab,
4334 vectype, count);
4335}
4336
4337
ebfd146a
IR
4338/* Function vect_permute_store_chain.
4339
4340 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
b8698a0f 4341 a power of 2, generate interleave_high/low stmts to reorder the data
ff802fa1 4342 correctly for the stores. Return the final references for stores in
ebfd146a
IR
4343 RESULT_CHAIN.
4344
4345 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
ff802fa1
IR
4346 The input is 4 vectors each containing 8 elements. We assign a number to
4347 each element, the input sequence is:
ebfd146a
IR
4348
4349 1st vec: 0 1 2 3 4 5 6 7
4350 2nd vec: 8 9 10 11 12 13 14 15
b8698a0f 4351 3rd vec: 16 17 18 19 20 21 22 23
ebfd146a
IR
4352 4th vec: 24 25 26 27 28 29 30 31
4353
4354 The output sequence should be:
4355
4356 1st vec: 0 8 16 24 1 9 17 25
4357 2nd vec: 2 10 18 26 3 11 19 27
4358 3rd vec: 4 12 20 28 5 13 21 29
4359 4th vec: 6 14 22 30 7 15 23 31
4360
4361 i.e., we interleave the contents of the four vectors in their order.
4362
ff802fa1 4363 We use interleave_high/low instructions to create such output. The input of
ebfd146a 4364 each interleave_high/low operation is two vectors:
b8698a0f
L
4365 1st vec 2nd vec
4366 0 1 2 3 4 5 6 7
4367 the even elements of the result vector are obtained left-to-right from the
ff802fa1 4368 high/low elements of the first vector. The odd elements of the result are
ebfd146a
IR
4369 obtained left-to-right from the high/low elements of the second vector.
4370 The output of interleave_high will be: 0 4 1 5
4371 and of interleave_low: 2 6 3 7
4372
b8698a0f 4373
ff802fa1 4374 The permutation is done in log LENGTH stages. In each stage interleave_high
b8698a0f
L
4375 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4376 where the first argument is taken from the first half of DR_CHAIN and the
4377 second argument from its second half.
4378 In our example,
ebfd146a
IR
4379
4380 I1: interleave_high (1st vec, 3rd vec)
4381 I2: interleave_low (1st vec, 3rd vec)
4382 I3: interleave_high (2nd vec, 4th vec)
4383 I4: interleave_low (2nd vec, 4th vec)
4384
4385 The output for the first stage is:
4386
4387 I1: 0 16 1 17 2 18 3 19
4388 I2: 4 20 5 21 6 22 7 23
4389 I3: 8 24 9 25 10 26 11 27
4390 I4: 12 28 13 29 14 30 15 31
4391
4392 The output of the second stage, i.e. the final result is:
4393
4394 I1: 0 8 16 24 1 9 17 25
4395 I2: 2 10 18 26 3 11 19 27
4396 I3: 4 12 20 28 5 13 21 29
4397 I4: 6 14 22 30 7 15 23 31. */
b8698a0f 4398
b602d918 4399void
9771b263 4400vect_permute_store_chain (vec<tree> dr_chain,
b8698a0f 4401 unsigned int length,
ebfd146a
IR
4402 gimple stmt,
4403 gimple_stmt_iterator *gsi,
9771b263 4404 vec<tree> *result_chain)
ebfd146a 4405{
83d5977e 4406 tree vect1, vect2, high, low;
ebfd146a
IR
4407 gimple perm_stmt;
4408 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3fcc1b55
JJ
4409 tree perm_mask_low, perm_mask_high;
4410 unsigned int i, n;
e2c83630 4411 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
3fcc1b55 4412 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
b8698a0f 4413
b6b9227d
JJ
4414 result_chain->quick_grow (length);
4415 memcpy (result_chain->address (), dr_chain.address (),
4416 length * sizeof (tree));
ebfd146a 4417
3fcc1b55
JJ
4418 for (i = 0, n = nelt / 2; i < n; i++)
4419 {
4420 sel[i * 2] = i;
4421 sel[i * 2 + 1] = i + nelt;
4422 }
4423 perm_mask_high = vect_gen_perm_mask (vectype, sel);
e2c83630
RH
4424 gcc_assert (perm_mask_high != NULL);
4425
3fcc1b55
JJ
4426 for (i = 0; i < nelt; i++)
4427 sel[i] += nelt / 2;
4428 perm_mask_low = vect_gen_perm_mask (vectype, sel);
e2c83630 4429 gcc_assert (perm_mask_low != NULL);
3fcc1b55
JJ
4430
4431 for (i = 0, n = exact_log2 (length); i < n; i++)
ebfd146a
IR
4432 {
4433 for (j = 0; j < length/2; j++)
4434 {
9771b263
DN
4435 vect1 = dr_chain[j];
4436 vect2 = dr_chain[j+length/2];
ebfd146a
IR
4437
4438 /* Create interleaving stmt:
3fcc1b55 4439 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
83d5977e 4440 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
3fcc1b55 4441 perm_stmt
73804b12
RG
4442 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4443 vect1, vect2, perm_mask_high);
ebfd146a 4444 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
9771b263 4445 (*result_chain)[2*j] = high;
ebfd146a
IR
4446
4447 /* Create interleaving stmt:
3fcc1b55
JJ
4448 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4449 nelt*3/2+1, ...}> */
83d5977e 4450 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
3fcc1b55 4451 perm_stmt
73804b12
RG
4452 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4453 vect1, vect2, perm_mask_low);
ebfd146a 4454 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
9771b263 4455 (*result_chain)[2*j+1] = low;
ebfd146a 4456 }
b6b9227d
JJ
4457 memcpy (dr_chain.address (), result_chain->address (),
4458 length * sizeof (tree));
ebfd146a 4459 }
ebfd146a
IR
4460}
4461
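/* Illustrative sketch only -- a hypothetical standalone program under
   #if 0.  It performs the log2(LENGTH) interleaving stages described
   above on plain int arrays instead of vector SSA names.  For
   LENGTH == 4 and NELT == 8 it reproduces the final order listed in
   the function comment (0 8 16 24 1 9 17 25, 2 10 18 26 3 11 19 27,
   and so on).  */
#if 0
#include <stdio.h>
#include <string.h>

#define NELT   8
#define LENGTH 4

static void
interleave (const int *v1, const int *v2, int off, int *out)
{
  /* off == 0 gives interleave-high, off == NELT/2 interleave-low.  */
  for (int i = 0; i < NELT / 2; i++)
    {
      out[2 * i] = v1[off + i];
      out[2 * i + 1] = v2[off + i];
    }
}

int
main (void)
{
  int chain[LENGTH][NELT], result[LENGTH][NELT];
  for (int j = 0; j < LENGTH; j++)
    for (int i = 0; i < NELT; i++)
      chain[j][i] = j * NELT + i;

  for (int stage = 0; (1 << stage) < LENGTH; stage++)
    {
      for (int j = 0; j < LENGTH / 2; j++)
        {
          interleave (chain[j], chain[j + LENGTH / 2], 0, result[2 * j]);
          interleave (chain[j], chain[j + LENGTH / 2], NELT / 2,
                      result[2 * j + 1]);
        }
      memcpy (chain, result, sizeof (chain));
    }

  for (int j = 0; j < LENGTH; j++)
    {
      for (int i = 0; i < NELT; i++)
        printf ("%d ", chain[j][i]);
      printf ("\n");
    }
  return 0;
}
#endif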
4462/* Function vect_setup_realignment
b8698a0f 4463
ebfd146a
IR
4464 This function is called when vectorizing an unaligned load using
4465 the dr_explicit_realign[_optimized] scheme.
4466 This function generates the following code at the loop prolog:
4467
4468 p = initial_addr;
4469 x msq_init = *(floor(p)); # prolog load
b8698a0f 4470 realignment_token = call target_builtin;
ebfd146a
IR
4471 loop:
4472 x msq = phi (msq_init, ---)
4473
b8698a0f 4474 The stmts marked with x are generated only for the case of
ebfd146a
IR
4475 dr_explicit_realign_optimized.
4476
b8698a0f 4477 The code above sets up a new (vector) pointer, pointing to the first
ebfd146a
IR
4478 location accessed by STMT, and a "floor-aligned" load using that pointer.
4479 It also generates code to compute the "realignment-token" (if the relevant
4480 target hook was defined), and creates a phi-node at the loop-header bb
4481 whose arguments are the result of the prolog-load (created by this
4482 function) and the result of a load that takes place in the loop (to be
4483 created by the caller to this function).
4484
4485 For the case of dr_explicit_realign_optimized:
b8698a0f 4486 The caller to this function uses the phi-result (msq) to create the
ebfd146a
IR
4487 realignment code inside the loop, and sets up the missing phi argument,
4488 as follows:
b8698a0f 4489 loop:
ebfd146a
IR
4490 msq = phi (msq_init, lsq)
4491 lsq = *(floor(p')); # load in loop
4492 result = realign_load (msq, lsq, realignment_token);
4493
4494 For the case of dr_explicit_realign:
4495 loop:
4496 msq = *(floor(p)); # load in loop
4497 p' = p + (VS-1);
4498 lsq = *(floor(p')); # load in loop
4499 result = realign_load (msq, lsq, realignment_token);
4500
4501 Input:
4502 STMT - (scalar) load stmt to be vectorized. This load accesses
4503 a memory location that may be unaligned.
4504 BSI - place where new code is to be inserted.
4505 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
b8698a0f
L
4506 is used.
4507
ebfd146a
IR
4508 Output:
4509 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4510 target hook, if defined.
4511 Return value - the result of the loop-header phi node. */
4512
4513tree
4514vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4515 tree *realignment_token,
4516 enum dr_alignment_support alignment_support_scheme,
4517 tree init_addr,
4518 struct loop **at_loop)
4519{
4520 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4521 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4522 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
20ede5c6 4523 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
69f11a13
IR
4524 struct loop *loop = NULL;
4525 edge pe = NULL;
ebfd146a
IR
4526 tree scalar_dest = gimple_assign_lhs (stmt);
4527 tree vec_dest;
4528 gimple inc;
4529 tree ptr;
4530 tree data_ref;
4531 gimple new_stmt;
4532 basic_block new_bb;
4533 tree msq_init = NULL_TREE;
4534 tree new_temp;
4535 gimple phi_stmt;
4536 tree msq = NULL_TREE;
4537 gimple_seq stmts = NULL;
4538 bool inv_p;
4539 bool compute_in_loop = false;
69f11a13 4540 bool nested_in_vect_loop = false;
ebfd146a 4541 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
69f11a13
IR
4542 struct loop *loop_for_initial_load = NULL;
4543
4544 if (loop_vinfo)
4545 {
4546 loop = LOOP_VINFO_LOOP (loop_vinfo);
4547 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4548 }
ebfd146a
IR
4549
4550 gcc_assert (alignment_support_scheme == dr_explicit_realign
4551 || alignment_support_scheme == dr_explicit_realign_optimized);
4552
4553 /* We need to generate three things:
4554 1. the misalignment computation
4555 2. the extra vector load (for the optimized realignment scheme).
4556 3. the phi node for the two vectors from which the realignment is
ff802fa1 4557 done (for the optimized realignment scheme). */
ebfd146a
IR
4558
4559 /* 1. Determine where to generate the misalignment computation.
4560
4561 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4562 calculation will be generated by this function, outside the loop (in the
4563 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4564 caller, inside the loop.
4565
4566 Background: If the misalignment remains fixed throughout the iterations of
4567 the loop, then both realignment schemes are applicable, and also the
4568 misalignment computation can be done outside LOOP. This is because we are
4569 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4570 are a multiple of VS (the Vector Size), and therefore the misalignment in
4571 different vectorized LOOP iterations is always the same.
4572 The problem arises only if the memory access is in an inner-loop nested
4573 inside LOOP, which is now being vectorized using outer-loop vectorization.
4574 This is the only case when the misalignment of the memory access may not
4575 remain fixed throughout the iterations of the inner-loop (as explained in
4576 detail in vect_supportable_dr_alignment). In this case, not only is the
4577 optimized realignment scheme not applicable, but also the misalignment
4578 computation (and generation of the realignment token that is passed to
4579 REALIGN_LOAD) have to be done inside the loop.
4580
4581 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4582 or not, which in turn determines if the misalignment is computed inside
4583 the inner-loop, or outside LOOP. */
4584
69f11a13 4585 if (init_addr != NULL_TREE || !loop_vinfo)
ebfd146a
IR
4586 {
4587 compute_in_loop = true;
4588 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4589 }
4590
4591
4592 /* 2. Determine where to generate the extra vector load.
4593
4594 For the optimized realignment scheme, instead of generating two vector
4595 loads in each iteration, we generate a single extra vector load in the
4596 preheader of the loop, and in each iteration reuse the result of the
4597 vector load from the previous iteration. In case the memory access is in
4598 an inner-loop nested inside LOOP, which is now being vectorized using
4599 outer-loop vectorization, we need to determine whether this initial vector
4600 load should be generated at the preheader of the inner-loop, or can be
4601 generated at the preheader of LOOP. If the memory access has no evolution
4602 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4603 to be generated inside LOOP (in the preheader of the inner-loop). */
4604
4605 if (nested_in_vect_loop)
4606 {
4607 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4608 bool invariant_in_outerloop =
4609 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4610 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4611 }
4612 else
4613 loop_for_initial_load = loop;
4614 if (at_loop)
4615 *at_loop = loop_for_initial_load;
4616
69f11a13
IR
4617 if (loop_for_initial_load)
4618 pe = loop_preheader_edge (loop_for_initial_load);
4619
ebfd146a
IR
4620 /* 3. For the case of the optimized realignment, create the first vector
4621 load at the loop preheader. */
4622
4623 if (alignment_support_scheme == dr_explicit_realign_optimized)
4624 {
4625 /* Create msq_init = *(floor(p1)) in the loop preheader */
4626
4627 gcc_assert (!compute_in_loop);
ebfd146a 4628 vec_dest = vect_create_destination_var (scalar_dest, vectype);
920e8172
RS
4629 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4630 NULL_TREE, &init_addr, NULL, &inc,
4631 true, &inv_p);
070ecdfd 4632 new_temp = copy_ssa_name (ptr, NULL);
75421dcd 4633 new_stmt = gimple_build_assign_with_ops
070ecdfd 4634 (BIT_AND_EXPR, new_temp, ptr,
75421dcd
RG
4635 build_int_cst (TREE_TYPE (ptr),
4636 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
75421dcd
RG
4637 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4638 gcc_assert (!new_bb);
20ede5c6
RG
4639 data_ref
4640 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4641 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
ebfd146a
IR
4642 new_stmt = gimple_build_assign (vec_dest, data_ref);
4643 new_temp = make_ssa_name (vec_dest, new_stmt);
4644 gimple_assign_set_lhs (new_stmt, new_temp);
69f11a13
IR
4645 if (pe)
4646 {
4647 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4648 gcc_assert (!new_bb);
4649 }
4650 else
4651 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4652
ebfd146a
IR
4653 msq_init = gimple_assign_lhs (new_stmt);
4654 }
4655
4656 /* 4. Create realignment token using a target builtin, if available.
4657 It is done either inside the containing loop, or before LOOP (as
4658 determined above). */
4659
4660 if (targetm.vectorize.builtin_mask_for_load)
4661 {
4662 tree builtin_decl;
4663
4664 /* Compute INIT_ADDR - the initial address accessed by this memref. */
69f11a13 4665 if (!init_addr)
ebfd146a
IR
4666 {
4667 /* Generate the INIT_ADDR computation outside LOOP. */
4668 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4669 NULL_TREE, loop);
69f11a13
IR
4670 if (loop)
4671 {
4672 pe = loop_preheader_edge (loop);
4673 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4674 gcc_assert (!new_bb);
4675 }
4676 else
4677 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
ebfd146a
IR
4678 }
4679
4680 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4681 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4682 vec_dest =
4683 vect_create_destination_var (scalar_dest,
4684 gimple_call_return_type (new_stmt));
4685 new_temp = make_ssa_name (vec_dest, new_stmt);
4686 gimple_call_set_lhs (new_stmt, new_temp);
4687
4688 if (compute_in_loop)
4689 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4690 else
4691 {
4692 /* Generate the misalignment computation outside LOOP. */
4693 pe = loop_preheader_edge (loop);
4694 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4695 gcc_assert (!new_bb);
4696 }
4697
4698 *realignment_token = gimple_call_lhs (new_stmt);
4699
4700 /* The result of the CALL_EXPR to this builtin is determined from
4701 the value of the parameter and no global variables are touched
4702 which makes the builtin a "const" function. Requiring the
4703 builtin to have the "const" attribute makes it unnecessary
4704 to call mark_call_clobbered. */
4705 gcc_assert (TREE_READONLY (builtin_decl));
4706 }
4707
4708 if (alignment_support_scheme == dr_explicit_realign)
4709 return msq;
4710
4711 gcc_assert (!compute_in_loop);
4712 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4713
4714
4715 /* 5. Create msq = phi <msq_init, lsq> in loop */
4716
4717 pe = loop_preheader_edge (containing_loop);
4718 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4719 msq = make_ssa_name (vec_dest, NULL);
4720 phi_stmt = create_phi_node (msq, containing_loop->header);
9e227d60 4721 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
ebfd146a
IR
4722
4723 return msq;
4724}
4725
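/* Illustrative sketch only -- hypothetical code under #if 0.  It
   emulates the realignment idea above in scalar C: a misaligned
   "vector" of VS elements is assembled from the two aligned blocks
   that straddle it, with the misalignment (the realignment token)
   selecting which window of the concatenation to keep.  */
#if 0
#include <stdio.h>

#define VS 4   /* elements per vector */

static void
realign_load (const int *aligned_base, unsigned misalign, int *out)
{
  const int *msq = aligned_base;        /* msq = *(floor (p))       */
  const int *lsq = aligned_base + VS;   /* lsq = next aligned block */
  for (int i = 0; i < VS; i++)
    out[i] = misalign + i < VS ? msq[misalign + i]
                               : lsq[misalign + i - VS];
}

int
main (void)
{
  int a[2 * VS] = { 10, 11, 12, 13, 14, 15, 16, 17 };
  int out[VS];
  realign_load (a, 3, out);     /* emulates loading a[3..6]  */
  for (int i = 0; i < VS; i++)
    printf ("%d ", out[i]);     /* prints: 13 14 15 16       */
  printf ("\n");
  return 0;
}
#endif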
4726
0d0293ac 4727/* Function vect_grouped_load_supported.
ebfd146a 4728
e2c83630 4729 Returns TRUE if even and odd permutations are supported,
ebfd146a
IR
4730 and FALSE otherwise. */
4731
4732bool
0d0293ac 4733vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
ebfd146a 4734{
e2c83630 4735 enum machine_mode mode = TYPE_MODE (vectype);
ebfd146a 4736
b602d918
RS
4737 /* vect_permute_load_chain requires the group size to be a power of two. */
4738 if (exact_log2 (count) == -1)
4739 {
73fbfcad 4740 if (dump_enabled_p ())
78c60e3d
SS
4741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4742 "the size of the group of accesses"
e645e942 4743 " is not a power of 2\n");
4744 return false;
4745 }
4746
4747 /* Check that the permutation is supported. */
4748 if (VECTOR_MODE_P (mode))
4749 {
4750 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4751 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
ebfd146a 4752
4753 for (i = 0; i < nelt; i++)
4754 sel[i] = i * 2;
4755 if (can_vec_perm_p (mode, false, sel))
4756 {
4757 for (i = 0; i < nelt; i++)
4758 sel[i] = i * 2 + 1;
4759 if (can_vec_perm_p (mode, false, sel))
4760 return true;
4761 }
4762 }
ebfd146a 4763
73fbfcad 4764 if (dump_enabled_p ())
78c60e3d 4765 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4766 "extract even/odd not supported by target\n");
a6b3dfde 4767 return false;
4768}
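/* Illustrative sketch, not part of GCC: the two selector vectors that
   vect_grouped_load_supported asks the target about, shown for an assumed
   vector of 8 elements, together with the power-of-two check that the code
   performs with exact_log2.  */

#include <stdio.h>

/* Stand-in for "exact_log2 (count) != -1".  */
static int
is_power_of_two (unsigned int count)
{
  return count != 0 && (count & (count - 1)) == 0;
}

int
main (void)
{
  unsigned int nelt = 8;          /* assumed number of vector elements */

  printf ("group size 6 supported? %d, group size 8 supported? %d\n",
          is_power_of_two (6), is_power_of_two (8));

  printf ("extract-even selector:");
  for (unsigned int i = 0; i < nelt; i++)
    printf (" %u", i * 2);        /* 0 2 4 6 8 10 12 14 */
  printf ("\nextract-odd  selector:");
  for (unsigned int i = 0; i < nelt; i++)
    printf (" %u", i * 2 + 1);    /* 1 3 5 7 9 11 13 15 */
  printf ("\n");
  return 0;
}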
4769
4770/* Return TRUE if vec_load_lanes is available for COUNT vectors of
4771 type VECTYPE. */
4772
4773bool
4774vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4775{
4776 return vect_lanes_optab_supported_p ("vec_load_lanes",
4777 vec_load_lanes_optab,
4778 vectype, count);
4779}
4780
4781/* Function vect_permute_load_chain.
4782
4783 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
b8698a0f 4784 a power of 2, generate extract_even/odd stmts to reorder the input data
ff802fa1 4785 correctly. Return the final references for loads in RESULT_CHAIN.
4786
4787 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4788 The input is 4 vectors each containing 8 elements. We assign a number to each
4789 element; the input sequence is:
4790
4791 1st vec: 0 1 2 3 4 5 6 7
4792 2nd vec: 8 9 10 11 12 13 14 15
b8698a0f 4793 3rd vec: 16 17 18 19 20 21 22 23
4794 4th vec: 24 25 26 27 28 29 30 31
4795
4796 The output sequence should be:
4797
4798 1st vec: 0 4 8 12 16 20 24 28
4799 2nd vec: 1 5 9 13 17 21 25 29
b8698a0f 4800 3rd vec: 2 6 10 14 18 22 26 30
4801 4th vec: 3 7 11 15 19 23 27 31
4802
4803 i.e., the first output vector should contain the first elements of each
4804 interleaving group, etc.
4805
4806 We use extract_even/odd instructions to create such output. The input of
4807 each extract_even/odd operation is two vectors
4808 1st vec 2nd vec
4809 0 1 2 3 4 5 6 7
ebfd146a 4810
ff802fa1 4811 and the output is the vector of extracted even/odd elements. The output of
4812 extract_even will be: 0 2 4 6
4813 and of extract_odd: 1 3 5 7
4814
b8698a0f 4815
4816 The permutation is done in log2 (LENGTH) stages. In each stage extract_even
4817 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4818 their order. In our example,
4819
4820 E1: extract_even (1st vec, 2nd vec)
4821 E2: extract_odd (1st vec, 2nd vec)
4822 E3: extract_even (3rd vec, 4th vec)
4823 E4: extract_odd (3rd vec, 4th vec)
4824
4825 The output for the first stage will be:
4826
4827 E1: 0 2 4 6 8 10 12 14
4828 E2: 1 3 5 7 9 11 13 15
b8698a0f 4829 E3: 16 18 20 22 24 26 28 30
4830 E4: 17 19 21 23 25 27 29 31
4831
4832 In order to proceed and create the correct sequence for the next stage (or
4833 for the correct output, if the second stage is the last one, as in our
4834 example), we first put the output of the extract_even operation and then the
4835 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4836 The input for the second stage is:
4837
4838 1st vec (E1): 0 2 4 6 8 10 12 14
4839 2nd vec (E3): 16 18 20 22 24 26 28 30
4840 3rd vec (E2): 1 3 5 7 9 11 13 15
4841 4th vec (E4): 17 19 21 23 25 27 29 31
4842
4843 The output of the second stage:
4844
4845 E1: 0 4 8 12 16 20 24 28
4846 E2: 2 6 10 14 18 22 26 30
4847 E3: 1 5 9 13 17 21 25 29
4848 E4: 3 7 11 15 19 23 27 31
4849
4850 And RESULT_CHAIN after reordering:
4851
4852 1st vec (E1): 0 4 8 12 16 20 24 28
4853 2nd vec (E3): 1 5 9 13 17 21 25 29
b8698a0f 4854 3rd vec (E2): 2 6 10 14 18 22 26 30
4855 4th vec (E4): 3 7 11 15 19 23 27 31. */
4856
b602d918 4857static void
9771b263 4858vect_permute_load_chain (vec<tree> dr_chain,
b8698a0f 4859 unsigned int length,
4860 gimple stmt,
4861 gimple_stmt_iterator *gsi,
9771b263 4862 vec<tree> *result_chain)
ebfd146a 4863{
83d5977e 4864 tree data_ref, first_vect, second_vect;
e2c83630 4865 tree perm_mask_even, perm_mask_odd;
4866 gimple perm_stmt;
4867 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
4868 unsigned int i, j, log_length = exact_log2 (length);
4869 unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
4870 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
ebfd146a 4871
4872 result_chain->quick_grow (length);
4873 memcpy (result_chain->address (), dr_chain.address (),
4874 length * sizeof (tree));
4875
4876 for (i = 0; i < nelt; ++i)
4877 sel[i] = i * 2;
4878 perm_mask_even = vect_gen_perm_mask (vectype, sel);
4879 gcc_assert (perm_mask_even != NULL);
4880
4881 for (i = 0; i < nelt; ++i)
4882 sel[i] = i * 2 + 1;
4883 perm_mask_odd = vect_gen_perm_mask (vectype, sel);
4884 gcc_assert (perm_mask_odd != NULL);
4885
4886 for (i = 0; i < log_length; i++)
ebfd146a 4887 {
e2c83630 4888 for (j = 0; j < length; j += 2)
ebfd146a 4889 {
4890 first_vect = dr_chain[j];
4891 second_vect = dr_chain[j+1];
4892
4893 /* data_ref = permute_even (first_data_ref, second_data_ref); */
83d5977e 4894 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
4895 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4896 first_vect, second_vect,
4897 perm_mask_even);
ebfd146a 4898 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
9771b263 4899 (*result_chain)[j/2] = data_ref;
b8698a0f 4900
ebfd146a 4901 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
83d5977e 4902 data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
4903 perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
4904 first_vect, second_vect,
4905 perm_mask_odd);
ebfd146a 4906 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
9771b263 4907 (*result_chain)[j/2+length/2] = data_ref;
ebfd146a 4908 }
4909 memcpy (dr_chain.address (), result_chain->address (),
4910 length * sizeof (tree));
ebfd146a 4911 }
4912}
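/* Illustrative sketch, not part of GCC: a standalone scalar model of the
   reordering described in the comment before vect_permute_load_chain.
   It reproduces the worked example above for LENGTH == 4 vectors of
   NELT == 8 elements, using plain int arrays instead of vector operands;
   the names and sizes are assumptions made for the example.  */

#include <stdio.h>
#include <string.h>

#define NELT 8    /* elements per vector   */
#define LEN  4    /* vectors in the chain  */

/* Extract the even (ODD == 0) or odd (ODD == 1) elements of the
   concatenation of A and B, i.e. selector 2i + ODD.  */
static void
extract (const int *a, const int *b, int odd, int *out)
{
  for (int i = 0; i < NELT; i++)
    {
      int idx = i * 2 + odd;
      out[i] = idx < NELT ? a[idx] : b[idx - NELT];
    }
}

int
main (void)
{
  int chain[LEN][NELT], result[LEN][NELT];

  /* Input as in the comment: vector k holds elements 8k .. 8k+7.  */
  for (int k = 0; k < LEN; k++)
    for (int i = 0; i < NELT; i++)
      chain[k][i] = k * NELT + i;

  /* log2 (LEN) == 2 stages of extract-even / extract-odd.  */
  for (int stage = 0; stage < 2; stage++)
    {
      for (int j = 0; j < LEN; j += 2)
        {
          extract (chain[j], chain[j + 1], 0, result[j / 2]);
          extract (chain[j], chain[j + 1], 1, result[j / 2 + LEN / 2]);
        }
      memcpy (chain, result, sizeof chain);
    }

  for (int k = 0; k < LEN; k++)
    {
      for (int i = 0; i < NELT; i++)
        printf ("%2d ", chain[k][i]);
      printf ("\n");   /* 0 4 8 ... / 1 5 9 ... / 2 6 10 ... / 3 7 11 ... */
    }
  return 0;
}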
4913
4914
0d0293ac 4915/* Function vect_transform_grouped_load.
4916
4917 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
4918 to perform their permutation and ascribe the resulting vectorized statements to
4919 the scalar statements.
4920*/
4921
b602d918 4922void
9771b263 4923vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
4924 gimple_stmt_iterator *gsi)
4925{
6e1aa848 4926 vec<tree> result_chain = vNULL;
ebfd146a 4927
4928 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
4929 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
ebfd146a 4930 vectors that are ready for vector computation. */
9771b263 4931 result_chain.create (size);
b602d918 4932 vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
0d0293ac 4933 vect_record_grouped_load_vectors (stmt, result_chain);
9771b263 4934 result_chain.release ();
4935}
4936
0d0293ac 4937/* RESULT_CHAIN contains the output of a group of grouped loads that were
4938 generated as part of the vectorization of STMT. Assign the statement
4939 for each vector to the associated scalar statement. */
4940
4941void
9771b263 4942vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
272c6793 4943{
e14c1050 4944 gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
4945 gimple next_stmt, new_stmt;
4946 unsigned int i, gap_count;
4947 tree tmp_data_ref;
ebfd146a 4948
4949 /* Put a permuted data-ref in the VECTORIZED_STMT field.
4950 Since we scan the chain starting from its first node, their order
4951 corresponds to the order of data-refs in RESULT_CHAIN. */
4952 next_stmt = first_stmt;
4953 gap_count = 1;
9771b263 4954 FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
4955 {
4956 if (!next_stmt)
4957 break;
4958
4959 /* Skip the gaps. Loads created for the gaps will be removed by the dead
4960 code elimination pass later. No need to check for the first stmt in
ebfd146a 4961 the group, since it always exists.
4962 GROUP_GAP is the number of steps in elements from the previous
4963 access (if there is no gap GROUP_GAP is 1). We skip loads that
ff802fa1 4964 correspond to the gaps. */
b8698a0f 4965 if (next_stmt != first_stmt
e14c1050 4966 && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
4967 {
4968 gap_count++;
4969 continue;
4970 }
4971
4972 while (next_stmt)
4973 {
4974 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
4975 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
4976 copies, and we put the new vector statement in the first available
4977 RELATED_STMT. */
4978 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
4979 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
4980 else
4981 {
e14c1050 4982 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
4983 {
4984 gimple prev_stmt =
4985 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
4986 gimple rel_stmt =
4987 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
4988 while (rel_stmt)
4989 {
4990 prev_stmt = rel_stmt;
b8698a0f 4991 rel_stmt =
4992 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
4993 }
4994
b8698a0f 4995 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
4996 new_stmt;
4997 }
4998 }
4999
e14c1050 5000 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
5001 gap_count = 1;
5002 /* If NEXT_STMT accesses the same DR as the previous statement,
5003 put the same TMP_DATA_REF as its vectorized statement; otherwise
5004 get the next data-ref from RESULT_CHAIN. */
e14c1050 5005 if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
5006 break;
5007 }
5008 }
5009}
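/* Illustrative sketch, not part of GCC: a toy model of the gap-skipping walk
   in vect_record_grouped_load_vectors.  The group is hypothetical: loads of
   a[4i], a[4i+1] and a[4i+3], so the second and third members have GROUP_GAP
   values 1 and 2 (the gap value of the first member is never consulted in
   this walk).  RESULT_CHAIN is modeled as one vector name per element slot,
   including the slot loaded only to cover the gap.  */

#include <stdio.h>

struct member { const char *access; unsigned int group_gap; };

int
main (void)
{
  /* Hypothetical interleaving group; group_gap = distance in elements
     from the previous access.  */
  struct member group[] = { { "a[4i]",   1 },   /* first member: gap unused */
                            { "a[4i+1]", 1 },
                            { "a[4i+3]", 2 } };
  const char *result_chain[] = { "vec0", "vec1", "vec2", "vec3" };
  unsigned int n_members = 3, n_results = 4;

  unsigned int next = 0, gap_count = 1;
  for (unsigned int i = 0; i < n_results && next < n_members; i++)
    {
      /* Skip result vectors that correspond to gaps in the group.  */
      if (next != 0 && gap_count < group[next].group_gap)
        {
          gap_count++;
          continue;
        }
      printf ("%s -> %s\n", result_chain[i], group[next].access);
      next++;
      gap_count = 1;
    }
  /* Prints:  vec0 -> a[4i]   vec1 -> a[4i+1]   vec3 -> a[4i+3]
     (vec2, the load created for the gap, is left for dead code removal).  */
  return 0;
}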
5010
5011/* Function vect_force_dr_alignment_p.
5012
5013 Returns whether the alignment of a DECL can be forced to an
5014 ALIGNMENT-bit boundary. */
5015
b8698a0f 5016bool
5017vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
5018{
5019 if (TREE_CODE (decl) != VAR_DECL)
5020 return false;
5021
5022 /* We cannot change alignment of common or external symbols as another
5023 translation unit may contain a definition with lower alignment.
5024 The rules of common symbol linking mean that the definition
5025 will override the common symbol. The same is true for constant
5026 pool entries which may be shared and are not properly merged
5027 by LTO. */
c205d0b3 5028 if (DECL_EXTERNAL (decl)
5029 || DECL_COMMON (decl)
5030 || DECL_IN_CONSTANT_POOL (decl))
5031 return false;
5032
5033 if (TREE_ASM_WRITTEN (decl))
5034 return false;
5035
5036 /* Do not override the alignment as specified by the ABI when the used
5037 attribute is set. */
5038 if (DECL_PRESERVE_P (decl))
5039 return false;
5040
5041 /* Do not override explicit alignment set by the user when an explicit
5042 section name is also used. This is a common idiom used by many
5043 software projects. */
5044 if (DECL_SECTION_NAME (decl) != NULL_TREE
5045 && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
5046 return false;
5047
5048 if (TREE_STATIC (decl))
5049 return (alignment <= MAX_OFILE_ALIGNMENT);
5050 else
5051 return (alignment <= MAX_STACK_ALIGNMENT);
5052}
5053
ebfd146a 5054
5055/* Return whether the data reference DR is supported with respect to its
5056 alignment.
5057 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
5058 if it is aligned, i.e., check if it is possible to vectorize it with different
5059 alignment. */
5060
5061enum dr_alignment_support
5062vect_supportable_dr_alignment (struct data_reference *dr,
5063 bool check_aligned_accesses)
5064{
5065 gimple stmt = DR_STMT (dr);
5066 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5067 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
81f40b79 5068 enum machine_mode mode = TYPE_MODE (vectype);
5069 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5070 struct loop *vect_loop = NULL;
5071 bool nested_in_vect_loop = false;
ebfd146a 5072
720f5239 5073 if (aligned_access_p (dr) && !check_aligned_accesses)
5074 return dr_aligned;
5075
5076 if (loop_vinfo)
5077 {
5078 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
5079 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
5080 }
a70d6342 5081
5082 /* Possibly unaligned access. */
5083
5084 /* We can choose between using the implicit realignment scheme (generating
5085 a misaligned_move stmt) and the explicit realignment scheme (generating
5086 aligned loads with a REALIGN_LOAD). There are two variants of the
5087 explicit realignment scheme: optimized and unoptimized.
5088 We can optimize the realignment only if the step between consecutive
5089 vector loads is equal to the vector size. Since the vector memory
5090 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
5091 is guaranteed that the misalignment amount remains the same throughout the
5092 execution of the vectorized loop. Therefore, we can create the
5093 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
5094 at the loop preheader.
5095
5096 However, in the case of outer-loop vectorization, when vectorizing a
5097 memory access in the inner-loop nested within the LOOP that is now being
5098 vectorized, while it is guaranteed that the misalignment of the
5099 vectorized memory access will remain the same in different outer-loop
5100 iterations, it is *not* guaranteed that is will remain the same throughout
5101 the execution of the inner-loop. This is because the inner-loop advances
5102 with the original scalar step (and not in steps of VS). If the inner-loop
5103 step happens to be a multiple of VS, then the misalignment remains fixed
5104 and we can use the optimized realignment scheme. For example:
5105
5106 for (i=0; i<N; i++)
5107 for (j=0; j<M; j++)
5108 s += a[i+j];
5109
5110 When vectorizing the i-loop in the above example, the step between
5111 consecutive vector loads is 1, and so the misalignment does not remain
5112 fixed across the execution of the inner-loop, and the realignment cannot
5113 be optimized (as illustrated in the following pseudo vectorized loop):
5114
5115 for (i=0; i<N; i+=4)
5116 for (j=0; j<M; j++){
5117 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
5118 // when j is {0,1,2,3,4,5,6,7,...} respectively.
5119 // (assuming that we start from an aligned address).
5120 }
5121
5122 We therefore have to use the unoptimized realignment scheme:
5123
5124 for (i=0; i<N; i+=4)
5125 for (j=k; j<M; j+=4)
5126 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
5127 // that the misalignment of the initial address is
5128 // 0).
5129
5130 The loop can then be vectorized as follows:
5131
5132 for (k=0; k<4; k++){
5133 rt = get_realignment_token (&vp[k]);
5134 for (i=0; i<N; i+=4){
5135 v1 = vp[i+k];
5136 for (j=k; j<M; j+=4){
5137 v2 = vp[i+j+VS-1];
5138 va = REALIGN_LOAD <v1,v2,rt>;
5139 vs += va;
5140 v1 = v2;
5141 }
5142 }
5143 } */
5144
5145 if (DR_IS_READ (dr))
5146 {
5147 bool is_packed = false;
5148 tree type = (TREE_TYPE (DR_REF (dr)));
5149
947131ba 5150 if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
5151 && (!targetm.vectorize.builtin_mask_for_load
5152 || targetm.vectorize.builtin_mask_for_load ()))
5153 {
5154 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
5155 if ((nested_in_vect_loop
5156 && (TREE_INT_CST_LOW (DR_STEP (dr))
5157 != GET_MODE_SIZE (TYPE_MODE (vectype))))
5158 || !loop_vinfo)
5159 return dr_explicit_realign;
5160 else
5161 return dr_explicit_realign_optimized;
5162 }
0601d0cf 5163 if (!known_alignment_for_access_p (dr))
4c9bcf89 5164 is_packed = not_size_aligned (DR_REF (dr));
b8698a0f 5165
5166 if ((TYPE_USER_ALIGN (type) && !is_packed)
5167 || targetm.vectorize.
5168 support_vector_misalignment (mode, type,
5169 DR_MISALIGNMENT (dr), is_packed))
5170 /* Can't software pipeline the loads, but can at least do them. */
5171 return dr_unaligned_supported;
5172 }
5173 else
5174 {
5175 bool is_packed = false;
5176 tree type = (TREE_TYPE (DR_REF (dr)));
ebfd146a 5177
0601d0cf 5178 if (!known_alignment_for_access_p (dr))
4c9bcf89 5179 is_packed = not_size_aligned (DR_REF (dr));
b8698a0f 5180
5181 if ((TYPE_USER_ALIGN (type) && !is_packed)
5182 || targetm.vectorize.
5183 support_vector_misalignment (mode, type,
5184 DR_MISALIGNMENT (dr), is_packed))
5185 return dr_unaligned_supported;
5186 }
b8698a0f 5187
5188 /* Unsupported. */
5189 return dr_unaligned_unsupported;
5190}
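/* Illustrative sketch, not part of GCC: the misalignment argument from the
   comment inside vect_supportable_dr_alignment, made concrete.  Assuming the
   base of vp is aligned, i advances in multiples of VS, and VS == 4 elements,
   the misalignment of &vp[i+j] is (j % VS): with the scalar inner step of 1
   it keeps cycling, while with an inner step that is a multiple of VS it
   stays fixed, which is what the optimized realignment scheme
   (dr_explicit_realign_optimized) relies on.  */

#include <stdio.h>

#define VS 4                      /* vector size in elements (assumed) */

int
main (void)
{
  printf ("inner step 1 :");
  for (int j = 0; j < 2 * VS; j++)
    printf (" %d", j % VS);       /* 0 1 2 3 0 1 2 3, not fixed        */

  printf ("\ninner step VS:");
  for (int j = 0; j < 2 * VS * VS; j += VS)
    printf (" %d", j % VS);       /* 0 0 0 0 0 0 0 0, stays the same   */

  printf ("\n");
  return 0;
}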