/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "dumpfile.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "target.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop.h"
#include "cfgloop.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"
/* Need to include rtl.h, expr.h, etc. for optabs.  */
#include "expr.h"
#include "optabs.h"

/* Return true if load- or store-lanes optab OPTAB is implemented for
   COUNT vectors of type VECTYPE.  NAME is the name of OPTAB.  */

static bool
vect_lanes_optab_supported_p (const char *name, convert_optab optab,
                              tree vectype, unsigned HOST_WIDE_INT count)
{
  enum machine_mode mode, array_mode;
  bool limit_p;

  mode = TYPE_MODE (vectype);
  limit_p = !targetm.array_mode_supported_p (mode, count);
  array_mode = mode_for_size (count * GET_MODE_BITSIZE (mode),
                              MODE_INT, limit_p);

  if (array_mode == BLKmode)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC "]\n",
                         GET_MODE_NAME (mode), count);
      return false;
    }

  if (convert_optab_handler (optab, array_mode, mode) == CODE_FOR_nothing)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "cannot use %s<%s><%s>\n", name,
                         GET_MODE_NAME (array_mode), GET_MODE_NAME (mode));
      return false;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "can use %s<%s><%s>\n", name, GET_MODE_NAME (array_mode),
                     GET_MODE_NAME (mode));

  return true;
}
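
/* Illustrative sketch (not part of the upstream sources): for a 128-bit
   V4SI VECTYPE and COUNT == 2, the code above asks for a 256-bit integer
   mode to serve as the array mode and then checks that the target provides
   a handler of OPTAB converting between that array mode and V4SI.  The
   concrete modes are target dependent; they are assumptions used only to
   show how the check composes.  */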

/* Return the smallest scalar part of STMT.
   This is used to determine the vectype of the stmt.  We generally set the
   vectype according to the type of the result (lhs).  For stmts whose
   result-type is different than the type of the arguments (e.g., demotion,
   promotion), vectype will be reset appropriately (later).  Note that we have
   to visit the smallest datatype in this function, because that determines the
   VF.  If the smallest datatype in the loop is present only as the rhs of a
   promotion operation - we'd miss it.
   Such a case, where a variable of this datatype does not appear in the lhs
   anywhere in the loop, can only occur if it's an invariant: e.g.:
   'int_x = (int) short_inv', which we'd expect to have been optimized away by
   invariant motion.  However, we cannot rely on invariant motion to always
   take invariants out of the loop, and so in the case of promotion we also
   have to check the rhs.
   LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
   types.  */

tree
vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
                               HOST_WIDE_INT *rhs_size_unit)
{
  tree scalar_type = gimple_expr_type (stmt);
  HOST_WIDE_INT lhs, rhs;

  lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));

  if (is_gimple_assign (stmt)
      && (gimple_assign_cast_p (stmt)
          || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
          || gimple_assign_rhs_code (stmt) == WIDEN_LSHIFT_EXPR
          || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
    {
      tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));

      rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
      if (rhs < lhs)
        scalar_type = rhs_type;
    }

  *lhs_size_unit = lhs;
  *rhs_size_unit = rhs;
  return scalar_type;
}
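
/* Illustrative example (not part of the upstream sources): for the promotion
   'int_x = (int) short_y' the lhs size is 4 bytes and the rhs size is 2
   bytes, so the function returns the short type; the smaller scalar size is
   what determines the vectorization factor.  The types are assumptions
   chosen only for the example.  */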

/* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
   tested at run-time.  Return TRUE if DDR was successfully inserted.
   Return false if versioning is not supported.  */

static bool
vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
    return false;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "mark for run-time aliasing test between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_A (ddr)));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (DDR_B (ddr)));
      dump_printf (MSG_NOTE, "\n");
    }

  if (optimize_loop_nest_for_size_p (loop))
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not supported when optimizing"
                         " for size.\n");
      return false;
    }

  /* FORNOW: We don't support versioning with outer-loop vectorization.  */
  if (loop->inner)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not yet supported for outer-loops.\n");
      return false;
    }

  /* FORNOW: We don't support creating runtime alias tests for non-constant
     step.  */
  if (TREE_CODE (DR_STEP (DDR_A (ddr))) != INTEGER_CST
      || TREE_CODE (DR_STEP (DDR_B (ddr))) != INTEGER_CST)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "versioning not yet supported for non-constant "
                         "step\n");
      return false;
    }

  LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo).safe_push (ddr);
  return true;
}

/* Function vect_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB.  When versioning for alias may check a
   dependence at run-time, return FALSE.  Adjust *MAX_VF according to
   the data dependence.  */

static bool
vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
                                  loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);
  stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
  stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
  lambda_vector dist_v;
  unsigned int loop_depth;

  /* In loop analysis all data references should be vectorizable.  */
  if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
      || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
    gcc_unreachable ();

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb
      || (DR_IS_READ (dra) && DR_IS_READ (drb)))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      /* If user asserted safelen consecutive iterations can be
         executed concurrently, assume independence.  */
      if (loop->safelen >= 2)
        {
          if (loop->safelen < *max_vf)
            *max_vf = loop->safelen;
          return false;
        }

      if (STMT_VINFO_GATHER_P (stmtinfo_a)
          || STMT_VINFO_GATHER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "versioning for alias not supported for: "
                               "can't determine dependence between ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dra));
              dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (drb));
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "can't determine dependence between ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                             DR_REF (drb));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  /* Known data dependence.  */
  if (DDR_NUM_DIST_VECTS (ddr) == 0)
    {
      /* If user asserted safelen consecutive iterations can be
         executed concurrently, assume independence.  */
      if (loop->safelen >= 2)
        {
          if (loop->safelen < *max_vf)
            *max_vf = loop->safelen;
          return false;
        }

      if (STMT_VINFO_GATHER_P (stmtinfo_a)
          || STMT_VINFO_GATHER_P (stmtinfo_b))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "versioning for alias not supported for: "
                               "bad dist vector for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dra));
              dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (drb));
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return true;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "versioning for alias required: "
                           "bad dist vector for ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      /* Add to list of ddrs that need to be tested at run-time.  */
      return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
    }

  loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
  FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
    {
      int dist = dist_v[loop_depth];

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "dependence distance = %d.\n", dist);

      if (dist == 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "dependence distance == 0 between ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
              dump_printf (MSG_NOTE, " and ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
              dump_printf (MSG_NOTE, "\n");
            }

          /* When we perform grouped accesses and perform implicit CSE
             by detecting equal accesses and doing disambiguation with
             runtime alias tests like for
                .. = a[i];
                .. = a[i+1];
                a[i] = ..;
                a[i+1] = ..;
                *p = ..;
                .. = a[i];
                .. = a[i+1];
             where we will end up loading { a[i], a[i+1] } once, make
             sure that inserting group loads before the first load and
             stores after the last store will do the right thing.  */
          if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a)
               && GROUP_SAME_DR_STMT (stmtinfo_a))
              || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b)
                  && GROUP_SAME_DR_STMT (stmtinfo_b)))
            {
              gimple earlier_stmt;
              earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
              if (DR_IS_WRITE
                    (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
                {
                  if (dump_enabled_p ())
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "READ_WRITE dependence in interleaving."
                                     "\n");
                  return true;
                }
            }

          continue;
        }

      if (dist > 0 && DDR_REVERSED_P (ddr))
        {
          /* If DDR_REVERSED_P the order of the data-refs in DDR was
             reversed (to make distance vector positive), and the actual
             distance is negative.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "dependence distance negative.\n");
          continue;
        }

      if (abs (dist) >= 2
          && abs (dist) < *max_vf)
        {
          /* The dependence distance requires reduction of the maximal
             vectorization factor.  */
          *max_vf = abs (dist);
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "adjusting maximal vectorization factor to %i\n",
                             *max_vf);
        }

      if (abs (dist) >= *max_vf)
        {
          /* Dependence distance does not create dependence, as far as
             vectorization is concerned, in this case.  */
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "dependence distance >= VF.\n");
          continue;
        }

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "not vectorized, possible dependence "
                           "between data-refs ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      return true;
    }

  return false;
}
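
/* Illustrative example (not part of the upstream sources): with a dependence
   distance of 2 and an initial *MAX_VF of 8, the loop above lowers *MAX_VF
   to 2 so that no vector spans both ends of the dependence; a distance of 8
   or more with *MAX_VF == 8 creates no vectorization hazard and is skipped.
   The numbers are assumptions chosen only for the example.  */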

/* Function vect_analyze_data_ref_dependences.

   Examine all the data references in the loop, and make sure there do not
   exist any data dependences between them.  Set *MAX_VF according to
   the maximum vectorization factor the data dependences allow.  */

bool
vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo, int *max_vf)
{
  unsigned int i;
  struct data_dependence_relation *ddr;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_analyze_data_ref_dependences ===\n");

  if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo),
                                &LOOP_VINFO_DDRS (loop_vinfo),
                                LOOP_VINFO_LOOP_NEST (loop_vinfo), true))
    return false;

  FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo), i, ddr)
    if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
      return false;

  return true;
}

/* Function vect_slp_analyze_data_ref_dependence.

   Return TRUE if there (might) exist a dependence between a memory-reference
   DRA and a memory-reference DRB that prohibits basic-block vectorization.
   Return FALSE otherwise.  */

static bool
vect_slp_analyze_data_ref_dependence (struct data_dependence_relation *ddr)
{
  struct data_reference *dra = DDR_A (ddr);
  struct data_reference *drb = DDR_B (ddr);

  /* We need to check dependences of statements marked as unvectorizable
     as well, they still can prohibit vectorization.  */

  /* Independent data accesses.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
    return false;

  if (dra == drb)
    return false;

  /* Read-read is OK.  */
  if (DR_IS_READ (dra) && DR_IS_READ (drb))
    return false;

  /* If dra and drb are part of the same interleaving chain consider
     them independent.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra)))
      && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra)))
          == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb)))))
    return false;

  /* Unknown data dependence.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    {
      gimple earlier_stmt;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "can't determine dependence between ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (dra));
          dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, DR_REF (drb));
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }

      /* We do not vectorize basic blocks with write-write dependencies.  */
      if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
        return true;

      /* Check that it's not a load-after-store dependence.  */
      earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
      if (DR_IS_WRITE (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
        return true;

      return false;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "determined dependence between ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
      dump_printf (MSG_NOTE, " and ");
      dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
      dump_printf (MSG_NOTE, "\n");
    }

  /* Do not vectorize basic blocks with write-write dependences.  */
  if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb))
    return true;

  /* Check dependence between DRA and DRB for basic block vectorization.
     If the accesses share same bases and offsets, we can compare their initial
     constant offsets to decide whether they differ or not.  In case of a read-
     write dependence we check that the load is before the store to ensure that
     vectorization will not change the order of the accesses.  */

  HOST_WIDE_INT type_size_a, type_size_b, init_a, init_b;
  gimple earlier_stmt;

  /* Check that the data-refs have same bases and offsets.  If not, we can't
     determine if they are dependent.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0)
      || !dr_equal_offsets_p (dra, drb))
    return true;

  /* Check the types.  */
  type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
  type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));

  if (type_size_a != type_size_b
      || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
                              TREE_TYPE (DR_REF (drb))))
    return true;

  init_a = TREE_INT_CST_LOW (DR_INIT (dra));
  init_b = TREE_INT_CST_LOW (DR_INIT (drb));

  /* Two different locations - no dependence.  */
  if (init_a != init_b)
    return false;

  /* We have a read-write dependence.  Check that the load is before the store.
     When we vectorize basic blocks, vector load can be only before
     corresponding scalar load, and vector store can be only after its
     corresponding scalar store.  So the order of the accesses is preserved in
     case the load is before the store.  */
  earlier_stmt = get_earlier_stmt (DR_STMT (dra), DR_STMT (drb));
  if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt))))
    return false;

  return true;
}
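
/* Illustrative note (assumptions only, not from the upstream sources): when
   two refs have equal bases, offsets and DR_INIT, e.g. a load and a store of
   the same a[i] in one block, the final check lets the pair through only if
   the load comes first, since basic-block vectorization keeps vector loads
   before and vector stores after their scalar counterparts; refs whose
   DR_INIT values differ are distinct locations and are treated as
   independent.  */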

/* Function vect_slp_analyze_data_ref_dependences.

   Examine all the data references in the basic-block, and make sure there
   do not exist any data dependences between them that would prohibit
   vectorization.  */

bool
vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo)
{
  struct data_dependence_relation *ddr;
  unsigned int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_slp_analyze_data_ref_dependences ===\n");

  if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo),
                                &BB_VINFO_DDRS (bb_vinfo),
                                vNULL, true))
    return false;

  FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo), i, ddr)
    if (vect_slp_analyze_data_ref_dependence (ddr))
      return false;

  return true;
}

/* Function vect_compute_data_ref_alignment

   Compute the misalignment of the data reference DR.

   Output:
   1. If during the misalignment computation it is found that the data reference
      cannot be vectorized then false is returned.
   2. DR_MISALIGNMENT (DR) is defined.

   FOR NOW: No analysis is actually performed.  Misalignment is calculated
   only for trivial cases.  TODO.  */

static bool
vect_compute_data_ref_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *loop = NULL;
  tree ref = DR_REF (dr);
  tree vectype;
  tree base, base_addr;
  bool base_aligned;
  tree misalign;
  tree aligned_to, alignment;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_compute_data_ref_alignment:\n");

  if (loop_vinfo)
    loop = LOOP_VINFO_LOOP (loop_vinfo);

  /* Initialize misalignment to unknown.  */
  SET_DR_MISALIGNMENT (dr, -1);

  /* Strided loads perform only component accesses, misalignment information
     is irrelevant for them.  */
  if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
    return true;

  misalign = DR_INIT (dr);
  aligned_to = DR_ALIGNED_TO (dr);
  base_addr = DR_BASE_ADDRESS (dr);
  vectype = STMT_VINFO_VECTYPE (stmt_info);

  /* In case the dataref is in an inner-loop of the loop that is being
     vectorized (LOOP), we use the base and misalignment information
     relative to the outer-loop (LOOP).  This is ok only if the misalignment
     stays the same throughout the execution of the inner-loop, which is why
     we have to check that the stride of the dataref in the inner-loop evenly
     divides by the vector size.  */
  if (loop && nested_in_vect_loop_p (loop, stmt))
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_NOTE, vect_location,
                             "inner step divides the vector-size.\n");
          misalign = STMT_VINFO_DR_INIT (stmt_info);
          aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
          base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
        }
      else
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "inner step doesn't divide the vector-size.\n");
          misalign = NULL_TREE;
        }
    }

  /* Similarly, if we're doing basic-block vectorization, we can only use
     base and misalignment information relative to an innermost loop if the
     misalignment stays the same throughout the execution of the loop.
     As above, this is the case if the stride of the dataref evenly divides
     by the vector size.  */
  if (!loop)
    {
      tree step = DR_STEP (dr);
      HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);

      if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) != 0)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "SLP: step doesn't divide the vector-size.\n");
          misalign = NULL_TREE;
        }
    }

  base = build_fold_indirect_ref (base_addr);
  alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);

  if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
      || !misalign)
    {
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Unknown alignment for access: ");
          dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, base);
          dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
        }
      return true;
    }

  if ((DECL_P (base)
       && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
                                alignment) >= 0)
      || (TREE_CODE (base_addr) == SSA_NAME
          && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
                                                  TREE_TYPE (base_addr)))),
                                   alignment) >= 0)
      || (get_pointer_alignment (base_addr) >= TYPE_ALIGN (vectype)))
    base_aligned = true;
  else
    base_aligned = false;

  if (!base_aligned)
    {
      /* Do not change the alignment of global variables here if
         flag_section_anchors is enabled as we already generated
         RTL for other functions.  Most global variables should
         have been aligned during the IPA increase_alignment pass.  */
      if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
          || (TREE_STATIC (base) && flag_section_anchors))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_NOTE, vect_location,
                               "can't force alignment of ref: ");
              dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
              dump_printf (MSG_NOTE, "\n");
            }
          return true;
        }

      /* Force the alignment of the decl.
         NOTE: This is the only change to the code we make during
         the analysis phase, before deciding to vectorize the loop.  */
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "force alignment of ");
          dump_generic_expr (MSG_NOTE, TDF_SLIM, ref);
          dump_printf (MSG_NOTE, "\n");
        }

      ((dataref_aux *)dr->aux)->base_decl = base;
      ((dataref_aux *)dr->aux)->base_misaligned = true;
    }

  /* If this is a backward running DR then first access in the larger
     vectype actually is N-1 elements before the address in the DR.
     Adjust misalign accordingly.  */
  if (tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0)
    {
      tree offset = ssize_int (TYPE_VECTOR_SUBPARTS (vectype) - 1);
      /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
         otherwise we wouldn't be here.  */
      offset = fold_build2 (MULT_EXPR, ssizetype, offset, DR_STEP (dr));
      /* PLUS because DR_STEP was negative.  */
      misalign = size_binop (PLUS_EXPR, misalign, offset);
    }

  /* Modulo alignment.  */
  misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);

  if (!tree_fits_uhwi_p (misalign))
    {
      /* Negative or overflowed misalignment value.  */
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "unexpected misalign value\n");
      return false;
    }

  SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                       "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, ref);
      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
    }

  return true;
}
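
/* Worked example (illustrative only, not from the upstream sources): for a
   V4SI vectype, TYPE_ALIGN is 128 bits, so ALIGNMENT above is 16 bytes.  If
   the base is known to be at least 16-byte aligned and DR_INIT is 20, the
   modulo step yields DR_MISALIGNMENT = 20 % 16 = 4 bytes.  The numbers are
   assumptions chosen to show how the computation composes.  */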

/* Function vect_compute_data_refs_alignment

   Compute the misalignment of data references in the loop.
   Return FALSE if a data reference is found that cannot be vectorized.  */

static bool
vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
                                  bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
        && !vect_compute_data_ref_alignment (dr))
      {
        if (bb_vinfo)
          {
            /* Mark unsupported statement as unvectorizable.  */
            STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
            continue;
          }
        else
          return false;
      }

  return true;
}

/* Function vect_update_misalignment_for_peel

   DR - the data reference whose misalignment is to be adjusted.
   DR_PEEL - the data reference whose misalignment is being made
             zero in the vector loop by the peel.
   NPEEL - the number of iterations in the peel loop if the misalignment
           of DR_PEEL is known at compile time.  */

static void
vect_update_misalignment_for_peel (struct data_reference *dr,
                                   struct data_reference *dr_peel, int npeel)
{
  unsigned int i;
  vec<dr_p> same_align_drs;
  struct data_reference *current_dr;
  int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
  stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
  stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));

  /* For interleaved data accesses the step in the loop must be multiplied by
     the size of the interleaving group.  */
  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    dr_size *= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)));
  if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info))
    dr_peel_size *= GROUP_SIZE (peel_stmt_info);

  /* It can be assumed that the data refs with the same alignment as dr_peel
     are aligned in the vector loop.  */
  same_align_drs
    = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
  FOR_EACH_VEC_ELT (same_align_drs, i, current_dr)
    {
      if (current_dr != dr)
        continue;
      gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
                  DR_MISALIGNMENT (dr_peel) / dr_peel_size);
      SET_DR_MISALIGNMENT (dr, 0);
      return;
    }

  if (known_alignment_for_access_p (dr)
      && known_alignment_for_access_p (dr_peel))
    {
      bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
      int misal = DR_MISALIGNMENT (dr);
      tree vectype = STMT_VINFO_VECTYPE (stmt_info);
      misal += negative ? -npeel * dr_size : npeel * dr_size;
      misal &= (TYPE_ALIGN (vectype) / BITS_PER_UNIT) - 1;
      SET_DR_MISALIGNMENT (dr, misal);
      return;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Setting misalignment to -1.\n");
  SET_DR_MISALIGNMENT (dr, -1);
}
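
/* Worked example (illustrative only, not from the upstream sources): with
   4-byte elements (DR_SIZE = 4), a 16-byte vector alignment, NPEEL = 3 and a
   forward step, a DR that started with misalignment 8 ends up with
   (8 + 3 * 4) & 15 = 4 after peeling.  The numbers are assumptions used only
   to illustrate the update above.  */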

/* Function vect_verify_datarefs_alignment

   Return TRUE if all data references in the loop can be
   handled with respect to alignment.  */

bool
vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  vec<data_reference_p> datarefs;
  struct data_reference *dr;
  enum dr_alignment_support supportable_dr_alignment;
  unsigned int i;

  if (loop_vinfo)
    datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  else
    datarefs = BB_VINFO_DATAREFS (bb_vinfo);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      gimple stmt = DR_STMT (dr);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access matters.
         Skip statements marked as not vectorizable.  */
      if ((STMT_VINFO_GROUPED_ACCESS (stmt_info)
           && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
          || !STMT_VINFO_VECTORIZABLE (stmt_info))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
      if (!supportable_dr_alignment)
        {
          if (dump_enabled_p ())
            {
              if (DR_IS_READ (dr))
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported unaligned load.");
              else
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "not vectorized: unsupported unaligned "
                                 "store.");

              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 DR_REF (dr));
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          return false;
        }
      if (supportable_dr_alignment != dr_aligned && dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "Vectorizing an unaligned access.\n");
    }
  return true;
}

/* Given a memory reference EXP return whether its alignment is less
   than its size.  */

static bool
not_size_aligned (tree exp)
{
  if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp))))
    return true;

  return (TREE_INT_CST_LOW (TYPE_SIZE (TREE_TYPE (exp)))
          > get_object_alignment (exp));
}

/* Function vector_alignment_reachable_p

   Return true if vector alignment for DR is reachable by peeling
   a few loop iterations.  Return false otherwise.  */

static bool
vector_alignment_reachable_p (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);

  if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      /* For interleaved access we peel only if number of iterations in
         the prolog loop ({VF - misalignment}) is a multiple of the
         number of the interleaved accesses.  */
      int elem_size, mis_in_elements;
      int nelements = TYPE_VECTOR_SUBPARTS (vectype);

      /* FORNOW: handle only known alignment.  */
      if (!known_alignment_for_access_p (dr))
        return false;

      elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
      mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;

      if ((nelements - mis_in_elements) % GROUP_SIZE (stmt_info))
        return false;
    }

  /* If misalignment is known at the compile time then allow peeling
     only if natural alignment is reachable through peeling.  */
  if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
    {
      HOST_WIDE_INT elmsize =
                int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
          dump_printf (MSG_NOTE,
                       ". misalignment = %d.\n", DR_MISALIGNMENT (dr));
        }
      if (DR_MISALIGNMENT (dr) % elmsize)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                             "data size does not divide the misalignment.\n");
          return false;
        }
    }

  if (!known_alignment_for_access_p (dr))
    {
      tree type = TREE_TYPE (DR_REF (dr));
      bool is_packed = not_size_aligned (DR_REF (dr));
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                         "Unknown misalignment, is_packed = %d\n", is_packed);
      if ((TYPE_USER_ALIGN (type) && !is_packed)
          || targetm.vectorize.vector_alignment_reachable (type, is_packed))
        return true;
      else
        return false;
    }

  return true;
}
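
/* Worked example (illustrative only, not from the upstream sources): with
   4-byte elements and a known misalignment of 6 bytes, 6 % 4 != 0, so no
   whole number of peeled iterations can reach natural alignment and the
   function returns false; a misalignment of 8 bytes with a 16-byte vector
   would be reachable by peeling two iterations.  The numbers are assumptions
   chosen only for the example.  */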

/* Calculate the cost of the memory access represented by DR.  */

static void
vect_get_data_access_cost (struct data_reference *dr,
                           unsigned int *inside_cost,
                           unsigned int *outside_cost,
                           stmt_vector_for_cost *body_cost_vec)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  int nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  int ncopies = vf / nunits;

  if (DR_IS_READ (dr))
    vect_get_load_cost (dr, ncopies, true, inside_cost, outside_cost,
                        NULL, body_cost_vec, false);
  else
    vect_get_store_cost (dr, ncopies, inside_cost, body_cost_vec);

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "vect_get_data_access_cost: inside_cost = %d, "
                     "outside_cost = %d.\n", *inside_cost, *outside_cost);
}

/* Insert DR into peeling hash table with NPEEL as key.  */

static void
vect_peeling_hash_insert (loop_vec_info loop_vinfo, struct data_reference *dr,
                          int npeel)
{
  struct _vect_peel_info elem, *slot;
  _vect_peel_info **new_slot;
  bool supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);

  elem.npeel = npeel;
  slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find (&elem);
  if (slot)
    slot->count++;
  else
    {
      slot = XNEW (struct _vect_peel_info);
      slot->npeel = npeel;
      slot->dr = dr;
      slot->count = 1;
      new_slot = LOOP_VINFO_PEELING_HTAB (loop_vinfo).find_slot (slot, INSERT);
      *new_slot = slot;
    }

  if (!supportable_dr_alignment && unlimited_cost_model ())
    slot->count += VECT_MAX_COST;
}

/* Traverse peeling hash table to find peeling option that aligns maximum
   number of data accesses.  */

int
vect_peeling_hash_get_most_frequent (_vect_peel_info **slot,
                                     _vect_peel_extended_info *max)
{
  vect_peel_info elem = *slot;

  if (elem->count > max->peel_info.count
      || (elem->count == max->peel_info.count
          && max->peel_info.npeel > elem->npeel))
    {
      max->peel_info.npeel = elem->npeel;
      max->peel_info.count = elem->count;
      max->peel_info.dr = elem->dr;
    }

  return 1;
}

/* Traverse peeling hash table and calculate cost for each peeling option.
   Find the one with the lowest cost.  */

int
vect_peeling_hash_get_lowest_cost (_vect_peel_info **slot,
                                   _vect_peel_extended_info *min)
{
  vect_peel_info elem = *slot;
  int save_misalignment, dummy;
  unsigned int inside_cost = 0, outside_cost = 0, i;
  gimple stmt = DR_STMT (elem->dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct data_reference *dr;
  stmt_vector_for_cost prologue_cost_vec, body_cost_vec, epilogue_cost_vec;
  int single_iter_cost;

  prologue_cost_vec.create (2);
  body_cost_vec.create (2);
  epilogue_cost_vec.create (2);

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);
      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      save_misalignment = DR_MISALIGNMENT (dr);
      vect_update_misalignment_for_peel (dr, elem->dr, elem->npeel);
      vect_get_data_access_cost (dr, &inside_cost, &outside_cost,
                                 &body_cost_vec);
      SET_DR_MISALIGNMENT (dr, save_misalignment);
    }

  single_iter_cost = vect_get_single_scalar_iteration_cost (loop_vinfo);
  outside_cost += vect_get_known_peeling_cost (loop_vinfo, elem->npeel,
                                               &dummy, single_iter_cost,
                                               &prologue_cost_vec,
                                               &epilogue_cost_vec);

  /* Prologue and epilogue costs are added to the target model later.
     These costs depend only on the scalar iteration cost, the
     number of peeling iterations finally chosen, and the number of
     misaligned statements.  So discard the information found here.  */
  prologue_cost_vec.release ();
  epilogue_cost_vec.release ();

  if (inside_cost < min->inside_cost
      || (inside_cost == min->inside_cost && outside_cost < min->outside_cost))
    {
      min->inside_cost = inside_cost;
      min->outside_cost = outside_cost;
      min->body_cost_vec.release ();
      min->body_cost_vec = body_cost_vec;
      min->peel_info.dr = elem->dr;
      min->peel_info.npeel = elem->npeel;
    }
  else
    body_cost_vec.release ();

  return 1;
}

/* Choose best peeling option by traversing peeling hash table and either
   choosing an option with the lowest cost (if cost model is enabled) or the
   option that aligns as many accesses as possible.  */

static struct data_reference *
vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo,
                                       unsigned int *npeel,
                                       stmt_vector_for_cost *body_cost_vec)
{
  struct _vect_peel_extended_info res;

  res.peel_info.dr = NULL;
  res.body_cost_vec = stmt_vector_for_cost ();

  if (!unlimited_cost_model ())
    {
      res.inside_cost = INT_MAX;
      res.outside_cost = INT_MAX;
      LOOP_VINFO_PEELING_HTAB (loop_vinfo)
          .traverse <_vect_peel_extended_info *,
                     vect_peeling_hash_get_lowest_cost> (&res);
    }
  else
    {
      res.peel_info.count = 0;
      LOOP_VINFO_PEELING_HTAB (loop_vinfo)
          .traverse <_vect_peel_extended_info *,
                     vect_peeling_hash_get_most_frequent> (&res);
    }

  *npeel = res.peel_info.npeel;
  *body_cost_vec = res.body_cost_vec;
  return res.peel_info.dr;
}

/* Function vect_enhance_data_refs_alignment

   This pass will use loop versioning and loop peeling in order to enhance
   the alignment of data references in the loop.

   FOR NOW: we assume that whatever versioning/peeling takes place, only the
   original loop is to be vectorized.  Any other loops that are created by
   the transformations performed in this pass - are not supposed to be
   vectorized.  This restriction will be relaxed.

   This pass will require a cost model to guide it whether to apply peeling
   or versioning or a combination of the two.  For example, the scheme that
   Intel uses when given a loop with several memory accesses, is as follows:
   choose one memory access ('p') whose alignment you want to force by doing
   peeling.  Then, either (1) generate a loop in which 'p' is aligned and all
   other accesses are not necessarily aligned, or (2) use loop versioning to
   generate one loop in which all accesses are aligned, and another loop in
   which only 'p' is necessarily aligned.

   ("Automatic Intra-Register Vectorization for the Intel Architecture",
   Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
   Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)

   Devising a cost model is the most critical aspect of this work.  It will
   guide us on which access to peel for, whether to use loop versioning, how
   many versions to create, etc.  The cost model will probably consist of
   generic considerations as well as target specific considerations (on
   powerpc for example, misaligned stores are more painful than misaligned
   loads).

   Here are the general steps involved in alignment enhancements:

     -- original loop, before alignment analysis:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = unknown
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- After vect_compute_data_refs_alignment:
        for (i=0; i<N; i++){
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unknown
        }

     -- Possibility 1: we do loop versioning:
     if (p is aligned) {
        for (i=0; i<N; i++){            # loop 1A
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i=0; i<N; i++){            # loop 1B
          x = q[i];                     # DR_MISALIGNMENT(q) = 3
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

     -- Possibility 2: we do loop peeling:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     for (i = 3; i < N; i++){           # loop 2A
        x = q[i];                       # DR_MISALIGNMENT(q) = 0
        p[i] = y;                       # DR_MISALIGNMENT(p) = unknown
     }

     -- Possibility 3: combination of loop peeling and versioning:
     for (i = 0; i < 3; i++){           # (scalar loop, not to be vectorized).
        x = q[i];
        p[i] = y;
     }
     if (p is aligned) {
        for (i = 3; i<N; i++){          # loop 3A
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = 0
        }
     }
     else {
        for (i = 3; i<N; i++){          # loop 3B
          x = q[i];                     # DR_MISALIGNMENT(q) = 0
          p[i] = y;                     # DR_MISALIGNMENT(p) = unaligned
        }
     }

   These loops are later passed to loop_transform to be vectorized.  The
   vectorizer will use the alignment information to guide the transformation
   (whether to generate regular loads/stores, or with special handling for
   misalignment).  */

bool
vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
{
  vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  enum dr_alignment_support supportable_dr_alignment;
  struct data_reference *dr0 = NULL, *first_store = NULL;
  struct data_reference *dr;
  unsigned int i, j;
  bool do_peeling = false;
  bool do_versioning = false;
  bool stat;
  gimple stmt;
  stmt_vec_info stmt_info;
  unsigned int npeel = 0;
  bool all_misalignments_unknown = true;
  unsigned int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
  unsigned possible_npeel_number = 1;
  tree vectype;
  unsigned int nelements, mis, same_align_drs_max = 0;
  stmt_vector_for_cost body_cost_vec = stmt_vector_for_cost ();

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "=== vect_enhance_data_refs_alignment ===\n");

  /* While cost model enhancements are expected in the future, the high level
     view of the code at this time is as follows:

     A) If there is a misaligned access then see if peeling to align
        this access can make all data references satisfy
        vect_supportable_dr_alignment.  If so, update data structures
        as needed and return true.

     B) If peeling wasn't possible and there is a data reference with an
        unknown misalignment that does not satisfy vect_supportable_dr_alignment
        then see if loop versioning checks can be used to make all data
        references satisfy vect_supportable_dr_alignment.  If so, update
        data structures as needed and return true.

     C) If neither peeling nor versioning were successful then return false if
        any data reference does not satisfy vect_supportable_dr_alignment.

     D) Return true (all data references satisfy vect_supportable_dr_alignment).

     Note, Possibility 3 above (which is peeling and versioning together) is not
     being done at this time.  */

  /* (1) Peeling to force alignment.  */

  /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
     Considerations:
     + How many accesses will become aligned due to the peeling
     - How many accesses will become unaligned due to the peeling,
       and the cost of misaligned accesses.
     - The cost of peeling (the extra runtime checks, the increase
       in code size).  */

  FOR_EACH_VEC_ELT (datarefs, i, dr)
    {
      stmt = DR_STMT (dr);
      stmt_info = vinfo_for_stmt (stmt);

      if (!STMT_VINFO_RELEVANT_P (stmt_info))
        continue;

      /* For interleaving, only the alignment of the first access
         matters.  */
      if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
          && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
        continue;

      /* For invariant accesses there is nothing to enhance.  */
      if (integer_zerop (DR_STEP (dr)))
        continue;

      /* Strided loads perform only component accesses, alignment is
         irrelevant for them.  */
      if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
        continue;

      supportable_dr_alignment = vect_supportable_dr_alignment (dr, true);
      do_peeling = vector_alignment_reachable_p (dr);
      if (do_peeling)
        {
          if (known_alignment_for_access_p (dr))
            {
              unsigned int npeel_tmp;
              bool negative = tree_int_cst_compare (DR_STEP (dr),
                                                    size_zero_node) < 0;

              /* Save info about DR in the hash table.  */
              if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo).is_created ())
                LOOP_VINFO_PEELING_HTAB (loop_vinfo).create (1);

              vectype = STMT_VINFO_VECTYPE (stmt_info);
              nelements = TYPE_VECTOR_SUBPARTS (vectype);
              mis = DR_MISALIGNMENT (dr) / GET_MODE_SIZE (TYPE_MODE (
                                                 TREE_TYPE (DR_REF (dr))));
              npeel_tmp = (negative
                           ? (mis - nelements) : (nelements - mis))
                          & (nelements - 1);
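
              /* Worked example (illustrative only, not from the upstream
                 sources): for a 4-element vector with MIS = 3 elements and a
                 positive step, NPEEL_TMP = (4 - 3) & 3 = 1, i.e. one scalar
                 iteration of peeling aligns this access.  The numbers are
                 assumptions chosen only for the example.  */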

              /* For multiple types, it is possible that the bigger type access
                 will have more than one peeling option.  E.g., a loop with two
                 types: one of size (vector size / 4), and the other one of
                 size (vector size / 8).  Vectorization factor will be 8.  If
                 both accesses are misaligned by 3, the first one needs one
                 scalar iteration to be aligned, and the second one needs 5.
                 But the first one will be aligned also by peeling 5 scalar
                 iterations, and in that case both accesses will be aligned.
                 Hence, except for the immediate peeling amount, we also want
                 to try to add full vector size, while we don't exceed
                 vectorization factor.
                 We do this automatically for cost model, since we calculate
                 cost for every peeling option.  */
              if (unlimited_cost_model ())
                possible_npeel_number = vf / nelements;

              /* Handle the aligned case.  We may decide to align some other
                 access, making DR unaligned.  */
              if (DR_MISALIGNMENT (dr) == 0)
                {
                  npeel_tmp = 0;
                  if (unlimited_cost_model ())
                    possible_npeel_number++;
                }

              for (j = 0; j < possible_npeel_number; j++)
                {
                  gcc_assert (npeel_tmp <= vf);
                  vect_peeling_hash_insert (loop_vinfo, dr, npeel_tmp);
                  npeel_tmp += nelements;
                }

              all_misalignments_unknown = false;
              /* Data-ref that was chosen for the case that all the
                 misalignments are unknown is not relevant anymore, since we
                 have a data-ref with known alignment.  */
              dr0 = NULL;
            }
          else
            {
              /* If we don't know any misalignment values, we prefer
                 peeling for data-ref that has the maximum number of data-refs
                 with the same alignment, unless the target prefers to align
                 stores over loads.  */
              if (all_misalignments_unknown)
                {
                  unsigned same_align_drs
                    = STMT_VINFO_SAME_ALIGN_REFS (stmt_info).length ();
                  if (!dr0
                      || same_align_drs_max < same_align_drs)
                    {
                      same_align_drs_max = same_align_drs;
                      dr0 = dr;
                    }
                  /* For data-refs with the same number of related
                     accesses prefer the one where the misalign
                     computation will be invariant in the outermost loop.  */
                  else if (same_align_drs_max == same_align_drs)
                    {
                      struct loop *ivloop0, *ivloop;
                      ivloop0 = outermost_invariant_loop_for_expr
                                  (loop, DR_BASE_ADDRESS (dr0));
                      ivloop = outermost_invariant_loop_for_expr
                                 (loop, DR_BASE_ADDRESS (dr));
                      if ((ivloop && !ivloop0)
                          || (ivloop && ivloop0
                              && flow_loop_nested_p (ivloop, ivloop0)))
                        dr0 = dr;
                    }

                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }

              /* If there are both known and unknown misaligned accesses in the
                 loop, we choose peeling amount according to the known
                 accesses.  */
              if (!supportable_dr_alignment)
                {
                  dr0 = dr;
                  if (!first_store && DR_IS_WRITE (dr))
                    first_store = dr;
                }
            }
        }
      else
        {
          if (!aligned_access_p (dr))
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "vector alignment may not be reachable\n");
              break;
            }
        }
    }
1517
afb119be
RB
1518 /* Check if we can possibly peel the loop. */
1519 if (!vect_can_advance_ivs_p (loop_vinfo)
ebfd146a
IR
1520 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1521 do_peeling = false;
1522
720f5239
IR
1523 if (do_peeling && all_misalignments_unknown
1524 && vect_supportable_dr_alignment (dr0, false))
1525 {
1526
1527 /* Check if the target requires to prefer stores over loads, i.e., if
1528 misaligned stores are more expensive than misaligned loads (taking
1529 drs with same alignment into account). */
1530 if (first_store && DR_IS_READ (dr0))
1531 {
1532 unsigned int load_inside_cost = 0, load_outside_cost = 0;
1533 unsigned int store_inside_cost = 0, store_outside_cost = 0;
1534 unsigned int load_inside_penalty = 0, load_outside_penalty = 0;
1535 unsigned int store_inside_penalty = 0, store_outside_penalty = 0;
9771b263
DN
1536 stmt_vector_for_cost dummy;
1537 dummy.create (2);
92345349
BS
1538
1539 vect_get_data_access_cost (dr0, &load_inside_cost, &load_outside_cost,
1540 &dummy);
1541 vect_get_data_access_cost (first_store, &store_inside_cost,
1542 &store_outside_cost, &dummy);
720f5239 1543
9771b263 1544 dummy.release ();
720f5239
IR
1545
1546 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1547 aligning the load DR0). */
1548 load_inside_penalty = store_inside_cost;
1549 load_outside_penalty = store_outside_cost;
9771b263
DN
1550 for (i = 0;
1551 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1552 DR_STMT (first_store))).iterate (i, &dr);
720f5239
IR
1553 i++)
1554 if (DR_IS_READ (dr))
1555 {
1556 load_inside_penalty += load_inside_cost;
1557 load_outside_penalty += load_outside_cost;
1558 }
1559 else
1560 {
1561 load_inside_penalty += store_inside_cost;
1562 load_outside_penalty += store_outside_cost;
1563 }
1564
1565 /* Calculate the penalty for leaving DR0 unaligned (by
1566 aligning the FIRST_STORE). */
1567 store_inside_penalty = load_inside_cost;
1568 store_outside_penalty = load_outside_cost;
9771b263
DN
1569 for (i = 0;
1570 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1571 DR_STMT (dr0))).iterate (i, &dr);
720f5239
IR
1572 i++)
1573 if (DR_IS_READ (dr))
1574 {
1575 store_inside_penalty += load_inside_cost;
1576 store_outside_penalty += load_outside_cost;
1577 }
1578 else
1579 {
1580 store_inside_penalty += store_inside_cost;
1581 store_outside_penalty += store_outside_cost;
1582 }
1583
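 /* If aligning the load DR0 (and thus leaving FIRST_STORE and its
 same-aligned refs unaligned) is costlier than aligning FIRST_STORE
 instead, peel for the store. */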
1584 if (load_inside_penalty > store_inside_penalty
1585 || (load_inside_penalty == store_inside_penalty
1586 && load_outside_penalty > store_outside_penalty))
1587 dr0 = first_store;
1588 }
1589
1590 /* In case there are only loads with different unknown misalignments, use
1591 peeling only if it may help to align other accesses in the loop. */
9771b263
DN
1592 if (!first_store
1593 && !STMT_VINFO_SAME_ALIGN_REFS (
1594 vinfo_for_stmt (DR_STMT (dr0))).length ()
720f5239
IR
1595 && vect_supportable_dr_alignment (dr0, false)
1596 != dr_unaligned_supported)
1597 do_peeling = false;
1598 }
1599
1600 if (do_peeling && !dr0)
1601 {
 1602 /* Peeling is possible, but no data access strictly requires alignment
 1603 to be vectorizable. So we try to choose the best possible peeling. */
1604
1605 /* We should get here only if there are drs with known misalignment. */
1606 gcc_assert (!all_misalignments_unknown);
1607
1608 /* Choose the best peeling from the hash table. */
c3e7ee41 1609 dr0 = vect_peeling_hash_choose_best_peeling (loop_vinfo, &npeel,
92345349 1610 &body_cost_vec);
720f5239
IR
1611 if (!dr0 || !npeel)
1612 do_peeling = false;
1613 }
1614
ebfd146a
IR
1615 if (do_peeling)
1616 {
720f5239
IR
1617 stmt = DR_STMT (dr0);
1618 stmt_info = vinfo_for_stmt (stmt);
1619 vectype = STMT_VINFO_VECTYPE (stmt_info);
1620 nelements = TYPE_VECTOR_SUBPARTS (vectype);
ebfd146a
IR
1621
1622 if (known_alignment_for_access_p (dr0))
1623 {
d8ba5b19
RG
1624 bool negative = tree_int_cst_compare (DR_STEP (dr0),
1625 size_zero_node) < 0;
720f5239
IR
1626 if (!npeel)
1627 {
1628 /* Since it's known at compile time, compute the number of
1629 iterations in the peeled loop (the peeling factor) for use in
1630 updating DR_MISALIGNMENT values. The peeling factor is the
1631 vectorization factor minus the misalignment as an element
1632 count. */
1633 mis = DR_MISALIGNMENT (dr0);
1634 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
8b8bba2d
RG
1635 npeel = ((negative ? mis - nelements : nelements - mis)
1636 & (nelements - 1));
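 /* For example, with 4-byte elements, nelements == 4 and a byte
 misalignment of 8, mis == 2 and npeel == (4 - 2) & 3 == 2:
 peeling two scalar iterations advances DR0 by 8 bytes to the
 next vector boundary. */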
720f5239 1637 }
ebfd146a 1638
b8698a0f 1639 /* For interleaved data access every iteration accesses all the
ebfd146a
IR
1640 members of the group, therefore we divide the number of iterations
1641 by the group size. */
b8698a0f 1642 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
0d0293ac 1643 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
e14c1050 1644 npeel /= GROUP_SIZE (stmt_info);
ebfd146a 1645
73fbfcad 1646 if (dump_enabled_p ())
78c60e3d 1647 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1648 "Try peeling by %d\n", npeel);
ebfd146a
IR
1649 }
1650
1651 /* Ensure that all data refs can be vectorized after the peel. */
9771b263 1652 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1653 {
1654 int save_misalignment;
1655
1656 if (dr == dr0)
1657 continue;
1658
1659 stmt = DR_STMT (dr);
1660 stmt_info = vinfo_for_stmt (stmt);
1661 /* For interleaving, only the alignment of the first access
1662 matters. */
0d0293ac 1663 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
e14c1050 1664 && GROUP_FIRST_ELEMENT (stmt_info) != stmt)
ebfd146a
IR
1665 continue;
1666
319e6439
RG
1667 /* Strided loads perform only component accesses, alignment is
1668 irrelevant for them. */
1669 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1670 continue;
1671
ebfd146a
IR
1672 save_misalignment = DR_MISALIGNMENT (dr);
1673 vect_update_misalignment_for_peel (dr, dr0, npeel);
720f5239 1674 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
ebfd146a 1675 SET_DR_MISALIGNMENT (dr, save_misalignment);
b8698a0f 1676
ebfd146a
IR
1677 if (!supportable_dr_alignment)
1678 {
1679 do_peeling = false;
1680 break;
1681 }
1682 }
1683
720f5239
IR
1684 if (do_peeling && known_alignment_for_access_p (dr0) && npeel == 0)
1685 {
1686 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1687 if (!stat)
1688 do_peeling = false;
1689 else
c7e62a26 1690 {
9771b263 1691 body_cost_vec.release ();
c7e62a26
RG
1692 return stat;
1693 }
720f5239
IR
1694 }
1695
4f17aa0b
XDL
1696 if (do_peeling)
1697 {
1698 unsigned max_allowed_peel
1699 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT);
1700 if (max_allowed_peel != (unsigned)-1)
1701 {
1702 unsigned max_peel = npeel;
1703 if (max_peel == 0)
1704 {
1705 gimple dr_stmt = DR_STMT (dr0);
1706 stmt_vec_info vinfo = vinfo_for_stmt (dr_stmt);
1707 tree vtype = STMT_VINFO_VECTYPE (vinfo);
1708 max_peel = TYPE_VECTOR_SUBPARTS (vtype) - 1;
1709 }
1710 if (max_peel > max_allowed_peel)
1711 {
1712 do_peeling = false;
1713 if (dump_enabled_p ())
1714 dump_printf_loc (MSG_NOTE, vect_location,
1715 "Disable peeling, max peels reached: %d\n", max_peel);
1716 }
1717 }
1718 }
1719
ebfd146a
IR
1720 if (do_peeling)
1721 {
c3e7ee41 1722 stmt_info_for_cost *si;
92345349 1723 void *data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
c3e7ee41 1724
ebfd146a
IR
1725 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1726 If the misalignment of DR_i is identical to that of dr0 then set
1727 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1728 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1729 by the peeling factor times the element size of DR_i (MOD the
1730 vectorization factor times the size). Otherwise, the
1731 misalignment of DR_i must be set to unknown. */
9771b263 1732 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1733 if (dr != dr0)
1734 vect_update_misalignment_for_peel (dr, dr0, npeel);
1735
1736 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
720f5239
IR
1737 if (npeel)
1738 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = npeel;
1739 else
1740 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
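 /* In the else case the misalignment of DR0 is unknown, so
 DR_MISALIGNMENT (dr0) is -1; the negative value signals that the
 actual number of peeled iterations has to be computed at run time. */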
ebfd146a 1741 SET_DR_MISALIGNMENT (dr0, 0);
73fbfcad 1742 if (dump_enabled_p ())
78c60e3d
SS
1743 {
1744 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1745 "Alignment of access forced using peeling.\n");
78c60e3d 1746 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1747 "Peeling for alignment will be applied.\n");
78c60e3d 1748 }
c3e7ee41
BS
1749 /* We've delayed passing the inside-loop peeling costs to the
1750 target cost model until we were sure peeling would happen.
1751 Do so now. */
9771b263 1752 if (body_cost_vec.exists ())
c3e7ee41 1753 {
9771b263 1754 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
92345349
BS
1755 {
1756 struct _stmt_vec_info *stmt_info
1757 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1758 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1759 si->misalign, vect_body);
1760 }
9771b263 1761 body_cost_vec.release ();
c3e7ee41
BS
1762 }
1763
a70d6342 1764 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
ebfd146a
IR
1765 gcc_assert (stat);
1766 return stat;
1767 }
1768 }
1769
9771b263 1770 body_cost_vec.release ();
ebfd146a
IR
1771
1772 /* (2) Versioning to force alignment. */
1773
1774 /* Try versioning if:
d6d11272
XDL
1775 1) optimize loop for speed
1776 2) there is at least one unsupported misaligned data ref with an unknown
ebfd146a 1777 misalignment, and
d6d11272
XDL
1778 3) all misaligned data refs with a known misalignment are supported, and
1779 4) the number of runtime alignment checks is within reason. */
ebfd146a 1780
b8698a0f 1781 do_versioning =
d6d11272 1782 optimize_loop_nest_for_speed_p (loop)
ebfd146a
IR
1783 && (!loop->inner); /* FORNOW */
1784
1785 if (do_versioning)
1786 {
9771b263 1787 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
1788 {
1789 stmt = DR_STMT (dr);
1790 stmt_info = vinfo_for_stmt (stmt);
1791
1792 /* For interleaving, only the alignment of the first access
1793 matters. */
1794 if (aligned_access_p (dr)
0d0293ac 1795 || (STMT_VINFO_GROUPED_ACCESS (stmt_info)
e14c1050 1796 && GROUP_FIRST_ELEMENT (stmt_info) != stmt))
ebfd146a
IR
1797 continue;
1798
319e6439
RG
1799 /* Strided loads perform only component accesses, alignment is
1800 irrelevant for them. */
1801 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info))
1802 continue;
1803
720f5239 1804 supportable_dr_alignment = vect_supportable_dr_alignment (dr, false);
ebfd146a
IR
1805
1806 if (!supportable_dr_alignment)
1807 {
1808 gimple stmt;
1809 int mask;
1810 tree vectype;
1811
1812 if (known_alignment_for_access_p (dr)
9771b263 1813 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).length ()
ebfd146a
IR
1814 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1815 {
1816 do_versioning = false;
1817 break;
1818 }
1819
1820 stmt = DR_STMT (dr);
1821 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1822 gcc_assert (vectype);
b8698a0f 1823
ebfd146a
IR
1824 /* The rightmost bits of an aligned address must be zeros.
1825 Construct the mask needed for this test. For example,
1826 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1827 mask must be 15 = 0xf. */
1828 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1829
1830 /* FORNOW: use the same mask to test all potentially unaligned
1831 references in the loop. The vectorizer currently supports
1832 a single vector size, see the reference to
1833 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1834 vectorization factor is computed. */
1835 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1836 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1837 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
9771b263
DN
1838 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).safe_push (
1839 DR_STMT (dr));
ebfd146a
IR
1840 }
1841 }
b8698a0f 1842
ebfd146a 1843 /* Versioning requires at least one misaligned data reference. */
e9dbe7bb 1844 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
ebfd146a
IR
1845 do_versioning = false;
1846 else if (!do_versioning)
9771b263 1847 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo).truncate (0);
ebfd146a
IR
1848 }
1849
1850 if (do_versioning)
1851 {
9771b263 1852 vec<gimple> may_misalign_stmts
ebfd146a
IR
1853 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1854 gimple stmt;
1855
1856 /* It can now be assumed that the data references in the statements
1857 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1858 of the loop being vectorized. */
9771b263 1859 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt)
ebfd146a
IR
1860 {
1861 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1862 dr = STMT_VINFO_DATA_REF (stmt_info);
1863 SET_DR_MISALIGNMENT (dr, 0);
73fbfcad 1864 if (dump_enabled_p ())
e645e942
TJ
1865 dump_printf_loc (MSG_NOTE, vect_location,
1866 "Alignment of access forced using versioning.\n");
ebfd146a
IR
1867 }
1868
73fbfcad 1869 if (dump_enabled_p ())
e645e942
TJ
1870 dump_printf_loc (MSG_NOTE, vect_location,
1871 "Versioning for alignment will be applied.\n");
ebfd146a
IR
1872
1873 /* Peeling and versioning can't be done together at this time. */
1874 gcc_assert (! (do_peeling && do_versioning));
1875
a70d6342 1876 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
ebfd146a
IR
1877 gcc_assert (stat);
1878 return stat;
1879 }
1880
1881 /* This point is reached if neither peeling nor versioning is being done. */
1882 gcc_assert (! (do_peeling || do_versioning));
1883
a70d6342 1884 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
ebfd146a
IR
1885 return stat;
1886}
1887
1888
777e1f09
RG
1889/* Function vect_find_same_alignment_drs.
1890
1891 Update group and alignment relations according to the chosen
1892 vectorization factor. */
1893
1894static void
1895vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1896 loop_vec_info loop_vinfo)
1897{
1898 unsigned int i;
1899 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1900 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1901 struct data_reference *dra = DDR_A (ddr);
1902 struct data_reference *drb = DDR_B (ddr);
1903 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1904 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1905 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1906 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1907 lambda_vector dist_v;
1908 unsigned int loop_depth;
1909
1910 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1911 return;
1912
720f5239 1913 if (dra == drb)
777e1f09
RG
1914 return;
1915
1916 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1917 return;
1918
1919 /* Loop-based vectorization and known data dependence. */
1920 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1921 return;
1922
46241ea9
RG
1923 /* Data-dependence analysis reports a distance vector of zero
1924 for data-references that overlap only in the first iteration
 1925 but have steps of different sign (see PR45764).
1926 So as a sanity check require equal DR_STEP. */
1927 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
1928 return;
1929
777e1f09 1930 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
9771b263 1931 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
777e1f09
RG
1932 {
1933 int dist = dist_v[loop_depth];
1934
73fbfcad 1935 if (dump_enabled_p ())
78c60e3d 1936 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1937 "dependence distance = %d.\n", dist);
777e1f09
RG
1938
1939 /* Same loop iteration. */
1940 if (dist == 0
1941 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1942 {
1943 /* Two references with distance zero have the same alignment. */
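 /* More generally, a distance that is a multiple of the vectorization
 factor (with equal access sizes) leaves both references at the same
 offset within a vector, e.g. accesses four iterations apart when
 VF == 4. */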
9771b263
DN
1944 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a).safe_push (drb);
1945 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b).safe_push (dra);
73fbfcad 1946 if (dump_enabled_p ())
777e1f09 1947 {
e645e942
TJ
1948 dump_printf_loc (MSG_NOTE, vect_location,
1949 "accesses have the same alignment.\n");
78c60e3d 1950 dump_printf (MSG_NOTE,
e645e942 1951 "dependence distance modulo vf == 0 between ");
78c60e3d
SS
1952 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
1953 dump_printf (MSG_NOTE, " and ");
1954 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
e645e942 1955 dump_printf (MSG_NOTE, "\n");
777e1f09
RG
1956 }
1957 }
1958 }
1959}
1960
1961
ebfd146a
IR
1962/* Function vect_analyze_data_refs_alignment
1963
1964 Analyze the alignment of the data-references in the loop.
1965 Return FALSE if a data reference is found that cannot be vectorized. */
1966
1967bool
b8698a0f 1968vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
a70d6342 1969 bb_vec_info bb_vinfo)
ebfd146a 1970{
73fbfcad 1971 if (dump_enabled_p ())
78c60e3d 1972 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1973 "=== vect_analyze_data_refs_alignment ===\n");
ebfd146a 1974
777e1f09
RG
1975 /* Mark groups of data references with same alignment using
1976 data dependence information. */
1977 if (loop_vinfo)
1978 {
9771b263 1979 vec<ddr_p> ddrs = LOOP_VINFO_DDRS (loop_vinfo);
777e1f09
RG
1980 struct data_dependence_relation *ddr;
1981 unsigned int i;
1982
9771b263 1983 FOR_EACH_VEC_ELT (ddrs, i, ddr)
777e1f09
RG
1984 vect_find_same_alignment_drs (ddr, loop_vinfo);
1985 }
1986
a70d6342 1987 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
ebfd146a 1988 {
73fbfcad 1989 if (dump_enabled_p ())
e645e942
TJ
1990 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1991 "not vectorized: can't calculate alignment "
1992 "for data ref.\n");
ebfd146a
IR
1993 return false;
1994 }
1995
1996 return true;
1997}
1998
1999
0d0293ac
MM
2000/* Analyze groups of accesses: check that DR belongs to a group of
2001 accesses of legal size, step, etc. Detect gaps, single element
2002 interleaving, and other special cases. Set grouped access info.
ebfd146a
IR
2003 Collect groups of strided stores for further use in SLP analysis. */
2004
2005static bool
2006vect_analyze_group_access (struct data_reference *dr)
2007{
2008 tree step = DR_STEP (dr);
2009 tree scalar_type = TREE_TYPE (DR_REF (dr));
2010 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
2011 gimple stmt = DR_STMT (dr);
2012 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2013 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 2014 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
ebfd146a 2015 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
0d0293ac 2016 HOST_WIDE_INT groupsize, last_accessed_element = 1;
ebfd146a 2017 bool slp_impossible = false;
deaf836c
IR
2018 struct loop *loop = NULL;
2019
2020 if (loop_vinfo)
2021 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 2022
0d0293ac
MM
2023 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2024 size of the interleaving group (including gaps). */
08940f33 2025 groupsize = absu_hwi (dr_step) / type_size;
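 /* For example, 4-byte elements accessed with a step of 16 bytes give
 groupsize == 4: each scalar iteration covers a group of four element
 slots (accessed members plus gaps). */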
ebfd146a
IR
2026
 2027 /* A non-consecutive access is possible only if it is part of interleaving. */
e14c1050 2028 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a
IR
2029 {
 2030 /* Check if this DR is part of an interleaving group, and is a single
 2031 element of the group that is accessed in the loop. */
b8698a0f 2032
ebfd146a
IR
2033 /* Gaps are supported only for loads. STEP must be a multiple of the type
2034 size. The size of the group must be a power of 2. */
2035 if (DR_IS_READ (dr)
2036 && (dr_step % type_size) == 0
0d0293ac
MM
2037 && groupsize > 0
2038 && exact_log2 (groupsize) != -1)
ebfd146a 2039 {
e14c1050 2040 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = stmt;
0d0293ac 2041 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
73fbfcad 2042 if (dump_enabled_p ())
ebfd146a 2043 {
e645e942
TJ
2044 dump_printf_loc (MSG_NOTE, vect_location,
2045 "Detected single element interleaving ");
78c60e3d
SS
2046 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dr));
2047 dump_printf (MSG_NOTE, " step ");
2048 dump_generic_expr (MSG_NOTE, TDF_SLIM, step);
e645e942 2049 dump_printf (MSG_NOTE, "\n");
ebfd146a 2050 }
48df3fa6
IR
2051
2052 if (loop_vinfo)
2053 {
73fbfcad 2054 if (dump_enabled_p ())
78c60e3d 2055 dump_printf_loc (MSG_NOTE, vect_location,
e645e942
TJ
2056 "Data access with gaps requires scalar "
2057 "epilogue loop\n");
deaf836c
IR
2058 if (loop->inner)
2059 {
73fbfcad 2060 if (dump_enabled_p ())
78c60e3d
SS
2061 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2062 "Peeling for outer loop is not"
e645e942 2063 " supported\n");
deaf836c
IR
2064 return false;
2065 }
2066
2067 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
48df3fa6
IR
2068 }
2069
ebfd146a
IR
2070 return true;
2071 }
4b5caab7 2072
73fbfcad 2073 if (dump_enabled_p ())
4b5caab7 2074 {
78c60e3d 2075 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
2076 "not consecutive access ");
2077 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
2078 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4b5caab7
IR
2079 }
2080
2081 if (bb_vinfo)
2082 {
2083 /* Mark the statement as unvectorizable. */
2084 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2085 return true;
2086 }
78c60e3d 2087
ebfd146a
IR
2088 return false;
2089 }
2090
e14c1050 2091 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt)
ebfd146a
IR
2092 {
2093 /* First stmt in the interleaving chain. Check the chain. */
e14c1050 2094 gimple next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
ebfd146a 2095 struct data_reference *data_ref = dr;
df398a37 2096 unsigned int count = 1;
ebfd146a
IR
2097 tree prev_init = DR_INIT (data_ref);
2098 gimple prev = stmt;
08940f33
RB
2099 HOST_WIDE_INT diff, gaps = 0;
2100 unsigned HOST_WIDE_INT count_in_bytes;
ebfd146a
IR
2101
2102 while (next)
2103 {
ff802fa1
IR
2104 /* Skip same data-refs. In case that two or more stmts share
2105 data-ref (supported only for loads), we vectorize only the first
2106 stmt, and the rest get their vectorized loads from the first
2107 one. */
ebfd146a
IR
2108 if (!tree_int_cst_compare (DR_INIT (data_ref),
2109 DR_INIT (STMT_VINFO_DATA_REF (
2110 vinfo_for_stmt (next)))))
2111 {
b0af49c4 2112 if (DR_IS_WRITE (data_ref))
ebfd146a 2113 {
73fbfcad 2114 if (dump_enabled_p ())
e645e942
TJ
2115 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2116 "Two store stmts share the same dr.\n");
ebfd146a
IR
2117 return false;
2118 }
2119
ebfd146a 2120 /* For load use the same data-ref load. */
e14c1050 2121 GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
ebfd146a
IR
2122
2123 prev = next;
e14c1050 2124 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
ebfd146a
IR
2125 continue;
2126 }
48df3fa6 2127
ebfd146a 2128 prev = next;
08940f33 2129 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
ebfd146a 2130
08940f33
RB
2131 /* All group members have the same STEP by construction. */
2132 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref), step, 0));
ebfd146a 2133
ebfd146a
IR
2134 /* Check that the distance between two accesses is equal to the type
2135 size. Otherwise, we have gaps. */
2136 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
2137 - TREE_INT_CST_LOW (prev_init)) / type_size;
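 /* E.g. 4-byte elements with inits 0 and 8 give diff == 2, i.e. a gap
 of one element between the two accesses. */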
2138 if (diff != 1)
2139 {
2140 /* FORNOW: SLP of accesses with gaps is not supported. */
2141 slp_impossible = true;
b0af49c4 2142 if (DR_IS_WRITE (data_ref))
ebfd146a 2143 {
73fbfcad 2144 if (dump_enabled_p ())
e645e942
TJ
2145 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2146 "interleaved store with gaps\n");
ebfd146a
IR
2147 return false;
2148 }
4da39468
IR
2149
2150 gaps += diff - 1;
ebfd146a
IR
2151 }
2152
48df3fa6
IR
2153 last_accessed_element += diff;
2154
ebfd146a 2155 /* Store the gap from the previous member of the group. If there is no
e14c1050
IR
2156 gap in the access, GROUP_GAP is always 1. */
2157 GROUP_GAP (vinfo_for_stmt (next)) = diff;
ebfd146a
IR
2158
2159 prev_init = DR_INIT (data_ref);
e14c1050 2160 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
ebfd146a
IR
2161 /* Count the number of data-refs in the chain. */
2162 count++;
2163 }
2164
 2165 /* COUNT is the number of accesses found; we multiply it by the size of
2166 the type to get COUNT_IN_BYTES. */
2167 count_in_bytes = type_size * count;
2168
b8698a0f 2169 /* Check that the size of the interleaving (including gaps) is not
a70d6342 2170 greater than STEP. */
08940f33
RB
2171 if (dr_step != 0
2172 && absu_hwi (dr_step) < count_in_bytes + gaps * type_size)
ebfd146a 2173 {
73fbfcad 2174 if (dump_enabled_p ())
ebfd146a 2175 {
e645e942 2176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2177 "interleaving size is greater than step for ");
e645e942
TJ
2178 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2179 DR_REF (dr));
2180 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
2181 }
2182 return false;
2183 }
2184
2185 /* Check that the size of the interleaving is equal to STEP for stores,
2186 i.e., that there are no gaps. */
08940f33
RB
2187 if (dr_step != 0
2188 && absu_hwi (dr_step) != count_in_bytes)
ebfd146a
IR
2189 {
2190 if (DR_IS_READ (dr))
2191 {
2192 slp_impossible = true;
2193 /* There is a gap after the last load in the group. This gap is a
0d0293ac
MM
2194 difference between the groupsize and the number of elements.
2195 When there is no gap, this difference should be 0. */
2196 GROUP_GAP (vinfo_for_stmt (stmt)) = groupsize - count;
ebfd146a
IR
2197 }
2198 else
2199 {
73fbfcad 2200 if (dump_enabled_p ())
e645e942
TJ
2201 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2202 "interleaved store with gaps\n");
ebfd146a
IR
2203 return false;
2204 }
2205 }
2206
2207 /* Check that STEP is a multiple of type size. */
08940f33
RB
2208 if (dr_step != 0
2209 && (dr_step % type_size) != 0)
ebfd146a 2210 {
73fbfcad 2211 if (dump_enabled_p ())
ebfd146a 2212 {
78c60e3d
SS
2213 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2214 "step is not a multiple of type size: step ");
2215 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, step);
2216 dump_printf (MSG_MISSED_OPTIMIZATION, " size ");
2217 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2218 TYPE_SIZE_UNIT (scalar_type));
e645e942 2219 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
2220 }
2221 return false;
2222 }
2223
0d0293ac
MM
2224 if (groupsize == 0)
2225 groupsize = count;
b8698a0f 2226
0d0293ac 2227 GROUP_SIZE (vinfo_for_stmt (stmt)) = groupsize;
73fbfcad 2228 if (dump_enabled_p ())
e645e942
TJ
2229 dump_printf_loc (MSG_NOTE, vect_location,
2230 "Detected interleaving of size %d\n", (int)groupsize);
ebfd146a 2231
b8698a0f 2232 /* SLP: create an SLP data structure for every interleaving group of
ebfd146a 2233 stores for further analysis in vect_analyse_slp. */
b0af49c4 2234 if (DR_IS_WRITE (dr) && !slp_impossible)
a70d6342
IR
2235 {
2236 if (loop_vinfo)
9771b263 2237 LOOP_VINFO_GROUPED_STORES (loop_vinfo).safe_push (stmt);
a70d6342 2238 if (bb_vinfo)
9771b263 2239 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt);
a70d6342 2240 }
48df3fa6
IR
2241
2242 /* There is a gap in the end of the group. */
0d0293ac 2243 if (groupsize - last_accessed_element > 0 && loop_vinfo)
48df3fa6 2244 {
73fbfcad 2245 if (dump_enabled_p ())
78c60e3d 2246 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942
TJ
2247 "Data access with gaps requires scalar "
2248 "epilogue loop\n");
deaf836c
IR
2249 if (loop->inner)
2250 {
73fbfcad 2251 if (dump_enabled_p ())
e645e942
TJ
2252 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2253 "Peeling for outer loop is not supported\n");
deaf836c
IR
2254 return false;
2255 }
2256
2257 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = true;
48df3fa6 2258 }
ebfd146a
IR
2259 }
2260
2261 return true;
2262}
2263
2264
2265/* Analyze the access pattern of the data-reference DR.
2266 In case of non-consecutive accesses call vect_analyze_group_access() to
0d0293ac 2267 analyze groups of accesses. */
ebfd146a
IR
2268
2269static bool
2270vect_analyze_data_ref_access (struct data_reference *dr)
2271{
2272 tree step = DR_STEP (dr);
2273 tree scalar_type = TREE_TYPE (DR_REF (dr));
2274 gimple stmt = DR_STMT (dr);
2275 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2276 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342 2277 struct loop *loop = NULL;
ebfd146a 2278
a70d6342
IR
2279 if (loop_vinfo)
2280 loop = LOOP_VINFO_LOOP (loop_vinfo);
b8698a0f 2281
a70d6342 2282 if (loop_vinfo && !step)
ebfd146a 2283 {
73fbfcad 2284 if (dump_enabled_p ())
e645e942
TJ
2285 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2286 "bad data-ref access in loop\n");
ebfd146a
IR
2287 return false;
2288 }
2289
6e8dad05 2290 /* Allow invariant loads in not nested loops. */
319e6439 2291 if (loop_vinfo && integer_zerop (step))
39becbac
RG
2292 {
2293 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
6e8dad05
RB
2294 if (nested_in_vect_loop_p (loop, stmt))
2295 {
2296 if (dump_enabled_p ())
2297 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2298 "zero step in inner loop of nest\n");
6e8dad05
RB
2299 return false;
2300 }
39becbac
RG
2301 return DR_IS_READ (dr);
2302 }
ebfd146a 2303
a70d6342 2304 if (loop && nested_in_vect_loop_p (loop, stmt))
ebfd146a
IR
2305 {
2306 /* Interleaved accesses are not yet supported within outer-loop
2307 vectorization for references in the inner-loop. */
e14c1050 2308 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
ebfd146a
IR
2309
2310 /* For the rest of the analysis we use the outer-loop step. */
2311 step = STMT_VINFO_DR_STEP (stmt_info);
319e6439 2312 if (integer_zerop (step))
ebfd146a 2313 {
73fbfcad 2314 if (dump_enabled_p ())
78c60e3d 2315 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2316 "zero step in outer loop.\n");
ebfd146a 2317 if (DR_IS_READ (dr))
b8698a0f 2318 return true;
ebfd146a
IR
2319 else
2320 return false;
2321 }
2322 }
2323
2324 /* Consecutive? */
319e6439 2325 if (TREE_CODE (step) == INTEGER_CST)
ebfd146a 2326 {
319e6439
RG
2327 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
2328 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type))
2329 || (dr_step < 0
2330 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type), -dr_step)))
2331 {
2332 /* Mark that it is not interleaving. */
2333 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = NULL;
2334 return true;
2335 }
ebfd146a
IR
2336 }
2337
a70d6342 2338 if (loop && nested_in_vect_loop_p (loop, stmt))
ebfd146a 2339 {
73fbfcad 2340 if (dump_enabled_p ())
78c60e3d 2341 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2342 "grouped access in outer loop.\n");
ebfd146a
IR
2343 return false;
2344 }
2345
319e6439
RG
 2346 /* Assume this is a DR handled by the non-constant strided load case. */
2347 if (TREE_CODE (step) != INTEGER_CST)
2348 return STMT_VINFO_STRIDE_LOAD_P (stmt_info);
2349
ebfd146a
IR
2350 /* Not consecutive access - check if it's a part of interleaving group. */
2351 return vect_analyze_group_access (dr);
2352}
2353
839c74bc
CH
2354
2355
2356/* A helper function used in the comparator function to sort data
 2357 references. T1 and T2 are the two trees to be compared.
2358 The function returns -1, 0, or 1. */
2359
2360static int
2361compare_tree (tree t1, tree t2)
2362{
2363 int i, cmp;
2364 enum tree_code code;
2365 char tclass;
2366
2367 if (t1 == t2)
2368 return 0;
2369 if (t1 == NULL)
2370 return -1;
2371 if (t2 == NULL)
2372 return 1;
2373
2374
2375 if (TREE_CODE (t1) != TREE_CODE (t2))
2376 return TREE_CODE (t1) < TREE_CODE (t2) ? -1 : 1;
2377
2378 code = TREE_CODE (t1);
2379 switch (code)
2380 {
2381 /* For const values, we can just use hash values for comparisons. */
2382 case INTEGER_CST:
2383 case REAL_CST:
2384 case FIXED_CST:
2385 case STRING_CST:
2386 case COMPLEX_CST:
2387 case VECTOR_CST:
2388 {
2389 hashval_t h1 = iterative_hash_expr (t1, 0);
2390 hashval_t h2 = iterative_hash_expr (t2, 0);
2391 if (h1 != h2)
2392 return h1 < h2 ? -1 : 1;
2393 break;
2394 }
2395
2396 case SSA_NAME:
2397 cmp = compare_tree (SSA_NAME_VAR (t1), SSA_NAME_VAR (t2));
2398 if (cmp != 0)
2399 return cmp;
2400
2401 if (SSA_NAME_VERSION (t1) != SSA_NAME_VERSION (t2))
2402 return SSA_NAME_VERSION (t1) < SSA_NAME_VERSION (t2) ? -1 : 1;
2403 break;
2404
2405 default:
2406 tclass = TREE_CODE_CLASS (code);
2407
2408 /* For var-decl, we could compare their UIDs. */
2409 if (tclass == tcc_declaration)
2410 {
2411 if (DECL_UID (t1) != DECL_UID (t2))
2412 return DECL_UID (t1) < DECL_UID (t2) ? -1 : 1;
2413 break;
2414 }
2415
2416 /* For expressions with operands, compare their operands recursively. */
2417 for (i = TREE_OPERAND_LENGTH (t1) - 1; i >= 0; --i)
2418 {
2419 cmp = compare_tree (TREE_OPERAND (t1, i), TREE_OPERAND (t2, i));
2420 if (cmp != 0)
2421 return cmp;
2422 }
2423 }
2424
2425 return 0;
2426}
2427
2428
5abe1e05
RB
2429/* Compare two data-references DRA and DRB to group them into chunks
2430 suitable for grouping. */
2431
2432static int
2433dr_group_sort_cmp (const void *dra_, const void *drb_)
2434{
2435 data_reference_p dra = *(data_reference_p *)const_cast<void *>(dra_);
2436 data_reference_p drb = *(data_reference_p *)const_cast<void *>(drb_);
5abe1e05
RB
2437 int cmp;
2438
2439 /* Stabilize sort. */
2440 if (dra == drb)
2441 return 0;
2442
2443 /* Ordering of DRs according to base. */
2444 if (!operand_equal_p (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb), 0))
2445 {
839c74bc
CH
2446 cmp = compare_tree (DR_BASE_ADDRESS (dra), DR_BASE_ADDRESS (drb));
2447 if (cmp != 0)
2448 return cmp;
5abe1e05
RB
2449 }
2450
2451 /* And according to DR_OFFSET. */
2452 if (!dr_equal_offsets_p (dra, drb))
2453 {
839c74bc
CH
2454 cmp = compare_tree (DR_OFFSET (dra), DR_OFFSET (drb));
2455 if (cmp != 0)
2456 return cmp;
5abe1e05
RB
2457 }
2458
2459 /* Put reads before writes. */
2460 if (DR_IS_READ (dra) != DR_IS_READ (drb))
2461 return DR_IS_READ (dra) ? -1 : 1;
2462
 2463 /* Then sort by access size. */
2464 if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2465 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))), 0))
2466 {
839c74bc
CH
2467 cmp = compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))),
2468 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
2469 if (cmp != 0)
2470 return cmp;
5abe1e05
RB
2471 }
2472
 2473 /* And by step. */
2474 if (!operand_equal_p (DR_STEP (dra), DR_STEP (drb), 0))
2475 {
839c74bc
CH
2476 cmp = compare_tree (DR_STEP (dra), DR_STEP (drb));
2477 if (cmp != 0)
2478 return cmp;
5abe1e05
RB
2479 }
2480
 2481 /* Then sort by DR_INIT. In case of identical DRs, sort by stmt UID. */
2482 cmp = tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb));
2483 if (cmp == 0)
2484 return gimple_uid (DR_STMT (dra)) < gimple_uid (DR_STMT (drb)) ? -1 : 1;
2485 return cmp;
2486}
ebfd146a
IR
2487
2488/* Function vect_analyze_data_ref_accesses.
2489
2490 Analyze the access pattern of all the data references in the loop.
2491
2492 FORNOW: the only access pattern that is considered vectorizable is a
2493 simple step 1 (consecutive) access.
2494
2495 FORNOW: handle only arrays and pointer accesses. */
2496
2497bool
a70d6342 2498vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a
IR
2499{
2500 unsigned int i;
9771b263 2501 vec<data_reference_p> datarefs;
ebfd146a
IR
2502 struct data_reference *dr;
2503
73fbfcad 2504 if (dump_enabled_p ())
78c60e3d 2505 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2506 "=== vect_analyze_data_ref_accesses ===\n");
ebfd146a 2507
a70d6342
IR
2508 if (loop_vinfo)
2509 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2510 else
2511 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2512
5abe1e05
RB
2513 if (datarefs.is_empty ())
2514 return true;
2515
2516 /* Sort the array of datarefs to make building the interleaving chains
2517 linear. */
c3284718 2518 qsort (datarefs.address (), datarefs.length (),
5abe1e05
RB
2519 sizeof (data_reference_p), dr_group_sort_cmp);
2520
2521 /* Build the interleaving chains. */
2522 for (i = 0; i < datarefs.length () - 1;)
2523 {
2524 data_reference_p dra = datarefs[i];
2525 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
2526 stmt_vec_info lastinfo = NULL;
2527 for (i = i + 1; i < datarefs.length (); ++i)
2528 {
2529 data_reference_p drb = datarefs[i];
2530 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
2531
2532 /* ??? Imperfect sorting (non-compatible types, non-modulo
 2533 accesses, same accesses) can lead to a group being artificially
2534 split here as we don't just skip over those. If it really
2535 matters we can push those to a worklist and re-iterate
 2536 over them. Then we can just skip ahead to the next DR here. */
2537
2538 /* Check that the data-refs have same first location (except init)
2539 and they are both either store or load (not load and store). */
2540 if (DR_IS_READ (dra) != DR_IS_READ (drb)
2541 || !operand_equal_p (DR_BASE_ADDRESS (dra),
2542 DR_BASE_ADDRESS (drb), 0)
2543 || !dr_equal_offsets_p (dra, drb))
2544 break;
2545
2546 /* Check that the data-refs have the same constant size and step. */
2547 tree sza = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra)));
2548 tree szb = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb)));
cc269bb6
RS
2549 if (!tree_fits_uhwi_p (sza)
2550 || !tree_fits_uhwi_p (szb)
5abe1e05 2551 || !tree_int_cst_equal (sza, szb)
9541ffee
RS
2552 || !tree_fits_shwi_p (DR_STEP (dra))
2553 || !tree_fits_shwi_p (DR_STEP (drb))
5abe1e05
RB
2554 || !tree_int_cst_equal (DR_STEP (dra), DR_STEP (drb)))
2555 break;
2556
2557 /* Do not place the same access in the interleaving chain twice. */
2558 if (tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb)) == 0)
2559 break;
2560
2561 /* Check the types are compatible.
2562 ??? We don't distinguish this during sorting. */
2563 if (!types_compatible_p (TREE_TYPE (DR_REF (dra)),
2564 TREE_TYPE (DR_REF (drb))))
2565 break;
2566
2567 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
2568 HOST_WIDE_INT init_a = TREE_INT_CST_LOW (DR_INIT (dra));
2569 HOST_WIDE_INT init_b = TREE_INT_CST_LOW (DR_INIT (drb));
2570 gcc_assert (init_a < init_b);
2571
2572 /* If init_b == init_a + the size of the type * k, we have an
2573 interleaving, and DRA is accessed before DRB. */
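 /* E.g. 4-byte accesses with inits 0 and 8 interleave (k == 2),
 whereas inits 0 and 6 do not. */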
2574 HOST_WIDE_INT type_size_a = TREE_INT_CST_LOW (sza);
2575 if ((init_b - init_a) % type_size_a != 0)
2576 break;
2577
2578 /* The step (if not zero) is greater than the difference between
2579 data-refs' inits. This splits groups into suitable sizes. */
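 /* For example, with 4-byte elements, a step of 16 bytes and inits
 0 and 16, the second ref accesses the location the first one only
 reaches on the next iteration, so the chain is split here. */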
2580 HOST_WIDE_INT step = TREE_INT_CST_LOW (DR_STEP (dra));
2581 if (step != 0 && step <= (init_b - init_a))
2582 break;
2583
2584 if (dump_enabled_p ())
2585 {
2586 dump_printf_loc (MSG_NOTE, vect_location,
2587 "Detected interleaving ");
2588 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (dra));
2589 dump_printf (MSG_NOTE, " and ");
2590 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_REF (drb));
e645e942 2591 dump_printf (MSG_NOTE, "\n");
5abe1e05
RB
2592 }
2593
2594 /* Link the found element into the group list. */
2595 if (!GROUP_FIRST_ELEMENT (stmtinfo_a))
2596 {
2597 GROUP_FIRST_ELEMENT (stmtinfo_a) = DR_STMT (dra);
2598 lastinfo = stmtinfo_a;
2599 }
2600 GROUP_FIRST_ELEMENT (stmtinfo_b) = DR_STMT (dra);
2601 GROUP_NEXT_ELEMENT (lastinfo) = DR_STMT (drb);
2602 lastinfo = stmtinfo_b;
2603 }
2604 }
2605
9771b263 2606 FOR_EACH_VEC_ELT (datarefs, i, dr)
4b5caab7
IR
2607 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
2608 && !vect_analyze_data_ref_access (dr))
ebfd146a 2609 {
73fbfcad 2610 if (dump_enabled_p ())
e645e942
TJ
2611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2612 "not vectorized: complicated access pattern.\n");
4b5caab7
IR
2613
2614 if (bb_vinfo)
2615 {
2616 /* Mark the statement as not vectorizable. */
2617 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
2618 continue;
2619 }
2620 else
2621 return false;
ebfd146a
IR
2622 }
2623
2624 return true;
2625}
2626
a05a89fa 2627
93bdc3ed 2628/* Operator == between two dr_with_seg_len objects.
a05a89fa
CH
2629
2630 This equality operator is used to make sure two data refs
2631 are the same one so that we will consider to combine the
2632 aliasing checks of those two pairs of data dependent data
2633 refs. */
2634
2635static bool
93bdc3ed
CH
2636operator == (const dr_with_seg_len& d1,
2637 const dr_with_seg_len& d2)
a05a89fa 2638{
93bdc3ed
CH
2639 return operand_equal_p (DR_BASE_ADDRESS (d1.dr),
2640 DR_BASE_ADDRESS (d2.dr), 0)
2641 && compare_tree (d1.offset, d2.offset) == 0
2642 && compare_tree (d1.seg_len, d2.seg_len) == 0;
a05a89fa
CH
2643}
2644
93bdc3ed 2645/* Function comp_dr_with_seg_len_pair.
a05a89fa 2646
93bdc3ed 2647 Comparison function for sorting objects of dr_with_seg_len_pair_t
a05a89fa
CH
2648 so that we can combine aliasing checks in one scan. */
2649
2650static int
93bdc3ed 2651comp_dr_with_seg_len_pair (const void *p1_, const void *p2_)
a05a89fa 2652{
93bdc3ed
CH
2653 const dr_with_seg_len_pair_t* p1 = (const dr_with_seg_len_pair_t *) p1_;
2654 const dr_with_seg_len_pair_t* p2 = (const dr_with_seg_len_pair_t *) p2_;
2655
2656 const dr_with_seg_len &p11 = p1->first,
2657 &p12 = p1->second,
2658 &p21 = p2->first,
2659 &p22 = p2->second;
2660
 2661 /* For DR pairs (a, b) and (c, d), we only consider merging the alias checks
 2662 if a and c have the same base address and step, and b and d have the same
 2663 address and step. Therefore, if either a&c or b&d don't have the same address
 2664 and step, we don't care about the order of those two pairs after sorting. */
2665 int comp_res;
2666
2667 if ((comp_res = compare_tree (DR_BASE_ADDRESS (p11.dr),
2668 DR_BASE_ADDRESS (p21.dr))) != 0)
a05a89fa 2669 return comp_res;
93bdc3ed
CH
2670 if ((comp_res = compare_tree (DR_BASE_ADDRESS (p12.dr),
2671 DR_BASE_ADDRESS (p22.dr))) != 0)
2672 return comp_res;
2673 if ((comp_res = compare_tree (DR_STEP (p11.dr), DR_STEP (p21.dr))) != 0)
2674 return comp_res;
2675 if ((comp_res = compare_tree (DR_STEP (p12.dr), DR_STEP (p22.dr))) != 0)
2676 return comp_res;
2677 if ((comp_res = compare_tree (p11.offset, p21.offset)) != 0)
2678 return comp_res;
2679 if ((comp_res = compare_tree (p12.offset, p22.offset)) != 0)
a05a89fa 2680 return comp_res;
a05a89fa
CH
2681
2682 return 0;
2683}
2684
2685template <class T> static void
2686swap (T& a, T& b)
2687{
2688 T c (a);
2689 a = b;
2690 b = c;
2691}
2692
2693/* Function vect_vfa_segment_size.
2694
2695 Create an expression that computes the size of segment
2696 that will be accessed for a data reference. The functions takes into
2697 account that realignment loads may access one more vector.
2698
2699 Input:
2700 DR: The data reference.
2701 LENGTH_FACTOR: segment length to consider.
2702
2703 Return an expression whose value is the size of segment which will be
2704 accessed by DR. */
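/* For example, with a 4-byte DR_STEP and LENGTH_FACTOR equal to a
 vectorization factor of 4 the segment is 16 bytes long; the
 dr_explicit_realign_optimized case below adds one more vector size. */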
2705
2706static tree
2707vect_vfa_segment_size (struct data_reference *dr, tree length_factor)
2708{
2709 tree segment_length;
2710
2711 if (integer_zerop (DR_STEP (dr)))
2712 segment_length = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2713 else
2714 segment_length = size_binop (MULT_EXPR,
93bdc3ed
CH
2715 fold_convert (sizetype, DR_STEP (dr)),
2716 fold_convert (sizetype, length_factor));
a05a89fa
CH
2717
2718 if (vect_supportable_dr_alignment (dr, false)
93bdc3ed 2719 == dr_explicit_realign_optimized)
a05a89fa
CH
2720 {
2721 tree vector_size = TYPE_SIZE_UNIT
2722 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))));
2723
2724 segment_length = size_binop (PLUS_EXPR, segment_length, vector_size);
2725 }
2726 return segment_length;
2727}
2728
ebfd146a
IR
2729/* Function vect_prune_runtime_alias_test_list.
2730
2731 Prune a list of ddrs to be tested at run-time by versioning for alias.
a05a89fa 2732 Merge several alias checks into one if possible.
ebfd146a
IR
 2733 Return FALSE if the resulting list of ddrs is longer than allowed by
2734 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2735
2736bool
2737vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
2738{
a05a89fa 2739 vec<ddr_p> may_alias_ddrs =
ebfd146a 2740 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
93bdc3ed 2741 vec<dr_with_seg_len_pair_t>& comp_alias_ddrs =
a05a89fa
CH
2742 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
2743 int vect_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2744 tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
2745
2746 ddr_p ddr;
2747 unsigned int i;
2748 tree length_factor;
ebfd146a 2749
73fbfcad 2750 if (dump_enabled_p ())
78c60e3d 2751 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2752 "=== vect_prune_runtime_alias_test_list ===\n");
ebfd146a 2753
a05a89fa
CH
2754 if (may_alias_ddrs.is_empty ())
2755 return true;
2756
2757 /* Basically, for each pair of dependent data refs store_ptr_0
2758 and load_ptr_0, we create an expression:
2759
2760 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2761 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2762
2763 for aliasing checks. However, in some cases we can decrease
2764 the number of checks by combining two checks into one. For
2765 example, suppose we have another pair of data refs store_ptr_0
2766 and load_ptr_1, and if the following condition is satisfied:
2767
2768 load_ptr_0 < load_ptr_1 &&
2769 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
2770
2771 (this condition means, in each iteration of vectorized loop,
2772 the accessed memory of store_ptr_0 cannot be between the memory
2773 of load_ptr_0 and load_ptr_1.)
2774
 2775 then we can use only the following expression to finish the
 2776 aliasing checks between store_ptr_0 & load_ptr_0 and
2777 store_ptr_0 & load_ptr_1:
2778
2779 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2780 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
2781
 2782 Note that we only consider the case in which load_ptr_0 and load_ptr_1
 2783 have the same base address. */
2784
2785 comp_alias_ddrs.create (may_alias_ddrs.length ());
2786
2787 /* First, we collect all data ref pairs for aliasing checks. */
2788 FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr)
ebfd146a 2789 {
a05a89fa
CH
2790 struct data_reference *dr_a, *dr_b;
2791 gimple dr_group_first_a, dr_group_first_b;
2792 tree segment_length_a, segment_length_b;
2793 gimple stmt_a, stmt_b;
2794
2795 dr_a = DDR_A (ddr);
2796 stmt_a = DR_STMT (DDR_A (ddr));
2797 dr_group_first_a = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a));
2798 if (dr_group_first_a)
2799 {
2800 stmt_a = dr_group_first_a;
2801 dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
2802 }
ebfd146a 2803
a05a89fa
CH
2804 dr_b = DDR_B (ddr);
2805 stmt_b = DR_STMT (DDR_B (ddr));
2806 dr_group_first_b = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b));
2807 if (dr_group_first_b)
2808 {
2809 stmt_b = dr_group_first_b;
2810 dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
2811 }
ebfd146a 2812
a05a89fa
CH
2813 if (!operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0))
2814 length_factor = scalar_loop_iters;
2815 else
2816 length_factor = size_int (vect_factor);
2817 segment_length_a = vect_vfa_segment_size (dr_a, length_factor);
2818 segment_length_b = vect_vfa_segment_size (dr_b, length_factor);
2819
93bdc3ed
CH
2820 dr_with_seg_len_pair_t dr_with_seg_len_pair
2821 (dr_with_seg_len (dr_a, segment_length_a),
2822 dr_with_seg_len (dr_b, segment_length_b));
2823
2824 if (compare_tree (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b)) > 0)
a05a89fa
CH
2825 swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
2826
2827 comp_alias_ddrs.safe_push (dr_with_seg_len_pair);
2828 }
2829
2830 /* Second, we sort the collected data ref pairs so that we can scan
2831 them once to combine all possible aliasing checks. */
93bdc3ed 2832 comp_alias_ddrs.qsort (comp_dr_with_seg_len_pair);
ebfd146a 2833
a05a89fa
CH
2834 /* Third, we scan the sorted dr pairs and check if we can combine
2835 alias checks of two neighbouring dr pairs. */
2836 for (size_t i = 1; i < comp_alias_ddrs.length (); ++i)
2837 {
2838 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
93bdc3ed
CH
2839 dr_with_seg_len *dr_a1 = &comp_alias_ddrs[i-1].first,
2840 *dr_b1 = &comp_alias_ddrs[i-1].second,
2841 *dr_a2 = &comp_alias_ddrs[i].first,
2842 *dr_b2 = &comp_alias_ddrs[i].second;
a05a89fa
CH
2843
2844 /* Remove duplicate data ref pairs. */
2845 if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2)
2846 {
2847 if (dump_enabled_p ())
ebfd146a 2848 {
a05a89fa
CH
2849 dump_printf_loc (MSG_NOTE, vect_location,
2850 "found equal ranges ");
2851 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2852 DR_REF (dr_a1->dr));
2853 dump_printf (MSG_NOTE, ", ");
2854 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2855 DR_REF (dr_b1->dr));
2856 dump_printf (MSG_NOTE, " and ");
2857 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2858 DR_REF (dr_a2->dr));
2859 dump_printf (MSG_NOTE, ", ");
2860 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2861 DR_REF (dr_b2->dr));
2862 dump_printf (MSG_NOTE, "\n");
ebfd146a 2863 }
a05a89fa
CH
2864
2865 comp_alias_ddrs.ordered_remove (i--);
2866 continue;
ebfd146a 2867 }
b8698a0f 2868
a05a89fa
CH
2869 if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2)
2870 {
2871 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
2872 and DR_A1 and DR_A2 are two consecutive memrefs. */
2873 if (*dr_a1 == *dr_a2)
2874 {
2875 swap (dr_a1, dr_b1);
2876 swap (dr_a2, dr_b2);
2877 }
2878
93bdc3ed
CH
2879 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1->dr),
2880 DR_BASE_ADDRESS (dr_a2->dr),
2881 0)
9541ffee
RS
2882 || !tree_fits_shwi_p (dr_a1->offset)
2883 || !tree_fits_shwi_p (dr_a2->offset))
a05a89fa
CH
2884 continue;
2885
2886 HOST_WIDE_INT diff = TREE_INT_CST_LOW (dr_a2->offset) -
2887 TREE_INT_CST_LOW (dr_a1->offset);
2888
2889
2890 /* Now we check if the following condition is satisfied:
2891
2892 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
2893
2894 where DIFF = DR_A2->OFFSET - DR_A1->OFFSET. However,
2895 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
 2896 have to make a conservative estimate. We can get the minimum value
2897 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
2898 then either of the following two conditions can guarantee the
2899 one above:
2900
2901 1: DIFF <= MIN_SEG_LEN_B
2902 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
2903
2904 */
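 /* For instance, if DIFF == 16 and MIN_SEG_LEN_B == 16, condition 1
 holds and the two checks are merged below, DR_A1's segment length
 becoming DR_A2's segment length plus 16. */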
2905
2906 HOST_WIDE_INT
2907 min_seg_len_b = (TREE_CODE (dr_b1->seg_len) == INTEGER_CST) ?
93bdc3ed
CH
2908 TREE_INT_CST_LOW (dr_b1->seg_len) :
2909 vect_factor;
a05a89fa
CH
2910
2911 if (diff <= min_seg_len_b
2912 || (TREE_CODE (dr_a1->seg_len) == INTEGER_CST
2913 && diff - (HOST_WIDE_INT) TREE_INT_CST_LOW (dr_a1->seg_len) <
2914 min_seg_len_b))
2915 {
2916 dr_a1->seg_len = size_binop (PLUS_EXPR,
2917 dr_a2->seg_len, size_int (diff));
2918 comp_alias_ddrs.ordered_remove (i--);
2919 }
2920 }
ebfd146a
IR
2921 }
2922
a05a89fa
CH
2923 if ((int) comp_alias_ddrs.length () >
2924 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
ebfd146a 2925 {
73fbfcad 2926 if (dump_enabled_p ())
ebfd146a 2927 {
e645e942
TJ
2928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2929 "disable versioning for alias - max number of "
2930 "generated checks exceeded.\n");
ebfd146a
IR
2931 }
2932
ebfd146a
IR
2933 return false;
2934 }
2935
2936 return true;
2937}
2938
aec7ae7d
JJ
2939/* Check whether a non-affine read in stmt is suitable for gather load
2940 and if so, return a builtin decl for that operation. */
2941
2942tree
2943vect_check_gather (gimple stmt, loop_vec_info loop_vinfo, tree *basep,
2944 tree *offp, int *scalep)
2945{
2946 HOST_WIDE_INT scale = 1, pbitpos, pbitsize;
2947 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2948 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2949 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2950 tree offtype = NULL_TREE;
2951 tree decl, base, off;
2952 enum machine_mode pmode;
2953 int punsignedp, pvolatilep;
2954
2955 /* The gather builtins need address of the form
2956 loop_invariant + vector * {1, 2, 4, 8}
2957 or
2958 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2959 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2960 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2961 multiplications and additions in it. To get a vector, we need
2962 a single SSA_NAME that will be defined in the loop and will
2963 contain everything that is not loop invariant and that can be
 2964 vectorized. The following code attempts to find such a preexisting
2965 SSA_NAME OFF and put the loop invariants into a tree BASE
2966 that can be gimplified before the loop. */
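 /* For instance, for a hypothetical read x[idx[i]] of 4-byte elements
 with loop-invariant x, the intended outcome is BASE == &x, OFF ==
 the SSA_NAME holding idx[i] and SCALE == 4. */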
2967 base = get_inner_reference (DR_REF (dr), &pbitsize, &pbitpos, &off,
2968 &pmode, &punsignedp, &pvolatilep, false);
2969 gcc_assert (base != NULL_TREE && (pbitpos % BITS_PER_UNIT) == 0);
2970
2971 if (TREE_CODE (base) == MEM_REF)
2972 {
2973 if (!integer_zerop (TREE_OPERAND (base, 1)))
2974 {
2975 if (off == NULL_TREE)
2976 {
2977 double_int moff = mem_ref_offset (base);
2978 off = double_int_to_tree (sizetype, moff);
2979 }
2980 else
2981 off = size_binop (PLUS_EXPR, off,
2982 fold_convert (sizetype, TREE_OPERAND (base, 1)));
2983 }
2984 base = TREE_OPERAND (base, 0);
2985 }
2986 else
2987 base = build_fold_addr_expr (base);
2988
2989 if (off == NULL_TREE)
2990 off = size_zero_node;
2991
 2992 /* If base is not loop invariant, then if off is 0 we start with just
 2993 the constant offset in the loop invariant BASE and continue with base
 2994 as OFF; otherwise give up.
2995 We could handle that case by gimplifying the addition of base + off
2996 into some SSA_NAME and use that as off, but for now punt. */
2997 if (!expr_invariant_in_loop_p (loop, base))
2998 {
2999 if (!integer_zerop (off))
3000 return NULL_TREE;
3001 off = base;
3002 base = size_int (pbitpos / BITS_PER_UNIT);
3003 }
3004 /* Otherwise put base + constant offset into the loop invariant BASE
3005 and continue with OFF. */
3006 else
3007 {
3008 base = fold_convert (sizetype, base);
3009 base = size_binop (PLUS_EXPR, base, size_int (pbitpos / BITS_PER_UNIT));
3010 }
3011
3012 /* OFF at this point may be either a SSA_NAME or some tree expression
3013 from get_inner_reference. Try to peel off loop invariants from it
3014 into BASE as long as possible. */
3015 STRIP_NOPS (off);
3016 while (offtype == NULL_TREE)
3017 {
3018 enum tree_code code;
3019 tree op0, op1, add = NULL_TREE;
3020
3021 if (TREE_CODE (off) == SSA_NAME)
3022 {
3023 gimple def_stmt = SSA_NAME_DEF_STMT (off);
3024
3025 if (expr_invariant_in_loop_p (loop, off))
3026 return NULL_TREE;
3027
3028 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
3029 break;
3030
3031 op0 = gimple_assign_rhs1 (def_stmt);
3032 code = gimple_assign_rhs_code (def_stmt);
3033 op1 = gimple_assign_rhs2 (def_stmt);
3034 }
3035 else
3036 {
3037 if (get_gimple_rhs_class (TREE_CODE (off)) == GIMPLE_TERNARY_RHS)
3038 return NULL_TREE;
3039 code = TREE_CODE (off);
3040 extract_ops_from_tree (off, &code, &op0, &op1);
3041 }
3042 switch (code)
3043 {
3044 case POINTER_PLUS_EXPR:
3045 case PLUS_EXPR:
3046 if (expr_invariant_in_loop_p (loop, op0))
3047 {
3048 add = op0;
3049 off = op1;
3050 do_add:
3051 add = fold_convert (sizetype, add);
3052 if (scale != 1)
3053 add = size_binop (MULT_EXPR, add, size_int (scale));
3054 base = size_binop (PLUS_EXPR, base, add);
3055 continue;
3056 }
3057 if (expr_invariant_in_loop_p (loop, op1))
3058 {
3059 add = op1;
3060 off = op0;
3061 goto do_add;
3062 }
3063 break;
3064 case MINUS_EXPR:
3065 if (expr_invariant_in_loop_p (loop, op1))
3066 {
3067 add = fold_convert (sizetype, op1);
3068 add = size_binop (MINUS_EXPR, size_zero_node, add);
3069 off = op0;
3070 goto do_add;
3071 }
3072 break;
3073 case MULT_EXPR:
9541ffee 3074 if (scale == 1 && tree_fits_shwi_p (op1))
aec7ae7d 3075 {
9439e9a1 3076 scale = tree_to_shwi (op1);
aec7ae7d
JJ
3077 off = op0;
3078 continue;
3079 }
3080 break;
3081 case SSA_NAME:
3082 off = op0;
3083 continue;
3084 CASE_CONVERT:
3085 if (!POINTER_TYPE_P (TREE_TYPE (op0))
3086 && !INTEGRAL_TYPE_P (TREE_TYPE (op0)))
3087 break;
3088 if (TYPE_PRECISION (TREE_TYPE (op0))
3089 == TYPE_PRECISION (TREE_TYPE (off)))
3090 {
3091 off = op0;
3092 continue;
3093 }
3094 if (TYPE_PRECISION (TREE_TYPE (op0))
3095 < TYPE_PRECISION (TREE_TYPE (off)))
3096 {
3097 off = op0;
3098 offtype = TREE_TYPE (off);
3099 STRIP_NOPS (off);
3100 continue;
3101 }
3102 break;
3103 default:
3104 break;
3105 }
3106 break;
3107 }
3108
3109 /* If at the end OFF still isn't a SSA_NAME or isn't
3110 defined in the loop, punt. */
3111 if (TREE_CODE (off) != SSA_NAME
3112 || expr_invariant_in_loop_p (loop, off))
3113 return NULL_TREE;
3114
3115 if (offtype == NULL_TREE)
3116 offtype = TREE_TYPE (off);
3117
3118 decl = targetm.vectorize.builtin_gather (STMT_VINFO_VECTYPE (stmt_info),
3119 offtype, scale);
3120 if (decl == NULL_TREE)
3121 return NULL_TREE;
3122
3123 if (basep)
3124 *basep = base;
3125 if (offp)
3126 *offp = off;
3127 if (scalep)
3128 *scalep = scale;
3129 return decl;
3130}
3131
ebfd146a
IR
3132/* Function vect_analyze_data_refs.
3133
a70d6342 3134 Find all the data references in the loop or basic block.
ebfd146a
IR
3135
3136 The general structure of the analysis of data refs in the vectorizer is as
3137 follows:
b8698a0f 3138 1- vect_analyze_data_refs(loop/bb): call
a70d6342
IR
3139 compute_data_dependences_for_loop/bb to find and analyze all data-refs
3140 in the loop/bb and their dependences.
ebfd146a
IR
3141 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3142 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3143 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3144
3145*/
3146
3147bool
777e1f09
RG
3148vect_analyze_data_refs (loop_vec_info loop_vinfo,
3149 bb_vec_info bb_vinfo,
3150 int *min_vf)
ebfd146a 3151{
a70d6342
IR
3152 struct loop *loop = NULL;
3153 basic_block bb = NULL;
ebfd146a 3154 unsigned int i;
9771b263 3155 vec<data_reference_p> datarefs;
ebfd146a
IR
3156 struct data_reference *dr;
3157 tree scalar_type;
3158
73fbfcad 3159 if (dump_enabled_p ())
78c60e3d
SS
3160 dump_printf_loc (MSG_NOTE, vect_location,
3161 "=== vect_analyze_data_refs ===\n");
b8698a0f 3162
a70d6342
IR
3163 if (loop_vinfo)
3164 {
3165 loop = LOOP_VINFO_LOOP (loop_vinfo);
fcac74a1
RB
3166 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))
3167 || find_data_references_in_loop
3168 (loop, &LOOP_VINFO_DATAREFS (loop_vinfo)))
22a8be9e 3169 {
73fbfcad 3170 if (dump_enabled_p ())
e645e942
TJ
3171 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3172 "not vectorized: loop contains function calls"
3173 " or data references that cannot be analyzed\n");
22a8be9e
SP
3174 return false;
3175 }
3176
a70d6342
IR
3177 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
3178 }
3179 else
3180 {
1aedeafe
RG
3181 gimple_stmt_iterator gsi;
3182
a70d6342 3183 bb = BB_VINFO_BB (bb_vinfo);
1aedeafe
RG
3184 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3185 {
3186 gimple stmt = gsi_stmt (gsi);
3187 if (!find_data_references_in_stmt (NULL, stmt,
3188 &BB_VINFO_DATAREFS (bb_vinfo)))
3189 {
3190 /* Mark the rest of the basic-block as unvectorizable. */
3191 for (; !gsi_end_p (gsi); gsi_next (&gsi))
d4d5e146
RG
3192 {
3193 stmt = gsi_stmt (gsi);
3194 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)) = false;
3195 }
1aedeafe
RG
3196 break;
3197 }
3198 }
22a8be9e 3199
a70d6342
IR
3200 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
3201 }
ebfd146a 3202
ff802fa1
IR
3203 /* Go through the data-refs, check that the analysis succeeded. Update
 3204 each stmt's stmt_vec_info with a pointer to its DR, and set the vectype. */
ebfd146a 3205
9771b263 3206 FOR_EACH_VEC_ELT (datarefs, i, dr)
ebfd146a
IR
3207 {
3208 gimple stmt;
3209 stmt_vec_info stmt_info;
b8698a0f 3210 tree base, offset, init;
aec7ae7d 3211 bool gather = false;
74bf76ed 3212 bool simd_lane_access = false;
777e1f09 3213 int vf;
b8698a0f 3214
fbd7e877 3215again:
ebfd146a
IR
3216 if (!dr || !DR_REF (dr))
3217 {
73fbfcad 3218 if (dump_enabled_p ())
78c60e3d 3219 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3220 "not vectorized: unhandled data-ref\n");
ebfd146a
IR
3221 return false;
3222 }
3223
3224 stmt = DR_STMT (dr);
3225 stmt_info = vinfo_for_stmt (stmt);
3226
fbd7e877
RB
3227 /* Discard clobbers from the dataref vector. We will remove
3228 clobber stmts during vectorization. */
3229 if (gimple_clobber_p (stmt))
3230 {
3231 if (i == datarefs.length () - 1)
3232 {
3233 datarefs.pop ();
3234 break;
3235 }
3236 datarefs[i] = datarefs.pop ();
3237 goto again;
3238 }
3239
ebfd146a
IR
3240 /* Check that analysis of the data-ref succeeded. */
3241 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
aec7ae7d 3242 || !DR_STEP (dr))
ebfd146a 3243 {
74bf76ed
JJ
3244 bool maybe_gather
3245 = DR_IS_READ (dr)
aec7ae7d 3246 && !TREE_THIS_VOLATILE (DR_REF (dr))
74bf76ed
JJ
3247 && targetm.vectorize.builtin_gather != NULL;
3248 bool maybe_simd_lane_access
3249 = loop_vinfo && loop->simduid;
3250
3251 /* If target supports vector gather loads, or if this might be
 3252 a SIMD lane access, see whether they can be used. */
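	  /* Rough illustration (assumed lowering, not taken from this file):
	     a SIMD lane access typically comes from a variable privatized in
	     an OpenMP simd loop, which omp-lowering rewrites into an access
	     whose offset has the form

	       (sizetype) .GOMP_SIMD_LANE (simduid) * elt_size

	     i.e. zero step in the analyzed loop but one element per lane,
	     which is the shape the code below tries to recognize.  A gather
	     candidate, by contrast, is a zero-step read such as data[idx[i]]
	     whose offset varies inside the loop.  */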
3253 if (loop_vinfo
3254 && (maybe_gather || maybe_simd_lane_access)
aec7ae7d
JJ
3255 && !nested_in_vect_loop_p (loop, stmt))
3256 {
3257 struct data_reference *newdr
3258 = create_data_ref (NULL, loop_containing_stmt (stmt),
3259 DR_REF (dr), stmt, true);
3260 gcc_assert (newdr != NULL && DR_REF (newdr));
3261 if (DR_BASE_ADDRESS (newdr)
3262 && DR_OFFSET (newdr)
3263 && DR_INIT (newdr)
3264 && DR_STEP (newdr)
3265 && integer_zerop (DR_STEP (newdr)))
3266 {
74bf76ed
JJ
3267 if (maybe_simd_lane_access)
3268 {
3269 tree off = DR_OFFSET (newdr);
3270 STRIP_NOPS (off);
3271 if (TREE_CODE (DR_INIT (newdr)) == INTEGER_CST
3272 && TREE_CODE (off) == MULT_EXPR
cc269bb6 3273 && tree_fits_uhwi_p (TREE_OPERAND (off, 1)))
74bf76ed
JJ
3274 {
3275 tree step = TREE_OPERAND (off, 1);
3276 off = TREE_OPERAND (off, 0);
3277 STRIP_NOPS (off);
3278 if (CONVERT_EXPR_P (off)
3279 && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off,
3280 0)))
3281 < TYPE_PRECISION (TREE_TYPE (off)))
3282 off = TREE_OPERAND (off, 0);
3283 if (TREE_CODE (off) == SSA_NAME)
3284 {
3285 gimple def = SSA_NAME_DEF_STMT (off);
3286 tree reft = TREE_TYPE (DR_REF (newdr));
3287 if (gimple_call_internal_p (def)
3288 && gimple_call_internal_fn (def)
3289 == IFN_GOMP_SIMD_LANE)
3290 {
3291 tree arg = gimple_call_arg (def, 0);
3292 gcc_assert (TREE_CODE (arg) == SSA_NAME);
3293 arg = SSA_NAME_VAR (arg);
3294 if (arg == loop->simduid
3295 /* For now. */
3296 && tree_int_cst_equal
3297 (TYPE_SIZE_UNIT (reft),
3298 step))
3299 {
3300 DR_OFFSET (newdr) = ssize_int (0);
3301 DR_STEP (newdr) = step;
995a1b4a
JJ
3302 DR_ALIGNED_TO (newdr)
3303 = size_int (BIGGEST_ALIGNMENT);
74bf76ed
JJ
3304 dr = newdr;
3305 simd_lane_access = true;
3306 }
3307 }
3308 }
3309 }
3310 }
3311 if (!simd_lane_access && maybe_gather)
3312 {
3313 dr = newdr;
3314 gather = true;
3315 }
aec7ae7d 3316 }
74bf76ed 3317 if (!gather && !simd_lane_access)
aec7ae7d
JJ
3318 free_data_ref (newdr);
3319 }
4b5caab7 3320
74bf76ed 3321 if (!gather && !simd_lane_access)
aec7ae7d 3322 {
73fbfcad 3323 if (dump_enabled_p ())
aec7ae7d 3324 {
e645e942 3325 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
3326 "not vectorized: data ref analysis "
3327 "failed ");
3328 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3329 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
aec7ae7d 3330 }
ba65ae42 3331
aec7ae7d 3332 if (bb_vinfo)
fcac74a1 3333 break;
aec7ae7d
JJ
3334
3335 return false;
3336 }
ebfd146a
IR
3337 }
3338
3339 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
3340 {
73fbfcad 3341 if (dump_enabled_p ())
78c60e3d
SS
3342 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3343 "not vectorized: base addr of dr is a "
e645e942 3344 "constant\n");
ba65ae42
IR
3345
3346 if (bb_vinfo)
fcac74a1 3347 break;
ba65ae42 3348
74bf76ed 3349 if (gather || simd_lane_access)
aec7ae7d
JJ
3350 free_data_ref (dr);
3351 return false;
ebfd146a
IR
3352 }
3353
8f7de592
IR
3354 if (TREE_THIS_VOLATILE (DR_REF (dr)))
3355 {
73fbfcad 3356 if (dump_enabled_p ())
8f7de592 3357 {
78c60e3d
SS
3358 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3359 "not vectorized: volatile type ");
3360 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3361 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
8f7de592 3362 }
ba65ae42
IR
3363
3364 if (bb_vinfo)
fcac74a1 3365 break;
ba65ae42 3366
8f7de592
IR
3367 return false;
3368 }
3369
822ba6d7 3370 if (stmt_can_throw_internal (stmt))
5a2c1986 3371 {
73fbfcad 3372 if (dump_enabled_p ())
5a2c1986 3373 {
78c60e3d
SS
3374 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3375 "not vectorized: statement can throw an "
3376 "exception ");
3377 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3378 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
5a2c1986 3379 }
ba65ae42
IR
3380
3381 if (bb_vinfo)
fcac74a1 3382 break;
ba65ae42 3383
74bf76ed 3384 if (gather || simd_lane_access)
aec7ae7d 3385 free_data_ref (dr);
5a2c1986
IR
3386 return false;
3387 }
3388
508ef0c6
RG
3389 if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
3390 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
3391 {
73fbfcad 3392 if (dump_enabled_p ())
508ef0c6 3393 {
78c60e3d
SS
3394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3395 "not vectorized: statement is bitfield "
3396 "access ");
3397 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3398 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
508ef0c6
RG
3399 }
3400
3401 if (bb_vinfo)
fcac74a1 3402 break;
508ef0c6 3403
74bf76ed 3404 if (gather || simd_lane_access)
508ef0c6
RG
3405 free_data_ref (dr);
3406 return false;
3407 }
3408
3409 base = unshare_expr (DR_BASE_ADDRESS (dr));
3410 offset = unshare_expr (DR_OFFSET (dr));
3411 init = unshare_expr (DR_INIT (dr));
3412
9c239085
JJ
3413 if (is_gimple_call (stmt))
3414 {
73fbfcad 3415 if (dump_enabled_p ())
9c239085 3416 {
78c60e3d 3417 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 3418 "not vectorized: dr in a call ");
78c60e3d 3419 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3420 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
9c239085
JJ
3421 }
3422
3423 if (bb_vinfo)
fcac74a1 3424 break;
9c239085 3425
74bf76ed 3426 if (gather || simd_lane_access)
9c239085
JJ
3427 free_data_ref (dr);
3428 return false;
3429 }
3430
ebfd146a 3431 /* Update DR field in stmt_vec_info struct. */
ebfd146a
IR
3432
3433 /* If the dataref is in an inner-loop of the loop that is considered for
 3434 vectorization, we also want to analyze the access relative to
b8698a0f 3435 the outer-loop (DR contains information only relative to the
ebfd146a
IR
3436 inner-most enclosing loop). We do that by building a reference to the
 3437 first location accessed by the inner-loop, and analyzing it relative to
b8698a0f
L
3438 the outer-loop. */
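  /* Sketch of the intended outcome (example only, mirroring the example
     given later for vect_create_addr_base_for_vector_ref): for
     s += in[i+j], with 'in' pointing to shorts and the j-loop nested in
     the vectorized i-loop, the DR describes the access relative to the
     j-loop (first location &in + i*2B, step 2B), while the code below
     additionally records the outer-loop view in the STMT_VINFO_DR_*
     fields: base address &in, a constant offset of 0, and a step of 2B
     per i-iteration.  */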
3439 if (loop && nested_in_vect_loop_p (loop, stmt))
ebfd146a
IR
3440 {
3441 tree outer_step, outer_base, outer_init;
3442 HOST_WIDE_INT pbitsize, pbitpos;
3443 tree poffset;
3444 enum machine_mode pmode;
3445 int punsignedp, pvolatilep;
3446 affine_iv base_iv, offset_iv;
3447 tree dinit;
3448
b8698a0f 3449 /* Build a reference to the first location accessed by the
ff802fa1 3450 inner-loop: *(BASE+INIT). (The first location is actually
ebfd146a
IR
3451 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3452 tree inner_base = build_fold_indirect_ref
5d49b6a7 3453 (fold_build_pointer_plus (base, init));
ebfd146a 3454
73fbfcad 3455 if (dump_enabled_p ())
ebfd146a 3456 {
78c60e3d
SS
3457 dump_printf_loc (MSG_NOTE, vect_location,
3458 "analyze in outer-loop: ");
3459 dump_generic_expr (MSG_NOTE, TDF_SLIM, inner_base);
e645e942 3460 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
3461 }
3462
b8698a0f 3463 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
ebfd146a
IR
3464 &poffset, &pmode, &punsignedp, &pvolatilep, false);
3465 gcc_assert (outer_base != NULL_TREE);
3466
3467 if (pbitpos % BITS_PER_UNIT != 0)
3468 {
73fbfcad 3469 if (dump_enabled_p ())
78c60e3d
SS
3470 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3471 "failed: bit offset alignment.\n");
ebfd146a
IR
3472 return false;
3473 }
3474
3475 outer_base = build_fold_addr_expr (outer_base);
b8698a0f 3476 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
ebfd146a
IR
3477 &base_iv, false))
3478 {
73fbfcad 3479 if (dump_enabled_p ())
e645e942 3480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3481 "failed: evolution of base is not affine.\n");
ebfd146a
IR
3482 return false;
3483 }
3484
3485 if (offset)
3486 {
3487 if (poffset)
b8698a0f 3488 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
ebfd146a
IR
3489 poffset);
3490 else
3491 poffset = offset;
3492 }
3493
3494 if (!poffset)
3495 {
3496 offset_iv.base = ssize_int (0);
3497 offset_iv.step = ssize_int (0);
3498 }
b8698a0f 3499 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
ebfd146a
IR
3500 &offset_iv, false))
3501 {
73fbfcad 3502 if (dump_enabled_p ())
e645e942 3503 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 3504 "evolution of offset is not affine.\n");
ebfd146a
IR
3505 return false;
3506 }
3507
3508 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
3509 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
3510 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3511 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
3512 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
3513
3514 outer_step = size_binop (PLUS_EXPR,
3515 fold_convert (ssizetype, base_iv.step),
3516 fold_convert (ssizetype, offset_iv.step));
3517
3518 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
3519 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
b8698a0f 3520 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
ebfd146a 3521 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
b8698a0f 3522 STMT_VINFO_DR_OFFSET (stmt_info) =
ebfd146a 3523 fold_convert (ssizetype, offset_iv.base);
b8698a0f 3524 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
ebfd146a
IR
3525 size_int (highest_pow2_factor (offset_iv.base));
3526
73fbfcad 3527 if (dump_enabled_p ())
ebfd146a 3528 {
78c60e3d
SS
3529 dump_printf_loc (MSG_NOTE, vect_location,
3530 "\touter base_address: ");
3531 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3532 STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3533 dump_printf (MSG_NOTE, "\n\touter offset from base address: ");
3534 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3535 STMT_VINFO_DR_OFFSET (stmt_info));
3536 dump_printf (MSG_NOTE,
3537 "\n\touter constant offset from base address: ");
3538 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3539 STMT_VINFO_DR_INIT (stmt_info));
3540 dump_printf (MSG_NOTE, "\n\touter step: ");
3541 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3542 STMT_VINFO_DR_STEP (stmt_info));
3543 dump_printf (MSG_NOTE, "\n\touter aligned to: ");
3544 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3545 STMT_VINFO_DR_ALIGNED_TO (stmt_info));
e645e942 3546 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
3547 }
3548 }
3549
3550 if (STMT_VINFO_DATA_REF (stmt_info))
3551 {
73fbfcad 3552 if (dump_enabled_p ())
ebfd146a 3553 {
78c60e3d
SS
3554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3555 "not vectorized: more than one data ref "
3556 "in stmt: ");
3557 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3558 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 3559 }
ba65ae42
IR
3560
3561 if (bb_vinfo)
fcac74a1 3562 break;
ba65ae42 3563
74bf76ed 3564 if (gather || simd_lane_access)
aec7ae7d 3565 free_data_ref (dr);
ebfd146a
IR
3566 return false;
3567 }
8644a673 3568
ebfd146a 3569 STMT_VINFO_DATA_REF (stmt_info) = dr;
74bf76ed
JJ
3570 if (simd_lane_access)
3571 {
3572 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info) = true;
3573 datarefs[i] = dr;
3574 }
b8698a0f 3575
ebfd146a
IR
3576 /* Set vectype for STMT. */
3577 scalar_type = TREE_TYPE (DR_REF (dr));
3578 STMT_VINFO_VECTYPE (stmt_info) =
3579 get_vectype_for_scalar_type (scalar_type);
b8698a0f 3580 if (!STMT_VINFO_VECTYPE (stmt_info))
ebfd146a 3581 {
73fbfcad 3582 if (dump_enabled_p ())
ebfd146a 3583 {
e645e942 3584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
3585 "not vectorized: no vectype for stmt: ");
3586 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3587 dump_printf (MSG_MISSED_OPTIMIZATION, " scalar_type: ");
3588 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_DETAILS,
3589 scalar_type);
e645e942 3590 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 3591 }
4b5caab7
IR
3592
3593 if (bb_vinfo)
fcac74a1 3594 break;
aec7ae7d 3595
74bf76ed 3596 if (gather || simd_lane_access)
aec7ae7d
JJ
3597 {
3598 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3599 free_data_ref (dr);
3600 }
3601 return false;
ebfd146a 3602 }
451dabda
RB
3603 else
3604 {
3605 if (dump_enabled_p ())
3606 {
3607 dump_printf_loc (MSG_NOTE, vect_location,
3608 "got vectype for stmt: ");
3609 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3610 dump_generic_expr (MSG_NOTE, TDF_SLIM,
3611 STMT_VINFO_VECTYPE (stmt_info));
e645e942 3612 dump_printf (MSG_NOTE, "\n");
451dabda
RB
3613 }
3614 }
777e1f09
RG
3615
3616 /* Adjust the minimal vectorization factor according to the
3617 vector type. */
3618 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
3619 if (vf > *min_vf)
3620 *min_vf = vf;
aec7ae7d
JJ
3621
3622 if (gather)
3623 {
aec7ae7d 3624 tree off;
aec7ae7d 3625
7d75abc8
MM
3626 gather = 0 != vect_check_gather (stmt, loop_vinfo, NULL, &off, NULL);
3627 if (gather
3628 && get_vectype_for_scalar_type (TREE_TYPE (off)) == NULL_TREE)
3629 gather = false;
319e6439 3630 if (!gather)
aec7ae7d 3631 {
6f723d33
JJ
3632 STMT_VINFO_DATA_REF (stmt_info) = NULL;
3633 free_data_ref (dr);
73fbfcad 3634 if (dump_enabled_p ())
aec7ae7d 3635 {
78c60e3d
SS
3636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3637 "not vectorized: not suitable for gather "
3638 "load ");
3639 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3640 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
aec7ae7d
JJ
3641 }
3642 return false;
3643 }
3644
9771b263 3645 datarefs[i] = dr;
319e6439
RG
3646 STMT_VINFO_GATHER_P (stmt_info) = true;
3647 }
3648 else if (loop_vinfo
3649 && TREE_CODE (DR_STEP (dr)) != INTEGER_CST)
3650 {
51a905b2
RB
3651 if (nested_in_vect_loop_p (loop, stmt)
3652 || !DR_IS_READ (dr))
319e6439 3653 {
73fbfcad 3654 if (dump_enabled_p ())
319e6439 3655 {
78c60e3d
SS
3656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3657 "not vectorized: not suitable for strided "
3658 "load ");
3659 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3660 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
319e6439
RG
3661 }
3662 return false;
3663 }
3664 STMT_VINFO_STRIDE_LOAD_P (stmt_info) = true;
aec7ae7d 3665 }
ebfd146a 3666 }
b8698a0f 3667
fcac74a1
RB
 3668 /* If we stopped the analysis at the first dataref we could not analyze
 3669 when trying to vectorize a basic-block, mark the rest of the datarefs
 3670 as not vectorizable and truncate the vector of datarefs. That
 3671 avoids wasting time analyzing their dependences. */
3672 if (i != datarefs.length ())
3673 {
3674 gcc_assert (bb_vinfo != NULL);
3675 for (unsigned j = i; j < datarefs.length (); ++j)
3676 {
3677 data_reference_p dr = datarefs[j];
3678 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
3679 free_data_ref (dr);
3680 }
3681 datarefs.truncate (i);
3682 }
3683
ebfd146a
IR
3684 return true;
3685}
3686
3687
3688/* Function vect_get_new_vect_var.
3689
ff802fa1 3690 Returns a name for a new variable. The current naming scheme prepends the
b8698a0f
L
 3691 prefix "vect", "stmp", or "vectp" (depending on the value of VAR_KIND) and
 3692 an underscore to NAME; the bare prefix is used when no NAME is
ebfd146a
IR
3693 provided. */
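/* For instance (illustrative): vect_get_new_vect_var (vectype,
   vect_simple_var, "x") creates a temporary whose name is based on
   "vect_x", whereas a NULL NAME yields a temporary named after the
   bare prefix.  */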
3694
3695tree
3696vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
3697{
3698 const char *prefix;
3699 tree new_vect_var;
3700
3701 switch (var_kind)
3702 {
3703 case vect_simple_var:
451dabda 3704 prefix = "vect";
ebfd146a
IR
3705 break;
3706 case vect_scalar_var:
451dabda 3707 prefix = "stmp";
ebfd146a
IR
3708 break;
3709 case vect_pointer_var:
451dabda 3710 prefix = "vectp";
ebfd146a
IR
3711 break;
3712 default:
3713 gcc_unreachable ();
3714 }
3715
3716 if (name)
3717 {
451dabda 3718 char* tmp = concat (prefix, "_", name, NULL);
65876d24 3719 new_vect_var = create_tmp_reg (type, tmp);
ebfd146a
IR
3720 free (tmp);
3721 }
3722 else
65876d24 3723 new_vect_var = create_tmp_reg (type, prefix);
ebfd146a
IR
3724
3725 return new_vect_var;
3726}
3727
3728
3729/* Function vect_create_addr_base_for_vector_ref.
3730
3731 Create an expression that computes the address of the first memory location
3732 that will be accessed for a data reference.
3733
3734 Input:
3735 STMT: The statement containing the data reference.
3736 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
 3737 OFFSET: Optional. If supplied, it is added to the initial address.
 3738 LOOP: Specify the loop-nest relative to which the address should be computed.
3739 For example, when the dataref is in an inner-loop nested in an
3740 outer-loop that is now being vectorized, LOOP can be either the
ff802fa1 3741 outer-loop, or the inner-loop. The first memory location accessed
ebfd146a
IR
3742 by the following dataref ('in' points to short):
3743
3744 for (i=0; i<N; i++)
3745 for (j=0; j<M; j++)
3746 s += in[i+j]
3747
3748 is as follows:
3749 if LOOP=i_loop: &in (relative to i_loop)
3750 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3751
3752 Output:
b8698a0f 3753 1. Return an SSA_NAME whose value is the address of the memory location of
ebfd146a
IR
3754 the first vector of the data reference.
3755 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3756 these statement(s) which define the returned SSA_NAME.
3757
3758 FORNOW: We are only handling array accesses with step 1. */
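/* A minimal usage sketch (hypothetical caller, names assumed):

     gimple_seq new_stmts = NULL;
     tree addr = vect_create_addr_base_for_vector_ref (stmt, &new_stmts,
                                                       NULL_TREE, loop);
     if (new_stmts)
       gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
                                         new_stmts);

   after which ADDR holds the address of the first memory location that
   the data reference will access.  */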
3759
3760tree
3761vect_create_addr_base_for_vector_ref (gimple stmt,
3762 gimple_seq *new_stmt_list,
3763 tree offset,
3764 struct loop *loop)
3765{
3766 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3767 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4bdd44c4 3768 tree data_ref_base;
595c2679 3769 const char *base_name;
4bdd44c4 3770 tree addr_base;
ebfd146a
IR
3771 tree dest;
3772 gimple_seq seq = NULL;
4bdd44c4
RB
3773 tree base_offset;
3774 tree init;
8644a673 3775 tree vect_ptr_type;
ebfd146a 3776 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
a70d6342 3777 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
ebfd146a 3778
a70d6342 3779 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
ebfd146a 3780 {
a70d6342 3781 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 3782
a70d6342 3783 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
ebfd146a
IR
3784
3785 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
3786 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
3787 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
3788 }
4bdd44c4
RB
3789 else
3790 {
3791 data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
3792 base_offset = unshare_expr (DR_OFFSET (dr));
3793 init = unshare_expr (DR_INIT (dr));
3794 }
ebfd146a 3795
a70d6342 3796 if (loop_vinfo)
595c2679 3797 base_name = get_name (data_ref_base);
a70d6342
IR
3798 else
3799 {
3800 base_offset = ssize_int (0);
3801 init = ssize_int (0);
595c2679 3802 base_name = get_name (DR_REF (dr));
b8698a0f 3803 }
a70d6342 3804
ebfd146a
IR
3805 /* Create base_offset */
3806 base_offset = size_binop (PLUS_EXPR,
3807 fold_convert (sizetype, base_offset),
3808 fold_convert (sizetype, init));
ebfd146a
IR
3809
3810 if (offset)
3811 {
ebfd146a
IR
3812 offset = fold_build2 (MULT_EXPR, sizetype,
3813 fold_convert (sizetype, offset), step);
3814 base_offset = fold_build2 (PLUS_EXPR, sizetype,
3815 base_offset, offset);
ebfd146a
IR
3816 }
3817
3818 /* base + base_offset */
a70d6342 3819 if (loop_vinfo)
5d49b6a7 3820 addr_base = fold_build_pointer_plus (data_ref_base, base_offset);
a70d6342
IR
3821 else
3822 {
70f34814
RG
3823 addr_base = build1 (ADDR_EXPR,
3824 build_pointer_type (TREE_TYPE (DR_REF (dr))),
3825 unshare_expr (DR_REF (dr)));
a70d6342 3826 }
b8698a0f 3827
ebfd146a 3828 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
4bdd44c4
RB
3829 addr_base = fold_convert (vect_ptr_type, addr_base);
3830 dest = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, base_name);
3831 addr_base = force_gimple_operand (addr_base, &seq, false, dest);
ebfd146a
IR
3832 gimple_seq_add_seq (new_stmt_list, seq);
3833
17fc049f 3834 if (DR_PTR_INFO (dr)
4bdd44c4 3835 && TREE_CODE (addr_base) == SSA_NAME)
128aaeed 3836 {
4bdd44c4 3837 duplicate_ssa_name_ptr_info (addr_base, DR_PTR_INFO (dr));
128aaeed 3838 if (offset)
4bdd44c4 3839 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base));
128aaeed 3840 }
17fc049f 3841
73fbfcad 3842 if (dump_enabled_p ())
ebfd146a 3843 {
78c60e3d 3844 dump_printf_loc (MSG_NOTE, vect_location, "created ");
4bdd44c4 3845 dump_generic_expr (MSG_NOTE, TDF_SLIM, addr_base);
e645e942 3846 dump_printf (MSG_NOTE, "\n");
ebfd146a 3847 }
8644a673 3848
4bdd44c4 3849 return addr_base;
ebfd146a
IR
3850}
3851
3852
3853/* Function vect_create_data_ref_ptr.
3854
920e8172
RS
3855 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3856 location accessed in the loop by STMT, along with the def-use update
3857 chain to appropriately advance the pointer through the loop iterations.
3858 Also set aliasing information for the pointer. This pointer is used by
3859 the callers to this function to create a memory reference expression for
3860 vector load/store access.
ebfd146a
IR
3861
3862 Input:
3863 1. STMT: a stmt that references memory. Expected to be of the form
3864 GIMPLE_ASSIGN <name, data-ref> or
3865 GIMPLE_ASSIGN <data-ref, name>.
920e8172
RS
3866 2. AGGR_TYPE: the type of the reference, which should be either a vector
3867 or an array.
3868 3. AT_LOOP: the loop where the vector memref is to be created.
3869 4. OFFSET (optional): an offset to be added to the initial address accessed
ebfd146a 3870 by the data-ref in STMT.
920e8172
RS
3871 5. BSI: location where the new stmts are to be placed if there is no loop
3872 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
ebfd146a 3873 pointing to the initial address.
ebfd146a
IR
3874
3875 Output:
3876 1. Declare a new ptr to vector_type, and have it point to the base of the
 3877 data reference (the initial address accessed by the data reference).
 3878 For example, for a vector of type V8HI, the following code is generated:
3879
920e8172
RS
3880 v8hi *ap;
3881 ap = (v8hi *)initial_address;
ebfd146a
IR
3882
3883 if OFFSET is not supplied:
3884 initial_address = &a[init];
3885 if OFFSET is supplied:
3886 initial_address = &a[init + OFFSET];
3887
3888 Return the initial_address in INITIAL_ADDRESS.
3889
3890 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
b8698a0f 3891 update the pointer in each iteration of the loop.
ebfd146a
IR
3892
3893 Return the increment stmt that updates the pointer in PTR_INCR.
3894
b8698a0f 3895 3. Set INV_P to true if the access pattern of the data reference in the
ff802fa1 3896 vectorized loop is invariant. Set it to false otherwise.
ebfd146a
IR
3897
3898 4. Return the pointer. */
3899
3900tree
920e8172
RS
3901vect_create_data_ref_ptr (gimple stmt, tree aggr_type, struct loop *at_loop,
3902 tree offset, tree *initial_address,
3903 gimple_stmt_iterator *gsi, gimple *ptr_incr,
3904 bool only_init, bool *inv_p)
ebfd146a 3905{
595c2679 3906 const char *base_name;
ebfd146a
IR
3907 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3908 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
a70d6342
IR
3909 struct loop *loop = NULL;
3910 bool nested_in_vect_loop = false;
3911 struct loop *containing_loop = NULL;
920e8172
RS
3912 tree aggr_ptr_type;
3913 tree aggr_ptr;
ebfd146a
IR
3914 tree new_temp;
3915 gimple vec_stmt;
3916 gimple_seq new_stmt_list = NULL;
a70d6342 3917 edge pe = NULL;
ebfd146a 3918 basic_block new_bb;
920e8172 3919 tree aggr_ptr_init;
ebfd146a 3920 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
920e8172 3921 tree aptr;
ebfd146a
IR
3922 gimple_stmt_iterator incr_gsi;
3923 bool insert_after;
3924 tree indx_before_incr, indx_after_incr;
3925 gimple incr;
3926 tree step;
a70d6342 3927 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
b8698a0f 3928
920e8172
RS
3929 gcc_assert (TREE_CODE (aggr_type) == ARRAY_TYPE
3930 || TREE_CODE (aggr_type) == VECTOR_TYPE);
3931
a70d6342
IR
3932 if (loop_vinfo)
3933 {
3934 loop = LOOP_VINFO_LOOP (loop_vinfo);
3935 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3936 containing_loop = (gimple_bb (stmt))->loop_father;
3937 pe = loop_preheader_edge (loop);
3938 }
3939 else
3940 {
3941 gcc_assert (bb_vinfo);
3942 only_init = true;
3943 *ptr_incr = NULL;
3944 }
b8698a0f 3945
ebfd146a
IR
3946 /* Check the step (evolution) of the load in LOOP, and record
3947 whether it's invariant. */
3948 if (nested_in_vect_loop)
3949 step = STMT_VINFO_DR_STEP (stmt_info);
3950 else
3951 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
b8698a0f 3952
08940f33 3953 if (integer_zerop (step))
ebfd146a
IR
3954 *inv_p = true;
3955 else
3956 *inv_p = false;
3957
3958 /* Create an expression for the first address accessed by this load
b8698a0f 3959 in LOOP. */
595c2679 3960 base_name = get_name (DR_BASE_ADDRESS (dr));
ebfd146a 3961
73fbfcad 3962 if (dump_enabled_p ())
ebfd146a 3963 {
595c2679 3964 tree dr_base_type = TREE_TYPE (DR_BASE_OBJECT (dr));
78c60e3d
SS
3965 dump_printf_loc (MSG_NOTE, vect_location,
3966 "create %s-pointer variable to type: ",
5806f481 3967 get_tree_code_name (TREE_CODE (aggr_type)));
78c60e3d 3968 dump_generic_expr (MSG_NOTE, TDF_SLIM, aggr_type);
595c2679 3969 if (TREE_CODE (dr_base_type) == ARRAY_TYPE)
78c60e3d 3970 dump_printf (MSG_NOTE, " vectorizing an array ref: ");
38000232
MG
3971 else if (TREE_CODE (dr_base_type) == VECTOR_TYPE)
3972 dump_printf (MSG_NOTE, " vectorizing a vector ref: ");
595c2679 3973 else if (TREE_CODE (dr_base_type) == RECORD_TYPE)
78c60e3d 3974 dump_printf (MSG_NOTE, " vectorizing a record based array ref: ");
595c2679 3975 else
78c60e3d 3976 dump_printf (MSG_NOTE, " vectorizing a pointer ref: ");
595c2679 3977 dump_generic_expr (MSG_NOTE, TDF_SLIM, DR_BASE_OBJECT (dr));
e645e942 3978 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
3979 }
3980
4bdd44c4
RB
3981 /* (1) Create the new aggregate-pointer variable.
3982 Vector and array types inherit the alias set of their component
920e8172
RS
3983 type by default so we need to use a ref-all pointer if the data
3984 reference does not conflict with the created aggregated data
3985 reference because it is not addressable. */
4bdd44c4
RB
3986 bool need_ref_all = false;
3987 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
3f49ba3f 3988 get_alias_set (DR_REF (dr))))
4bdd44c4 3989 need_ref_all = true;
3f49ba3f 3990 /* Likewise for any of the data references in the stmt group. */
e14c1050 3991 else if (STMT_VINFO_GROUP_SIZE (stmt_info) > 1)
ebfd146a 3992 {
e14c1050 3993 gimple orig_stmt = STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info);
5006671f
RG
3994 do
3995 {
4bdd44c4
RB
3996 stmt_vec_info sinfo = vinfo_for_stmt (orig_stmt);
3997 struct data_reference *sdr = STMT_VINFO_DATA_REF (sinfo);
3998 if (!alias_sets_conflict_p (get_alias_set (aggr_type),
3999 get_alias_set (DR_REF (sdr))))
5006671f 4000 {
4bdd44c4 4001 need_ref_all = true;
5006671f
RG
4002 break;
4003 }
4bdd44c4 4004 orig_stmt = STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo);
5006671f
RG
4005 }
4006 while (orig_stmt);
ebfd146a 4007 }
4bdd44c4
RB
4008 aggr_ptr_type = build_pointer_type_for_mode (aggr_type, ptr_mode,
4009 need_ref_all);
4010 aggr_ptr = vect_get_new_vect_var (aggr_ptr_type, vect_pointer_var, base_name);
4011
ebfd146a 4012
ff802fa1
IR
4013 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4014 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4015 def-use update cycles for the pointer: one relative to the outer-loop
4016 (LOOP), which is what steps (3) and (4) below do. The other is relative
4017 to the inner-loop (which is the inner-most loop containing the dataref),
 4018 and this is done by step (5) below.
ebfd146a 4019
ff802fa1
IR
4020 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4021 inner-most loop, and so steps (3),(4) work the same, and step (5) is
4022 redundant. Steps (3),(4) create the following:
ebfd146a
IR
4023
4024 vp0 = &base_addr;
4025 LOOP: vp1 = phi(vp0,vp2)
b8698a0f 4026 ...
ebfd146a
IR
4027 ...
4028 vp2 = vp1 + step
4029 goto LOOP
b8698a0f 4030
ff802fa1
IR
 4031 If there is an inner-loop nested in LOOP, then step (5) will also be
4032 applied, and an additional update in the inner-loop will be created:
ebfd146a
IR
4033
4034 vp0 = &base_addr;
4035 LOOP: vp1 = phi(vp0,vp2)
4036 ...
4037 inner: vp3 = phi(vp1,vp4)
4038 vp4 = vp3 + inner_step
4039 if () goto inner
4040 ...
4041 vp2 = vp1 + step
4042 if () goto LOOP */
4043
920e8172
RS
4044 /* (2) Calculate the initial address of the aggregate-pointer, and set
4045 the aggregate-pointer to point to it before the loop. */
ebfd146a
IR
4046
4047 /* Create: (&(base[init_val+offset]) in the loop preheader. */
4048
4049 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
4050 offset, loop);
ebfd146a
IR
4051 if (new_stmt_list)
4052 {
a70d6342
IR
4053 if (pe)
4054 {
4055 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
4056 gcc_assert (!new_bb);
4057 }
4058 else
1b29f05e 4059 gsi_insert_seq_before (gsi, new_stmt_list, GSI_SAME_STMT);
ebfd146a
IR
4060 }
4061
4062 *initial_address = new_temp;
4063
920e8172 4064 /* Create: p = (aggr_type *) initial_base */
17fc049f 4065 if (TREE_CODE (new_temp) != SSA_NAME
920e8172 4066 || !useless_type_conversion_p (aggr_ptr_type, TREE_TYPE (new_temp)))
a70d6342 4067 {
920e8172
RS
4068 vec_stmt = gimple_build_assign (aggr_ptr,
4069 fold_convert (aggr_ptr_type, new_temp));
4070 aggr_ptr_init = make_ssa_name (aggr_ptr, vec_stmt);
17fc049f
RG
4071 /* Copy the points-to information if it exists. */
4072 if (DR_PTR_INFO (dr))
920e8172
RS
4073 duplicate_ssa_name_ptr_info (aggr_ptr_init, DR_PTR_INFO (dr));
4074 gimple_assign_set_lhs (vec_stmt, aggr_ptr_init);
17fc049f
RG
4075 if (pe)
4076 {
4077 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
4078 gcc_assert (!new_bb);
4079 }
4080 else
1b29f05e 4081 gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT);
a70d6342
IR
4082 }
4083 else
920e8172 4084 aggr_ptr_init = new_temp;
ebfd146a 4085
920e8172 4086 /* (3) Handle the updating of the aggregate-pointer inside the loop.
ff802fa1
IR
4087 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4088 inner-loop nested in LOOP (during outer-loop vectorization). */
ebfd146a 4089
a70d6342 4090 /* No update in loop is required. */
b8698a0f 4091 if (only_init && (!loop_vinfo || at_loop == loop))
920e8172 4092 aptr = aggr_ptr_init;
ebfd146a
IR
4093 else
4094 {
920e8172 4095 /* The step of the aggregate pointer is the type size. */
08940f33 4096 tree iv_step = TYPE_SIZE_UNIT (aggr_type);
b8698a0f 4097 /* One exception to the above is when the scalar step of the load in
ebfd146a
IR
4098 LOOP is zero. In this case the step here is also zero. */
4099 if (*inv_p)
08940f33
RB
4100 iv_step = size_zero_node;
4101 else if (tree_int_cst_sgn (step) == -1)
4102 iv_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (iv_step), iv_step);
ebfd146a
IR
4103
4104 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
4105
920e8172 4106 create_iv (aggr_ptr_init,
08940f33 4107 fold_convert (aggr_ptr_type, iv_step),
920e8172 4108 aggr_ptr, loop, &incr_gsi, insert_after,
ebfd146a
IR
4109 &indx_before_incr, &indx_after_incr);
4110 incr = gsi_stmt (incr_gsi);
a70d6342 4111 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
ebfd146a
IR
4112
4113 /* Copy the points-to information if it exists. */
4114 if (DR_PTR_INFO (dr))
4115 {
4116 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4117 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4118 }
ebfd146a
IR
4119 if (ptr_incr)
4120 *ptr_incr = incr;
4121
920e8172 4122 aptr = indx_before_incr;
ebfd146a
IR
4123 }
4124
4125 if (!nested_in_vect_loop || only_init)
920e8172 4126 return aptr;
ebfd146a
IR
4127
4128
920e8172 4129 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
ff802fa1 4130 nested in LOOP, if it exists. */
ebfd146a
IR
4131
4132 gcc_assert (nested_in_vect_loop);
4133 if (!only_init)
4134 {
4135 standard_iv_increment_position (containing_loop, &incr_gsi,
4136 &insert_after);
920e8172 4137 create_iv (aptr, fold_convert (aggr_ptr_type, DR_STEP (dr)), aggr_ptr,
ebfd146a
IR
4138 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
4139 &indx_after_incr);
4140 incr = gsi_stmt (incr_gsi);
a70d6342 4141 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
ebfd146a
IR
4142
4143 /* Copy the points-to information if it exists. */
4144 if (DR_PTR_INFO (dr))
4145 {
4146 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
4147 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
4148 }
ebfd146a
IR
4149 if (ptr_incr)
4150 *ptr_incr = incr;
4151
b8698a0f 4152 return indx_before_incr;
ebfd146a
IR
4153 }
4154 else
4155 gcc_unreachable ();
4156}
4157
4158
4159/* Function bump_vector_ptr
4160
4161 Increment a pointer (to a vector type) by vector-size. If requested,
b8698a0f 4162 i.e. if PTR-INCR is given, then also connect the new increment stmt
ebfd146a
IR
4163 to the existing def-use update-chain of the pointer, by modifying
4164 the PTR_INCR as illustrated below:
4165
4166 The pointer def-use update-chain before this function:
4167 DATAREF_PTR = phi (p_0, p_2)
4168 ....
b8698a0f 4169 PTR_INCR: p_2 = DATAREF_PTR + step
ebfd146a
IR
4170
4171 The pointer def-use update-chain after this function:
4172 DATAREF_PTR = phi (p_0, p_2)
4173 ....
4174 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4175 ....
4176 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4177
4178 Input:
b8698a0f 4179 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
ebfd146a 4180 in the loop.
b8698a0f 4181 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
ebfd146a 4182 the loop. The increment amount across iterations is expected
b8698a0f 4183 to be vector_size.
ebfd146a
IR
4184 BSI - location where the new update stmt is to be placed.
4185 STMT - the original scalar memory-access stmt that is being vectorized.
4186 BUMP - optional. The offset by which to bump the pointer. If not given,
4187 the offset is assumed to be vector_size.
4188
4189 Output: Return NEW_DATAREF_PTR as illustrated above.
b8698a0f 4190
ebfd146a
IR
4191*/
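/* Typical use (sketch only): after emitting one vector access, a caller
   advances the pointer for the next access in the same iteration with

     dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, gsi, stmt,
                                    NULL_TREE);

   where the NULL_TREE bump defaults to the vector size.  */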
4192
4193tree
4194bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
4195 gimple stmt, tree bump)
4196{
4197 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4198 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4199 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
4200 tree update = TYPE_SIZE_UNIT (vectype);
4201 gimple incr_stmt;
4202 ssa_op_iter iter;
4203 use_operand_p use_p;
4204 tree new_dataref_ptr;
4205
4206 if (bump)
4207 update = bump;
b8698a0f 4208
070ecdfd
RG
4209 new_dataref_ptr = copy_ssa_name (dataref_ptr, NULL);
4210 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, new_dataref_ptr,
ebfd146a 4211 dataref_ptr, update);
ebfd146a
IR
4212 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
4213
4214 /* Copy the points-to information if it exists. */
4215 if (DR_PTR_INFO (dr))
128aaeed
RB
4216 {
4217 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
644ffefd 4218 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr));
128aaeed 4219 }
ebfd146a
IR
4220
4221 if (!ptr_incr)
4222 return new_dataref_ptr;
4223
4224 /* Update the vector-pointer's cross-iteration increment. */
4225 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
4226 {
4227 tree use = USE_FROM_PTR (use_p);
4228
4229 if (use == dataref_ptr)
4230 SET_USE (use_p, new_dataref_ptr);
4231 else
4232 gcc_assert (tree_int_cst_compare (use, update) == 0);
4233 }
4234
4235 return new_dataref_ptr;
4236}
4237
4238
4239/* Function vect_create_destination_var.
4240
4241 Create a new temporary of type VECTYPE. */
4242
4243tree
4244vect_create_destination_var (tree scalar_dest, tree vectype)
4245{
4246 tree vec_dest;
451dabda
RB
4247 const char *name;
4248 char *new_name;
ebfd146a
IR
4249 tree type;
4250 enum vect_var_kind kind;
4251
4252 kind = vectype ? vect_simple_var : vect_scalar_var;
4253 type = vectype ? vectype : TREE_TYPE (scalar_dest);
4254
4255 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
4256
451dabda
RB
4257 name = get_name (scalar_dest);
4258 if (name)
4259 asprintf (&new_name, "%s_%u", name, SSA_NAME_VERSION (scalar_dest));
4260 else
4261 asprintf (&new_name, "_%u", SSA_NAME_VERSION (scalar_dest));
ebfd146a 4262 vec_dest = vect_get_new_vect_var (type, kind, new_name);
451dabda 4263 free (new_name);
ebfd146a
IR
4264
4265 return vec_dest;
4266}
4267
0d0293ac 4268/* Function vect_grouped_store_supported.
ebfd146a 4269
e2c83630
RH
4270 Returns TRUE if interleave high and interleave low permutations
4271 are supported, and FALSE otherwise. */
ebfd146a
IR
4272
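/* For example (illustrative, a vector mode with nelt == 4): the two
   selectors checked below are the interleave-high mask {0, 4, 1, 5} and
   the interleave-low mask {2, 6, 3, 7}, where indices 0..3 select
   elements of the first input vector and 4..7 select elements of the
   second.  */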
4273bool
0d0293ac 4274vect_grouped_store_supported (tree vectype, unsigned HOST_WIDE_INT count)
ebfd146a 4275{
e2c83630 4276 enum machine_mode mode = TYPE_MODE (vectype);
b8698a0f 4277
b602d918
RS
4278 /* vect_permute_store_chain requires the group size to be a power of two. */
4279 if (exact_log2 (count) == -1)
4280 {
73fbfcad 4281 if (dump_enabled_p ())
78c60e3d
SS
4282 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4283 "the size of the group of accesses"
e645e942 4284 " is not a power of 2\n");
b602d918
RS
4285 return false;
4286 }
4287
e2c83630 4288 /* Check that the permutation is supported. */
3fcc1b55
JJ
4289 if (VECTOR_MODE_P (mode))
4290 {
4291 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4292 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
4293 for (i = 0; i < nelt / 2; i++)
4294 {
4295 sel[i * 2] = i;
4296 sel[i * 2 + 1] = i + nelt;
4297 }
4298 if (can_vec_perm_p (mode, false, sel))
4299 {
4300 for (i = 0; i < nelt; i++)
4301 sel[i] += nelt / 2;
4302 if (can_vec_perm_p (mode, false, sel))
4303 return true;
4304 }
4305 }
ebfd146a 4306
73fbfcad 4307 if (dump_enabled_p ())
78c60e3d 4308 dump_printf (MSG_MISSED_OPTIMIZATION,
e645e942 4309 "interleave op not supported by target.\n");
a6b3dfde 4310 return false;
ebfd146a
IR
4311}
4312
4313
272c6793
RS
4314/* Return TRUE if vec_store_lanes is available for COUNT vectors of
4315 type VECTYPE. */
4316
4317bool
4318vect_store_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
4319{
4320 return vect_lanes_optab_supported_p ("vec_store_lanes",
4321 vec_store_lanes_optab,
4322 vectype, count);
4323}
4324
4325
ebfd146a
IR
4326/* Function vect_permute_store_chain.
4327
4328 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
b8698a0f 4329 a power of 2, generate interleave_high/low stmts to reorder the data
ff802fa1 4330 correctly for the stores. Return the final references for stores in
ebfd146a
IR
4331 RESULT_CHAIN.
4332
4333 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
ff802fa1
IR
4334 The input is 4 vectors each containing 8 elements. We assign a number to
 4335 each element; the input sequence is:
ebfd146a
IR
4336
4337 1st vec: 0 1 2 3 4 5 6 7
4338 2nd vec: 8 9 10 11 12 13 14 15
b8698a0f 4339 3rd vec: 16 17 18 19 20 21 22 23
ebfd146a
IR
4340 4th vec: 24 25 26 27 28 29 30 31
4341
4342 The output sequence should be:
4343
4344 1st vec: 0 8 16 24 1 9 17 25
4345 2nd vec: 2 10 18 26 3 11 19 27
 4346 3rd vec: 4 12 20 28 5 13 21 29
4347 4th vec: 6 14 22 30 7 15 23 31
4348
4349 i.e., we interleave the contents of the four vectors in their order.
4350
ff802fa1 4351 We use interleave_high/low instructions to create such output. The input of
ebfd146a 4352 each interleave_high/low operation is two vectors:
b8698a0f
L
4353 1st vec 2nd vec
4354 0 1 2 3 4 5 6 7
4355 the even elements of the result vector are obtained left-to-right from the
ff802fa1 4356 high/low elements of the first vector. The odd elements of the result are
ebfd146a
IR
4357 obtained left-to-right from the high/low elements of the second vector.
4358 The output of interleave_high will be: 0 4 1 5
4359 and of interleave_low: 2 6 3 7
4360
b8698a0f 4361
ff802fa1 4362 The permutation is done in log LENGTH stages. In each stage interleave_high
b8698a0f
L
4363 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4364 where the first argument is taken from the first half of DR_CHAIN and the
 4365 second argument from its second half.
4366 In our example,
ebfd146a
IR
4367
4368 I1: interleave_high (1st vec, 3rd vec)
4369 I2: interleave_low (1st vec, 3rd vec)
4370 I3: interleave_high (2nd vec, 4th vec)
4371 I4: interleave_low (2nd vec, 4th vec)
4372
4373 The output for the first stage is:
4374
4375 I1: 0 16 1 17 2 18 3 19
4376 I2: 4 20 5 21 6 22 7 23
4377 I3: 8 24 9 25 10 26 11 27
4378 I4: 12 28 13 29 14 30 15 31
4379
4380 The output of the second stage, i.e. the final result is:
4381
4382 I1: 0 8 16 24 1 9 17 25
4383 I2: 2 10 18 26 3 11 19 27
 4384 I3: 4 12 20 28 5 13 21 29
4385 I4: 6 14 22 30 7 15 23 31. */
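/* A condensed caller-side sketch (names assumed):

     vec<tree> result_chain = vNULL;
     result_chain.create (group_size);
     vect_permute_store_chain (dr_chain, group_size, stmt, gsi,
                               &result_chain);

   after which RESULT_CHAIN holds the interleaved vectors in the order
   in which they should be stored.  */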
b8698a0f 4386
b602d918 4387void
9771b263 4388vect_permute_store_chain (vec<tree> dr_chain,
b8698a0f 4389 unsigned int length,
ebfd146a
IR
4390 gimple stmt,
4391 gimple_stmt_iterator *gsi,
9771b263 4392 vec<tree> *result_chain)
ebfd146a 4393{
83d5977e 4394 tree vect1, vect2, high, low;
ebfd146a
IR
4395 gimple perm_stmt;
4396 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3fcc1b55
JJ
4397 tree perm_mask_low, perm_mask_high;
4398 unsigned int i, n;
e2c83630 4399 unsigned int j, nelt = TYPE_VECTOR_SUBPARTS (vectype);
3fcc1b55 4400 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
b8698a0f 4401
b6b9227d
JJ
4402 result_chain->quick_grow (length);
4403 memcpy (result_chain->address (), dr_chain.address (),
4404 length * sizeof (tree));
ebfd146a 4405
3fcc1b55
JJ
4406 for (i = 0, n = nelt / 2; i < n; i++)
4407 {
4408 sel[i * 2] = i;
4409 sel[i * 2 + 1] = i + nelt;
4410 }
4411 perm_mask_high = vect_gen_perm_mask (vectype, sel);
e2c83630
RH
4412 gcc_assert (perm_mask_high != NULL);
4413
3fcc1b55
JJ
4414 for (i = 0; i < nelt; i++)
4415 sel[i] += nelt / 2;
4416 perm_mask_low = vect_gen_perm_mask (vectype, sel);
e2c83630 4417 gcc_assert (perm_mask_low != NULL);
3fcc1b55
JJ
4418
4419 for (i = 0, n = exact_log2 (length); i < n; i++)
ebfd146a
IR
4420 {
4421 for (j = 0; j < length/2; j++)
4422 {
9771b263
DN
4423 vect1 = dr_chain[j];
4424 vect2 = dr_chain[j+length/2];
ebfd146a
IR
4425
4426 /* Create interleaving stmt:
3fcc1b55 4427 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
83d5977e 4428 high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
3fcc1b55 4429 perm_stmt
73804b12
RG
4430 = gimple_build_assign_with_ops (VEC_PERM_EXPR, high,
4431 vect1, vect2, perm_mask_high);
ebfd146a 4432 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
9771b263 4433 (*result_chain)[2*j] = high;
ebfd146a
IR
4434
4435 /* Create interleaving stmt:
3fcc1b55
JJ
4436 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4437 nelt*3/2+1, ...}> */
83d5977e 4438 low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
3fcc1b55 4439 perm_stmt
73804b12
RG
4440 = gimple_build_assign_with_ops (VEC_PERM_EXPR, low,
4441 vect1, vect2, perm_mask_low);
ebfd146a 4442 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
9771b263 4443 (*result_chain)[2*j+1] = low;
ebfd146a 4444 }
b6b9227d
JJ
4445 memcpy (dr_chain.address (), result_chain->address (),
4446 length * sizeof (tree));
ebfd146a 4447 }
ebfd146a
IR
4448}
4449
4450/* Function vect_setup_realignment
b8698a0f 4451
ebfd146a
IR
4452 This function is called when vectorizing an unaligned load using
4453 the dr_explicit_realign[_optimized] scheme.
4454 This function generates the following code at the loop prolog:
4455
4456 p = initial_addr;
4457 x msq_init = *(floor(p)); # prolog load
b8698a0f 4458 realignment_token = call target_builtin;
ebfd146a
IR
4459 loop:
4460 x msq = phi (msq_init, ---)
4461
b8698a0f 4462 The stmts marked with x are generated only for the case of
ebfd146a
IR
4463 dr_explicit_realign_optimized.
4464
b8698a0f 4465 The code above sets up a new (vector) pointer, pointing to the first
ebfd146a
IR
4466 location accessed by STMT, and a "floor-aligned" load using that pointer.
4467 It also generates code to compute the "realignment-token" (if the relevant
4468 target hook was defined), and creates a phi-node at the loop-header bb
4469 whose arguments are the result of the prolog-load (created by this
4470 function) and the result of a load that takes place in the loop (to be
4471 created by the caller to this function).
4472
4473 For the case of dr_explicit_realign_optimized:
b8698a0f 4474 The caller to this function uses the phi-result (msq) to create the
ebfd146a
IR
4475 realignment code inside the loop, and sets up the missing phi argument,
4476 as follows:
b8698a0f 4477 loop:
ebfd146a
IR
4478 msq = phi (msq_init, lsq)
4479 lsq = *(floor(p')); # load in loop
4480 result = realign_load (msq, lsq, realignment_token);
4481
4482 For the case of dr_explicit_realign:
4483 loop:
4484 msq = *(floor(p)); # load in loop
4485 p' = p + (VS-1);
4486 lsq = *(floor(p')); # load in loop
4487 result = realign_load (msq, lsq, realignment_token);
4488
4489 Input:
4490 STMT - (scalar) load stmt to be vectorized. This load accesses
4491 a memory location that may be unaligned.
4492 BSI - place where new code is to be inserted.
4493 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
b8698a0f
L
4494 is used.
4495
ebfd146a
IR
4496 Output:
4497 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4498 target hook, if defined.
4499 Return value - the result of the loop-header phi node. */
4500
4501tree
4502vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
4503 tree *realignment_token,
4504 enum dr_alignment_support alignment_support_scheme,
4505 tree init_addr,
4506 struct loop **at_loop)
4507{
4508 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4509 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4510 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
20ede5c6 4511 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
69f11a13
IR
4512 struct loop *loop = NULL;
4513 edge pe = NULL;
ebfd146a
IR
4514 tree scalar_dest = gimple_assign_lhs (stmt);
4515 tree vec_dest;
4516 gimple inc;
4517 tree ptr;
4518 tree data_ref;
4519 gimple new_stmt;
4520 basic_block new_bb;
4521 tree msq_init = NULL_TREE;
4522 tree new_temp;
4523 gimple phi_stmt;
4524 tree msq = NULL_TREE;
4525 gimple_seq stmts = NULL;
4526 bool inv_p;
4527 bool compute_in_loop = false;
69f11a13 4528 bool nested_in_vect_loop = false;
ebfd146a 4529 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
69f11a13
IR
4530 struct loop *loop_for_initial_load = NULL;
4531
4532 if (loop_vinfo)
4533 {
4534 loop = LOOP_VINFO_LOOP (loop_vinfo);
4535 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
4536 }
ebfd146a
IR
4537
4538 gcc_assert (alignment_support_scheme == dr_explicit_realign
4539 || alignment_support_scheme == dr_explicit_realign_optimized);
4540
4541 /* We need to generate three things:
4542 1. the misalignment computation
4543 2. the extra vector load (for the optimized realignment scheme).
4544 3. the phi node for the two vectors from which the realignment is
ff802fa1 4545 done (for the optimized realignment scheme). */
ebfd146a
IR
4546
4547 /* 1. Determine where to generate the misalignment computation.
4548
4549 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4550 calculation will be generated by this function, outside the loop (in the
4551 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4552 caller, inside the loop.
4553
4554 Background: If the misalignment remains fixed throughout the iterations of
4555 the loop, then both realignment schemes are applicable, and also the
4556 misalignment computation can be done outside LOOP. This is because we are
4557 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4558 are a multiple of VS (the Vector Size), and therefore the misalignment in
4559 different vectorized LOOP iterations is always the same.
4560 The problem arises only if the memory access is in an inner-loop nested
4561 inside LOOP, which is now being vectorized using outer-loop vectorization.
4562 This is the only case when the misalignment of the memory access may not
4563 remain fixed throughout the iterations of the inner-loop (as explained in
4564 detail in vect_supportable_dr_alignment). In this case, not only is the
4565 optimized realignment scheme not applicable, but also the misalignment
4566 computation (and generation of the realignment token that is passed to
4567 REALIGN_LOAD) have to be done inside the loop.
4568
4569 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4570 or not, which in turn determines if the misalignment is computed inside
4571 the inner-loop, or outside LOOP. */
4572
69f11a13 4573 if (init_addr != NULL_TREE || !loop_vinfo)
ebfd146a
IR
4574 {
4575 compute_in_loop = true;
4576 gcc_assert (alignment_support_scheme == dr_explicit_realign);
4577 }
4578
4579
4580 /* 2. Determine where to generate the extra vector load.
4581
4582 For the optimized realignment scheme, instead of generating two vector
4583 loads in each iteration, we generate a single extra vector load in the
4584 preheader of the loop, and in each iteration reuse the result of the
4585 vector load from the previous iteration. In case the memory access is in
4586 an inner-loop nested inside LOOP, which is now being vectorized using
4587 outer-loop vectorization, we need to determine whether this initial vector
4588 load should be generated at the preheader of the inner-loop, or can be
4589 generated at the preheader of LOOP. If the memory access has no evolution
4590 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4591 to be generated inside LOOP (in the preheader of the inner-loop). */
4592
4593 if (nested_in_vect_loop)
4594 {
4595 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
4596 bool invariant_in_outerloop =
4597 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
4598 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
4599 }
4600 else
4601 loop_for_initial_load = loop;
4602 if (at_loop)
4603 *at_loop = loop_for_initial_load;
4604
69f11a13
IR
4605 if (loop_for_initial_load)
4606 pe = loop_preheader_edge (loop_for_initial_load);
4607
ebfd146a
IR
4608 /* 3. For the case of the optimized realignment, create the first vector
4609 load at the loop preheader. */
4610
4611 if (alignment_support_scheme == dr_explicit_realign_optimized)
4612 {
4613 /* Create msq_init = *(floor(p1)) in the loop preheader */
4614
4615 gcc_assert (!compute_in_loop);
ebfd146a 4616 vec_dest = vect_create_destination_var (scalar_dest, vectype);
920e8172
RS
4617 ptr = vect_create_data_ref_ptr (stmt, vectype, loop_for_initial_load,
4618 NULL_TREE, &init_addr, NULL, &inc,
4619 true, &inv_p);
070ecdfd 4620 new_temp = copy_ssa_name (ptr, NULL);
75421dcd 4621 new_stmt = gimple_build_assign_with_ops
070ecdfd 4622 (BIT_AND_EXPR, new_temp, ptr,
75421dcd
RG
4623 build_int_cst (TREE_TYPE (ptr),
4624 -(HOST_WIDE_INT)TYPE_ALIGN_UNIT (vectype)));
75421dcd
RG
4625 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4626 gcc_assert (!new_bb);
20ede5c6
RG
4627 data_ref
4628 = build2 (MEM_REF, TREE_TYPE (vec_dest), new_temp,
4629 build_int_cst (reference_alias_ptr_type (DR_REF (dr)), 0));
ebfd146a
IR
4630 new_stmt = gimple_build_assign (vec_dest, data_ref);
4631 new_temp = make_ssa_name (vec_dest, new_stmt);
4632 gimple_assign_set_lhs (new_stmt, new_temp);
69f11a13
IR
4633 if (pe)
4634 {
4635 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4636 gcc_assert (!new_bb);
4637 }
4638 else
4639 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4640
ebfd146a
IR
4641 msq_init = gimple_assign_lhs (new_stmt);
4642 }
4643
4644 /* 4. Create realignment token using a target builtin, if available.
4645 It is done either inside the containing loop, or before LOOP (as
4646 determined above). */
4647
4648 if (targetm.vectorize.builtin_mask_for_load)
4649 {
4650 tree builtin_decl;
4651
 4652 /* Compute INIT_ADDR - the initial address accessed by this memref. */
69f11a13 4653 if (!init_addr)
ebfd146a
IR
4654 {
4655 /* Generate the INIT_ADDR computation outside LOOP. */
4656 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
4657 NULL_TREE, loop);
69f11a13
IR
4658 if (loop)
4659 {
4660 pe = loop_preheader_edge (loop);
4661 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
4662 gcc_assert (!new_bb);
4663 }
4664 else
4665 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
ebfd146a
IR
4666 }
4667
4668 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
4669 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
4670 vec_dest =
4671 vect_create_destination_var (scalar_dest,
4672 gimple_call_return_type (new_stmt));
4673 new_temp = make_ssa_name (vec_dest, new_stmt);
4674 gimple_call_set_lhs (new_stmt, new_temp);
4675
4676 if (compute_in_loop)
4677 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
4678 else
4679 {
4680 /* Generate the misalignment computation outside LOOP. */
4681 pe = loop_preheader_edge (loop);
4682 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
4683 gcc_assert (!new_bb);
4684 }
4685
4686 *realignment_token = gimple_call_lhs (new_stmt);
4687
4688 /* The result of the CALL_EXPR to this builtin is determined from
4689 the value of the parameter and no global variables are touched
4690 which makes the builtin a "const" function. Requiring the
4691 builtin to have the "const" attribute makes it unnecessary
4692 to call mark_call_clobbered. */
4693 gcc_assert (TREE_READONLY (builtin_decl));
4694 }
4695
4696 if (alignment_support_scheme == dr_explicit_realign)
4697 return msq;
4698
4699 gcc_assert (!compute_in_loop);
4700 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
4701
4702
4703 /* 5. Create msq = phi <msq_init, lsq> in loop */
4704
4705 pe = loop_preheader_edge (containing_loop);
4706 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4707 msq = make_ssa_name (vec_dest, NULL);
4708 phi_stmt = create_phi_node (msq, containing_loop->header);
9e227d60 4709 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
ebfd146a
IR
4710
4711 return msq;
4712}
4713
4714
0d0293ac 4715/* Function vect_grouped_load_supported.
ebfd146a 4716
e2c83630 4717 Returns TRUE if even and odd permutations are supported,
ebfd146a
IR
4718 and FALSE otherwise. */
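/* For example (illustrative, a vector mode with nelt == 4): the code
   below checks the extract-even selector {0, 2, 4, 6} and the
   extract-odd selector {1, 3, 5, 7}, applied to the concatenation of
   the two input vectors.  */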
4719
4720bool
0d0293ac 4721vect_grouped_load_supported (tree vectype, unsigned HOST_WIDE_INT count)
ebfd146a 4722{
e2c83630 4723 enum machine_mode mode = TYPE_MODE (vectype);
ebfd146a 4724
b602d918
RS
4725 /* vect_permute_load_chain requires the group size to be a power of two. */
4726 if (exact_log2 (count) == -1)
4727 {
73fbfcad 4728 if (dump_enabled_p ())
78c60e3d
SS
4729 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4730 "the size of the group of accesses"
e645e942 4731 " is not a power of 2\n");
b602d918
RS
4732 return false;
4733 }
4734
e2c83630
RH
4735 /* Check that the permutation is supported. */
4736 if (VECTOR_MODE_P (mode))
4737 {
4738 unsigned int i, nelt = GET_MODE_NUNITS (mode);
4739 unsigned char *sel = XALLOCAVEC (unsigned char, nelt);
ebfd146a 4740
e2c83630
RH
4741 for (i = 0; i < nelt; i++)
4742 sel[i] = i * 2;
4743 if (can_vec_perm_p (mode, false, sel))
4744 {
4745 for (i = 0; i < nelt; i++)
4746 sel[i] = i * 2 + 1;
4747 if (can_vec_perm_p (mode, false, sel))
4748 return true;
4749 }
4750 }
ebfd146a 4751
73fbfcad 4752 if (dump_enabled_p ())
78c60e3d 4753 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 4754 "extract even/odd not supported by target\n");
a6b3dfde 4755 return false;
ebfd146a
IR
4756}
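
/* Illustrative note (not target-specific): for a vector mode with eight
   elements, the two selectors built above are {0,2,4,6,8,10,12,14} and
   {1,3,5,7,9,11,13,15}; each selector indexes into the 16-element
   concatenation of the two input vectors, so the first picks the even
   elements and the second the odd ones.  */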

/* Return TRUE if vec_load_lanes is available for COUNT vectors of
   type VECTYPE.  */

bool
vect_load_lanes_supported (tree vectype, unsigned HOST_WIDE_INT count)
{
  return vect_lanes_optab_supported_p ("vec_load_lanes",
                                       vec_load_lanes_optab,
                                       vectype, count);
}
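
/* Illustrative note: the vec_load_lanes optab models instructions that load
   COUNT vectors from consecutive memory and de-interleave them in a single
   operation (for example, the NEON/AArch64 vld2/vld3/vld4 family).  When it
   is available for a group, the extract even/odd permutations generated by
   vect_permute_load_chain below are not needed.  */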

/* Function vect_permute_load_chain.

   Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
   a power of 2, generate extract_even/odd stmts to reorder the input data
   correctly.  Return the final references for loads in RESULT_CHAIN.

   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
   The input is 4 vectors each containing 8 elements.  We assign a number
   to each element; the input sequence is:

   1st vec:   0  1  2  3  4  5  6  7
   2nd vec:   8  9 10 11 12 13 14 15
   3rd vec:  16 17 18 19 20 21 22 23
   4th vec:  24 25 26 27 28 29 30 31

   The output sequence should be:

   1st vec:  0 4  8 12 16 20 24 28
   2nd vec:  1 5  9 13 17 21 25 29
   3rd vec:  2 6 10 14 18 22 26 30
   4th vec:  3 7 11 15 19 23 27 31

   i.e., the first output vector should contain the first elements of each
   interleaving group, etc.

   We use extract_even/odd instructions to create such output.  The input of
   each extract_even/odd operation is two vectors
   1st vec    2nd vec
   0 1 2 3    4 5 6 7

   and the output is the vector of extracted even/odd elements.  The output of
   extract_even will be:   0 2 4 6
   and of extract_odd:     1 3 5 7


   The permutation is done in log2 (LENGTH) stages.  In each stage extract_even
   and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
   their order.  In our example,

   E1: extract_even (1st vec, 2nd vec)
   E2: extract_odd (1st vec, 2nd vec)
   E3: extract_even (3rd vec, 4th vec)
   E4: extract_odd (3rd vec, 4th vec)

   The output for the first stage will be:

   E1:  0  2  4  6  8 10 12 14
   E2:  1  3  5  7  9 11 13 15
   E3: 16 18 20 22 24 26 28 30
   E4: 17 19 21 23 25 27 29 31

   In order to proceed and create the correct sequence for the next stage (or
   for the correct output, if the second stage is the last one, as in our
   example), we first put the output of the extract_even operation and then
   the output of extract_odd in RESULT_CHAIN (which is then copied to
   DR_CHAIN).  The input for the second stage is:

   1st vec (E1):  0  2  4  6  8 10 12 14
   2nd vec (E3): 16 18 20 22 24 26 28 30
   3rd vec (E2):  1  3  5  7  9 11 13 15
   4th vec (E4): 17 19 21 23 25 27 29 31

   The output of the second stage:

   E1: 0 4  8 12 16 20 24 28
   E2: 2 6 10 14 18 22 26 30
   E3: 1 5  9 13 17 21 25 29
   E4: 3 7 11 15 19 23 27 31

   And RESULT_CHAIN after reordering:

   1st vec (E1):  0 4  8 12 16 20 24 28
   2nd vec (E3):  1 5  9 13 17 21 25 29
   3rd vec (E2):  2 6 10 14 18 22 26 30
   4th vec (E4):  3 7 11 15 19 23 27 31.  */

static void
vect_permute_load_chain (vec<tree> dr_chain,
                         unsigned int length,
                         gimple stmt,
                         gimple_stmt_iterator *gsi,
                         vec<tree> *result_chain)
{
  tree data_ref, first_vect, second_vect;
  tree perm_mask_even, perm_mask_odd;
  gimple perm_stmt;
  tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
  unsigned int i, j, log_length = exact_log2 (length);
  unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype);
  unsigned char *sel = XALLOCAVEC (unsigned char, nelt);

  result_chain->quick_grow (length);
  memcpy (result_chain->address (), dr_chain.address (),
          length * sizeof (tree));

  for (i = 0; i < nelt; ++i)
    sel[i] = i * 2;
  perm_mask_even = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_even != NULL);

  for (i = 0; i < nelt; ++i)
    sel[i] = i * 2 + 1;
  perm_mask_odd = vect_gen_perm_mask (vectype, sel);
  gcc_assert (perm_mask_odd != NULL);

  for (i = 0; i < log_length; i++)
    {
      for (j = 0; j < length; j += 2)
        {
          first_vect = dr_chain[j];
          second_vect = dr_chain[j+1];

          /* data_ref = permute_even (first_data_ref, second_data_ref);  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_even");
          perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                                    first_vect, second_vect,
                                                    perm_mask_even);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          (*result_chain)[j/2] = data_ref;

          /* data_ref = permute_odd (first_data_ref, second_data_ref);  */
          data_ref = make_temp_ssa_name (vectype, NULL, "vect_perm_odd");
          perm_stmt = gimple_build_assign_with_ops (VEC_PERM_EXPR, data_ref,
                                                    first_vect, second_vect,
                                                    perm_mask_odd);
          vect_finish_stmt_generation (stmt, perm_stmt, gsi);
          (*result_chain)[j/2+length/2] = data_ref;
        }
      memcpy (dr_chain.address (), result_chain->address (),
              length * sizeof (tree));
    }
}
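
/* A minimal scalar model of the permutation above, kept under "#if 0" purely
   as an illustration of the algorithm; the helper is hypothetical and is not
   used anywhere in the vectorizer.  CHAIN is an array of LENGTH buffers of
   NELT ints each, LENGTH a power of two; on return CHAIN[k] holds every
   LENGTH-th element of the interleaved input, starting at element k,
   mirroring RESULT_CHAIN above.  */
#if 0
static void
scalar_model_of_permute_load_chain (int **chain, unsigned int length,
                                    unsigned int nelt)
{
  unsigned int i, j, k, log_length = 0;
  int **work = XNEWVEC (int *, length);

  for (i = length; i > 1; i >>= 1)
    log_length++;
  for (i = 0; i < length; i++)
    work[i] = XNEWVEC (int, nelt);

  for (i = 0; i < log_length; i++)
    {
      for (j = 0; j < length; j += 2)
        {
          int *first = chain[j], *second = chain[j + 1];
          for (k = 0; k < nelt; k++)
            {
              /* extract_even/extract_odd of the 2*NELT-element
                 concatenation of FIRST and SECOND.  */
              work[j / 2][k]
                = 2 * k < nelt ? first[2 * k] : second[2 * k - nelt];
              work[j / 2 + length / 2][k]
                = 2 * k + 1 < nelt ? first[2 * k + 1]
                                   : second[2 * k + 1 - nelt];
            }
        }
      /* Feed the result of this stage back as the next stage's input,
         just like the memcpy into DR_CHAIN above.  */
      for (j = 0; j < length; j++)
        memcpy (chain[j], work[j], nelt * sizeof (int));
    }

  for (i = 0; i < length; i++)
    free (work[i]);
  free (work);
}
#endif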


/* Function vect_transform_grouped_load.

   Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
   to perform their permutation and ascribe the resulting vectorized statements
   to the scalar statements.  */

void
vect_transform_grouped_load (gimple stmt, vec<tree> dr_chain, int size,
                             gimple_stmt_iterator *gsi)
{
  vec<tree> result_chain = vNULL;

  /* DR_CHAIN contains input data-refs that are a part of the interleaving.
     RESULT_CHAIN is the output of vect_permute_load_chain; it contains
     permuted vectors that are ready for vector computation.  */
  result_chain.create (size);
  vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain);
  vect_record_grouped_load_vectors (stmt, result_chain);
  result_chain.release ();
}

/* RESULT_CHAIN contains the output of a group of grouped loads that were
   generated as part of the vectorization of STMT.  Assign the statement
   for each vector to the associated scalar statement.  */

void
vect_record_grouped_load_vectors (gimple stmt, vec<tree> result_chain)
{
  gimple first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
  gimple next_stmt, new_stmt;
  unsigned int i, gap_count;
  tree tmp_data_ref;

  /* Put a permuted data-ref in the VECTORIZED_STMT field.
     Since we scan the chain starting from its first node, their order
     corresponds to the order of data-refs in RESULT_CHAIN.  */
  next_stmt = first_stmt;
  gap_count = 1;
  FOR_EACH_VEC_ELT (result_chain, i, tmp_data_ref)
    {
      if (!next_stmt)
        break;

      /* Skip the gaps.  Loads created for the gaps will be removed by the
         dead code elimination pass later.  No need to check for the first
         stmt in the group, since it always exists.
         GROUP_GAP is the number of steps in elements from the previous
         access (if there is no gap GROUP_GAP is 1).  We skip loads that
         correspond to the gaps.  */
      if (next_stmt != first_stmt
          && gap_count < GROUP_GAP (vinfo_for_stmt (next_stmt)))
        {
          gap_count++;
          continue;
        }

      while (next_stmt)
        {
          new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
          /* We assume that if VEC_STMT is not NULL, this is a case of multiple
             copies, and we put the new vector statement in the first available
             RELATED_STMT.  */
          if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
            STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
          else
            {
              if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
                {
                  gimple prev_stmt =
                    STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
                  gimple rel_stmt =
                    STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
                  while (rel_stmt)
                    {
                      prev_stmt = rel_stmt;
                      rel_stmt =
                        STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
                    }

                  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
                    new_stmt;
                }
            }

          next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
          gap_count = 1;
          /* If NEXT_STMT accesses the same DR as the previous statement,
             put the same TMP_DATA_REF as its vectorized statement; otherwise
             get the next data-ref from RESULT_CHAIN.  */
          if (!next_stmt || !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
            break;
        }
    }
}
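
/* Descriptive note on the RELATED_STMT handling above (illustration only):
   when each scalar load needs more than one vector copy (e.g. VF 8 with
   4-element vectors gives two copies), the first copy is stored in
   STMT_VINFO_VEC_STMT and every later copy is appended at the end of the
   RELATED_STMT chain, which is why the loop over RELATED_STMT walks to the
   end of the chain before recording NEW_STMT.  */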

/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to be aligned
   on an ALIGNMENT-bit boundary.  */

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  /* We cannot change the alignment of common or external symbols, as
     another translation unit may contain a definition with lower
     alignment.  The rules of common symbol linking mean that the
     definition will override the common symbol.  The same is true for
     constant pool entries, which may be shared and are not properly
     merged by LTO.  */
  if (DECL_EXTERNAL (decl)
      || DECL_COMMON (decl)
      || DECL_IN_CONSTANT_POOL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  /* Do not override the alignment as specified by the ABI when the used
     attribute is set.  */
  if (DECL_PRESERVE_P (decl))
    return false;

  /* Do not override explicit alignment set by the user when an explicit
     section name is also used.  This is a common idiom used by many
     software projects.  */
  if (DECL_SECTION_NAME (decl) != NULL_TREE
      && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}
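
/* Hypothetical example (not from a testcase): a file-local
   "static float a[256];" can normally have its alignment increased here so
   that vector accesses to it become aligned, whereas an extern or common
   array, one living in the constant pool, or one the user placed in an
   explicit section must keep the alignment it already has, which is exactly
   what the early-return checks above enforce.  */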


/* Return whether the data reference DR is supported with respect to its
   alignment.
   If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
   if it is aligned, i.e., check if it is possible to vectorize it with
   different alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr,
                               bool check_aligned_accesses)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = TYPE_MODE (vectype);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = NULL;
  bool nested_in_vect_loop = false;

  if (aligned_access_p (dr) && !check_aligned_accesses)
    return dr_aligned;

  if (loop_vinfo)
    {
      vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
      nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants to the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout the
     execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same throughout
     the execution of the inner-loop.  This is because the inner-loop advances
     with the original scalar step (and not in steps of VS).  If the inner-loop
     step happens to be a multiple of VS, then the misalignment remains fixed
     and we can use the optimized realignment scheme.  For example:

     for (i=0; i<N; i++)
       for (j=0; j<M; j++)
         s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

     for (i=0; i<N; i+=4)
       for (j=0; j<M; j++){
         vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                        // when j is {0,1,2,3,4,5,6,7,...} respectively.
                        // (assuming that we start from an aligned address).
         }

     We therefore have to use the unoptimized realignment scheme:

     for (i=0; i<N; i+=4)
       for (j=k; j<M; j+=4)
         vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                        // that the misalignment of the initial address is
                        // 0).

     The loop can then be vectorized as follows:

     for (k=0; k<4; k++){
       rt = get_realignment_token (&vp[k]);
       for (i=0; i<N; i+=4){
         v1 = vp[i+k];
         for (j=k; j<M; j+=4){
           v2 = vp[i+j+VS-1];
           va = REALIGN_LOAD <v1,v2,rt>;
           vs += va;
           v1 = v2;
         }
       }
     } */

  if (DR_IS_READ (dr))
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (optab_handler (vec_realign_load_optab, mode) != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if ((nested_in_vect_loop
               && (TREE_INT_CST_LOW (DR_STEP (dr))
                   != GET_MODE_SIZE (TYPE_MODE (vectype))))
              || !loop_vinfo)
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }
      if (!known_alignment_for_access_p (dr))
        is_packed = not_size_aligned (DR_REF (dr));

      if ((TYPE_USER_ALIGN (type) && !is_packed)
          || targetm.vectorize.
                support_vector_misalignment (mode, type,
                                             DR_MISALIGNMENT (dr), is_packed))
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }
  else
    {
      bool is_packed = false;
      tree type = (TREE_TYPE (DR_REF (dr)));

      if (!known_alignment_for_access_p (dr))
        is_packed = not_size_aligned (DR_REF (dr));

      if ((TYPE_USER_ALIGN (type) && !is_packed)
          || targetm.vectorize.
                support_vector_misalignment (mode, type,
                                             DR_MISALIGNMENT (dr), is_packed))
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}
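
/* Descriptive summary (not normative): the function above returns dr_aligned
   for accesses already known to be aligned, dr_explicit_realign or
   dr_explicit_realign_optimized for reads when the target provides
   vec_realign_load (and a usable mask-for-load builtin, if that hook is
   defined), with the optimized variant only when the misalignment is
   invariant across the vectorized loop, dr_unaligned_supported when the
   target can handle the misaligned access directly, and
   dr_unaligned_unsupported otherwise.  */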