/* Data References Analysis and Manipulation Utilities for Vectorization.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
24 #include "coretypes.h"
28 #include "stor-layout.h"
31 #include "basic-block.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-ssa-alias.h"
34 #include "internal-fn.h"
36 #include "gimple-expr.h"
40 #include "gimple-iterator.h"
41 #include "gimplify-me.h"
42 #include "gimple-ssa.h"
43 #include "tree-phinodes.h"
44 #include "ssa-iterators.h"
45 #include "stringpool.h"
46 #include "tree-ssanames.h"
47 #include "tree-ssa-loop-ivopts.h"
48 #include "tree-ssa-loop-manip.h"
49 #include "tree-ssa-loop.h"
52 #include "tree-chrec.h"
53 #include "tree-scalar-evolution.h"
54 #include "tree-vectorizer.h"
55 #include "diagnostic-core.h"
57 /* Need to include rtl.h, expr.h, etc. for optabs. */
62 /* Return true if load- or store-lanes optab OPTAB is implemented for
63 COUNT vectors of type VECTYPE. NAME is the name of OPTAB. */
66 vect_lanes_optab_supported_p (const char *name
, convert_optab optab
,
67 tree vectype
, unsigned HOST_WIDE_INT count
)
69 enum machine_mode mode
, array_mode
;
72 mode
= TYPE_MODE (vectype
);
73 limit_p
= !targetm
.array_mode_supported_p (mode
, count
);
74 array_mode
= mode_for_size (count
* GET_MODE_BITSIZE (mode
),
77 if (array_mode
== BLKmode
)
79 if (dump_enabled_p ())
80 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
81 "no array mode for %s[" HOST_WIDE_INT_PRINT_DEC
"]\n",
82 GET_MODE_NAME (mode
), count
);
86 if (convert_optab_handler (optab
, array_mode
, mode
) == CODE_FOR_nothing
)
88 if (dump_enabled_p ())
89 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
90 "cannot use %s<%s><%s>\n", name
,
91 GET_MODE_NAME (array_mode
), GET_MODE_NAME (mode
));
95 if (dump_enabled_p ())
96 dump_printf_loc (MSG_NOTE
, vect_location
,
97 "can use %s<%s><%s>\n", name
, GET_MODE_NAME (array_mode
),
98 GET_MODE_NAME (mode
));
104 /* Return the smallest scalar part of STMT.
105 This is used to determine the vectype of the stmt. We generally set the
106 vectype according to the type of the result (lhs). For stmts whose
107 result-type is different than the type of the arguments (e.g., demotion,
108 promotion), vectype will be reset appropriately (later). Note that we have
109 to visit the smallest datatype in this function, because that determines the
110 VF. If the smallest datatype in the loop is present only as the rhs of a
111 promotion operation - we'd miss it.
112 Such a case, where a variable of this datatype does not appear in the lhs
113 anywhere in the loop, can only occur if it's an invariant: e.g.:
114 'int_x = (int) short_inv', which we'd expect to have been optimized away by
115 invariant motion. However, we cannot rely on invariant motion to always
116 take invariants out of the loop, and so in the case of promotion we also
117 have to check the rhs.
118 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
122 vect_get_smallest_scalar_type (gimple stmt
, HOST_WIDE_INT
*lhs_size_unit
,
123 HOST_WIDE_INT
*rhs_size_unit
)
125 tree scalar_type
= gimple_expr_type (stmt
);
126 HOST_WIDE_INT lhs
, rhs
;
128 lhs
= rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
130 if (is_gimple_assign (stmt
)
131 && (gimple_assign_cast_p (stmt
)
132 || gimple_assign_rhs_code (stmt
) == WIDEN_MULT_EXPR
133 || gimple_assign_rhs_code (stmt
) == WIDEN_LSHIFT_EXPR
134 || gimple_assign_rhs_code (stmt
) == FLOAT_EXPR
))
136 tree rhs_type
= TREE_TYPE (gimple_assign_rhs1 (stmt
));
138 rhs
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type
));
140 scalar_type
= rhs_type
;
143 *lhs_size_unit
= lhs
;
144 *rhs_size_unit
= rhs
;
149 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
150 tested at run-time. Return TRUE if DDR was successfully inserted.
151 Return false if versioning is not supported. */
154 vect_mark_for_runtime_alias_test (ddr_p ddr
, loop_vec_info loop_vinfo
)
156 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
158 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
) == 0)
161 if (dump_enabled_p ())
163 dump_printf_loc (MSG_NOTE
, vect_location
,
164 "mark for run-time aliasing test between ");
165 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_A (ddr
)));
166 dump_printf (MSG_NOTE
, " and ");
167 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (DDR_B (ddr
)));
168 dump_printf (MSG_NOTE
, "\n");
171 if (optimize_loop_nest_for_size_p (loop
))
173 if (dump_enabled_p ())
174 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
175 "versioning not supported when optimizing"
180 /* FORNOW: We don't support versioning with outer-loop vectorization. */
183 if (dump_enabled_p ())
184 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
185 "versioning not yet supported for outer-loops.\n");
189 /* FORNOW: We don't support creating runtime alias tests for non-constant
191 if (TREE_CODE (DR_STEP (DDR_A (ddr
))) != INTEGER_CST
192 || TREE_CODE (DR_STEP (DDR_B (ddr
))) != INTEGER_CST
)
194 if (dump_enabled_p ())
195 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
196 "versioning not yet supported for non-constant "
201 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
).safe_push (ddr
);
206 /* Function vect_analyze_data_ref_dependence.
208 Return TRUE if there (might) exist a dependence between a memory-reference
209 DRA and a memory-reference DRB. When versioning for alias may check a
210 dependence at run-time, return FALSE. Adjust *MAX_VF according to
211 the data dependence. */
214 vect_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
,
215 loop_vec_info loop_vinfo
, int *max_vf
)
218 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
219 struct data_reference
*dra
= DDR_A (ddr
);
220 struct data_reference
*drb
= DDR_B (ddr
);
221 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
222 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
223 lambda_vector dist_v
;
224 unsigned int loop_depth
;
226 /* In loop analysis all data references should be vectorizable. */
227 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a
)
228 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b
))
231 /* Independent data accesses. */
232 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
236 || (DR_IS_READ (dra
) && DR_IS_READ (drb
)))
239 /* Even if we have an anti-dependence then, as the vectorized loop covers at
240 least two scalar iterations, there is always also a true dependence.
241 As the vectorizer does not re-order loads and stores we can ignore
242 the anti-dependence if TBAA can disambiguate both DRs similar to the
243 case with known negative distance anti-dependences (positive
244 distance anti-dependences would violate TBAA constraints). */
245 if (((DR_IS_READ (dra
) && DR_IS_WRITE (drb
))
246 || (DR_IS_WRITE (dra
) && DR_IS_READ (drb
)))
247 && !alias_sets_conflict_p (get_alias_set (DR_REF (dra
)),
248 get_alias_set (DR_REF (drb
))))
251 /* Unknown data dependence. */
252 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
254 /* If user asserted safelen consecutive iterations can be
255 executed concurrently, assume independence. */
256 if (loop
->safelen
>= 2)
258 if (loop
->safelen
< *max_vf
)
259 *max_vf
= loop
->safelen
;
260 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
) = false;
264 if (STMT_VINFO_GATHER_P (stmtinfo_a
)
265 || STMT_VINFO_GATHER_P (stmtinfo_b
))
267 if (dump_enabled_p ())
269 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
270 "versioning for alias not supported for: "
271 "can't determine dependence between ");
272 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
274 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
275 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
277 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
282 if (dump_enabled_p ())
284 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
285 "versioning for alias required: "
286 "can't determine dependence between ");
287 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
289 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
290 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
292 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
295 /* Add to list of ddrs that need to be tested at run-time. */
296 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
299 /* Known data dependence. */
300 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
302 /* If user asserted safelen consecutive iterations can be
303 executed concurrently, assume independence. */
304 if (loop
->safelen
>= 2)
306 if (loop
->safelen
< *max_vf
)
307 *max_vf
= loop
->safelen
;
308 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
) = false;
312 if (STMT_VINFO_GATHER_P (stmtinfo_a
)
313 || STMT_VINFO_GATHER_P (stmtinfo_b
))
315 if (dump_enabled_p ())
317 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
318 "versioning for alias not supported for: "
319 "bad dist vector for ");
320 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
322 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
323 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
325 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
330 if (dump_enabled_p ())
332 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
333 "versioning for alias required: "
334 "bad dist vector for ");
335 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (dra
));
336 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
337 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (drb
));
338 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
340 /* Add to list of ddrs that need to be tested at run-time. */
341 return !vect_mark_for_runtime_alias_test (ddr
, loop_vinfo
);
344 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
345 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
347 int dist
= dist_v
[loop_depth
];
349 if (dump_enabled_p ())
350 dump_printf_loc (MSG_NOTE
, vect_location
,
351 "dependence distance = %d.\n", dist
);
355 if (dump_enabled_p ())
357 dump_printf_loc (MSG_NOTE
, vect_location
,
358 "dependence distance == 0 between ");
359 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
360 dump_printf (MSG_NOTE
, " and ");
361 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
362 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
365 /* When we perform grouped accesses and perform implicit CSE
366 by detecting equal accesses and doing disambiguation with
367 runtime alias tests like for
375 where we will end up loading { a[i], a[i+1] } once, make
376 sure that inserting group loads before the first load and
377 stores after the last store will do the right thing. */
378 if ((STMT_VINFO_GROUPED_ACCESS (stmtinfo_a
)
379 && GROUP_SAME_DR_STMT (stmtinfo_a
))
380 || (STMT_VINFO_GROUPED_ACCESS (stmtinfo_b
)
381 && GROUP_SAME_DR_STMT (stmtinfo_b
)))
384 earlier_stmt
= get_earlier_stmt (DR_STMT (dra
), DR_STMT (drb
));
386 (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt
))))
388 if (dump_enabled_p ())
389 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
390 "READ_WRITE dependence in interleaving."
399 if (dist
> 0 && DDR_REVERSED_P (ddr
))
401 /* If DDR_REVERSED_P the order of the data-refs in DDR was
402 reversed (to make distance vector positive), and the actual
403 distance is negative. */
404 if (dump_enabled_p ())
405 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
406 "dependence distance negative.\n");
407 /* Record a negative dependence distance to later limit the
408 amount of stmt copying / unrolling we can perform.
409 Only need to handle read-after-write dependence. */
411 && (STMT_VINFO_MIN_NEG_DIST (stmtinfo_b
) == 0
412 || STMT_VINFO_MIN_NEG_DIST (stmtinfo_b
) > (unsigned)dist
))
413 STMT_VINFO_MIN_NEG_DIST (stmtinfo_b
) = dist
;
418 && abs (dist
) < *max_vf
)
420 /* The dependence distance requires reduction of the maximal
421 vectorization factor. */
422 *max_vf
= abs (dist
);
423 if (dump_enabled_p ())
424 dump_printf_loc (MSG_NOTE
, vect_location
,
425 "adjusting maximal vectorization factor to %i\n",
429 if (abs (dist
) >= *max_vf
)
431 /* Dependence distance does not create dependence, as far as
432 vectorization is concerned, in this case. */
433 if (dump_enabled_p ())
434 dump_printf_loc (MSG_NOTE
, vect_location
,
435 "dependence distance >= VF.\n");
439 if (dump_enabled_p ())
441 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
442 "not vectorized, possible dependence "
443 "between data-refs ");
444 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
445 dump_printf (MSG_NOTE
, " and ");
446 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
447 dump_printf (MSG_NOTE
, "\n");
456 /* Function vect_analyze_data_ref_dependences.
458 Examine all the data references in the loop, and make sure there do not
459 exist any data dependences between them. Set *MAX_VF according to
460 the maximum vectorization factor the data dependences allow. */
463 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo
, int *max_vf
)
466 struct data_dependence_relation
*ddr
;
468 if (dump_enabled_p ())
469 dump_printf_loc (MSG_NOTE
, vect_location
,
470 "=== vect_analyze_data_ref_dependences ===\n");
472 LOOP_VINFO_NO_DATA_DEPENDENCIES (loop_vinfo
) = true;
473 if (!compute_all_dependences (LOOP_VINFO_DATAREFS (loop_vinfo
),
474 &LOOP_VINFO_DDRS (loop_vinfo
),
475 LOOP_VINFO_LOOP_NEST (loop_vinfo
), true))
478 FOR_EACH_VEC_ELT (LOOP_VINFO_DDRS (loop_vinfo
), i
, ddr
)
479 if (vect_analyze_data_ref_dependence (ddr
, loop_vinfo
, max_vf
))
486 /* Function vect_slp_analyze_data_ref_dependence.
488 Return TRUE if there (might) exist a dependence between a memory-reference
489 DRA and a memory-reference DRB. When versioning for alias may check a
490 dependence at run-time, return FALSE. Adjust *MAX_VF according to
491 the data dependence. */
494 vect_slp_analyze_data_ref_dependence (struct data_dependence_relation
*ddr
)
496 struct data_reference
*dra
= DDR_A (ddr
);
497 struct data_reference
*drb
= DDR_B (ddr
);
499 /* We need to check dependences of statements marked as unvectorizable
500 as well, they still can prohibit vectorization. */
502 /* Independent data accesses. */
503 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
509 /* Read-read is OK. */
510 if (DR_IS_READ (dra
) && DR_IS_READ (drb
))
513 /* If dra and drb are part of the same interleaving chain consider
515 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (DR_STMT (dra
)))
516 && (GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (dra
)))
517 == GROUP_FIRST_ELEMENT (vinfo_for_stmt (DR_STMT (drb
)))))
520 /* Unknown data dependence. */
521 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
523 if (dump_enabled_p ())
525 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
526 "can't determine dependence between ");
527 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (dra
));
528 dump_printf (MSG_MISSED_OPTIMIZATION
, " and ");
529 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, DR_REF (drb
));
530 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
533 else if (dump_enabled_p ())
535 dump_printf_loc (MSG_NOTE
, vect_location
,
536 "determined dependence between ");
537 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
538 dump_printf (MSG_NOTE
, " and ");
539 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
540 dump_printf (MSG_NOTE
, "\n");
543 /* We do not vectorize basic blocks with write-write dependencies. */
544 if (DR_IS_WRITE (dra
) && DR_IS_WRITE (drb
))
547 /* If we have a read-write dependence check that the load is before the store.
548 When we vectorize basic blocks, vector load can be only before
549 corresponding scalar load, and vector store can be only after its
550 corresponding scalar store. So the order of the acceses is preserved in
551 case the load is before the store. */
552 gimple earlier_stmt
= get_earlier_stmt (DR_STMT (dra
), DR_STMT (drb
));
553 if (DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (earlier_stmt
))))
555 /* That only holds for load-store pairs taking part in vectorization. */
556 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dra
)))
557 && STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (drb
))))
565 /* Function vect_analyze_data_ref_dependences.
567 Examine all the data references in the basic-block, and make sure there
568 do not exist any data dependences between them. Set *MAX_VF according to
569 the maximum vectorization factor the data dependences allow. */
572 vect_slp_analyze_data_ref_dependences (bb_vec_info bb_vinfo
)
574 struct data_dependence_relation
*ddr
;
577 if (dump_enabled_p ())
578 dump_printf_loc (MSG_NOTE
, vect_location
,
579 "=== vect_slp_analyze_data_ref_dependences ===\n");
581 if (!compute_all_dependences (BB_VINFO_DATAREFS (bb_vinfo
),
582 &BB_VINFO_DDRS (bb_vinfo
),
586 FOR_EACH_VEC_ELT (BB_VINFO_DDRS (bb_vinfo
), i
, ddr
)
587 if (vect_slp_analyze_data_ref_dependence (ddr
))
594 /* Function vect_compute_data_ref_alignment
596 Compute the misalignment of the data reference DR.
599 1. If during the misalignment computation it is found that the data reference
600 cannot be vectorized then false is returned.
601 2. DR_MISALIGNMENT (DR) is defined.
603 FOR NOW: No analysis is actually performed. Misalignment is calculated
604 only for trivial cases. TODO. */
607 vect_compute_data_ref_alignment (struct data_reference
*dr
)
609 gimple stmt
= DR_STMT (dr
);
610 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
611 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
612 struct loop
*loop
= NULL
;
613 tree ref
= DR_REF (dr
);
615 tree base
, base_addr
;
618 tree aligned_to
, alignment
;
620 if (dump_enabled_p ())
621 dump_printf_loc (MSG_NOTE
, vect_location
,
622 "vect_compute_data_ref_alignment:\n");
625 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
627 /* Initialize misalignment to unknown. */
628 SET_DR_MISALIGNMENT (dr
, -1);
630 /* Strided loads perform only component accesses, misalignment information
631 is irrelevant for them. */
632 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
635 misalign
= DR_INIT (dr
);
636 aligned_to
= DR_ALIGNED_TO (dr
);
637 base_addr
= DR_BASE_ADDRESS (dr
);
638 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
640 /* In case the dataref is in an inner-loop of the loop that is being
641 vectorized (LOOP), we use the base and misalignment information
642 relative to the outer-loop (LOOP). This is ok only if the misalignment
643 stays the same throughout the execution of the inner-loop, which is why
644 we have to check that the stride of the dataref in the inner-loop evenly
645 divides by the vector size. */
646 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
648 tree step
= DR_STEP (dr
);
649 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
651 if (dr_step
% GET_MODE_SIZE (TYPE_MODE (vectype
)) == 0)
653 if (dump_enabled_p ())
654 dump_printf_loc (MSG_NOTE
, vect_location
,
655 "inner step divides the vector-size.\n");
656 misalign
= STMT_VINFO_DR_INIT (stmt_info
);
657 aligned_to
= STMT_VINFO_DR_ALIGNED_TO (stmt_info
);
658 base_addr
= STMT_VINFO_DR_BASE_ADDRESS (stmt_info
);
662 if (dump_enabled_p ())
663 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
664 "inner step doesn't divide the vector-size.\n");
665 misalign
= NULL_TREE
;
669 /* Similarly, if we're doing basic-block vectorization, we can only use
670 base and misalignment information relative to an innermost loop if the
671 misalignment stays the same throughout the execution of the loop.
672 As above, this is the case if the stride of the dataref evenly divides
673 by the vector size. */
676 tree step
= DR_STEP (dr
);
677 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
679 if (dr_step
% GET_MODE_SIZE (TYPE_MODE (vectype
)) != 0)
681 if (dump_enabled_p ())
682 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
683 "SLP: step doesn't divide the vector-size.\n");
684 misalign
= NULL_TREE
;
688 base
= build_fold_indirect_ref (base_addr
);
689 alignment
= ssize_int (TYPE_ALIGN (vectype
)/BITS_PER_UNIT
);
691 if ((aligned_to
&& tree_int_cst_compare (aligned_to
, alignment
) < 0)
694 if (dump_enabled_p ())
696 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
697 "Unknown alignment for access: ");
698 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, base
);
699 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
705 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base
)),
707 || (TREE_CODE (base_addr
) == SSA_NAME
708 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
709 TREE_TYPE (base_addr
)))),
711 || (get_pointer_alignment (base_addr
) >= TYPE_ALIGN (vectype
)))
714 base_aligned
= false;
718 /* Do not change the alignment of global variables here if
719 flag_section_anchors is enabled as we already generated
720 RTL for other functions. Most global variables should
721 have been aligned during the IPA increase_alignment pass. */
722 if (!vect_can_force_dr_alignment_p (base
, TYPE_ALIGN (vectype
))
723 || (TREE_STATIC (base
) && flag_section_anchors
))
725 if (dump_enabled_p ())
727 dump_printf_loc (MSG_NOTE
, vect_location
,
728 "can't force alignment of ref: ");
729 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, ref
);
730 dump_printf (MSG_NOTE
, "\n");
735 /* Force the alignment of the decl.
736 NOTE: This is the only change to the code we make during
737 the analysis phase, before deciding to vectorize the loop. */
738 if (dump_enabled_p ())
740 dump_printf_loc (MSG_NOTE
, vect_location
, "force alignment of ");
741 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, ref
);
742 dump_printf (MSG_NOTE
, "\n");
745 ((dataref_aux
*)dr
->aux
)->base_decl
= base
;
746 ((dataref_aux
*)dr
->aux
)->base_misaligned
= true;
749 /* If this is a backward running DR then first access in the larger
750 vectype actually is N-1 elements before the address in the DR.
751 Adjust misalign accordingly. */
752 if (tree_int_cst_compare (DR_STEP (dr
), size_zero_node
) < 0)
754 tree offset
= ssize_int (TYPE_VECTOR_SUBPARTS (vectype
) - 1);
755 /* DR_STEP(dr) is the same as -TYPE_SIZE of the scalar type,
756 otherwise we wouldn't be here. */
757 offset
= fold_build2 (MULT_EXPR
, ssizetype
, offset
, DR_STEP (dr
));
758 /* PLUS because DR_STEP was negative. */
759 misalign
= size_binop (PLUS_EXPR
, misalign
, offset
);
762 /* Modulo alignment. */
763 misalign
= size_binop (FLOOR_MOD_EXPR
, misalign
, alignment
);
765 if (!tree_fits_uhwi_p (misalign
))
767 /* Negative or overflowed misalignment value. */
768 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
770 "unexpected misalign value\n");
774 SET_DR_MISALIGNMENT (dr
, tree_to_uhwi (misalign
));
776 if (dump_enabled_p ())
778 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
779 "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr
));
780 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, ref
);
781 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
788 /* Function vect_compute_data_refs_alignment
790 Compute the misalignment of data references in the loop.
791 Return FALSE if a data reference is found that cannot be vectorized. */
794 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo
,
795 bb_vec_info bb_vinfo
)
797 vec
<data_reference_p
> datarefs
;
798 struct data_reference
*dr
;
802 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
804 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
806 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
807 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
)))
808 && !vect_compute_data_ref_alignment (dr
))
812 /* Mark unsupported statement as unvectorizable. */
813 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
824 /* Function vect_update_misalignment_for_peel
826 DR - the data reference whose misalignment is to be adjusted.
827 DR_PEEL - the data reference whose misalignment is being made
828 zero in the vector loop by the peel.
829 NPEEL - the number of iterations in the peel loop if the misalignment
830 of DR_PEEL is known at compile time. */
833 vect_update_misalignment_for_peel (struct data_reference
*dr
,
834 struct data_reference
*dr_peel
, int npeel
)
837 vec
<dr_p
> same_align_drs
;
838 struct data_reference
*current_dr
;
839 int dr_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr
))));
840 int dr_peel_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel
))));
841 stmt_vec_info stmt_info
= vinfo_for_stmt (DR_STMT (dr
));
842 stmt_vec_info peel_stmt_info
= vinfo_for_stmt (DR_STMT (dr_peel
));
844 /* For interleaved data accesses the step in the loop must be multiplied by
845 the size of the interleaving group. */
846 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
847 dr_size
*= GROUP_SIZE (vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info
)));
848 if (STMT_VINFO_GROUPED_ACCESS (peel_stmt_info
))
849 dr_peel_size
*= GROUP_SIZE (peel_stmt_info
);
851 /* It can be assumed that the data refs with the same alignment as dr_peel
852 are aligned in the vector loop. */
854 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel
)));
855 FOR_EACH_VEC_ELT (same_align_drs
, i
, current_dr
)
857 if (current_dr
!= dr
)
859 gcc_assert (DR_MISALIGNMENT (dr
) / dr_size
==
860 DR_MISALIGNMENT (dr_peel
) / dr_peel_size
);
861 SET_DR_MISALIGNMENT (dr
, 0);
865 if (known_alignment_for_access_p (dr
)
866 && known_alignment_for_access_p (dr_peel
))
868 bool negative
= tree_int_cst_compare (DR_STEP (dr
), size_zero_node
) < 0;
869 int misal
= DR_MISALIGNMENT (dr
);
870 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
871 misal
+= negative
? -npeel
* dr_size
: npeel
* dr_size
;
872 misal
&= (TYPE_ALIGN (vectype
) / BITS_PER_UNIT
) - 1;
873 SET_DR_MISALIGNMENT (dr
, misal
);
877 if (dump_enabled_p ())
878 dump_printf_loc (MSG_NOTE
, vect_location
, "Setting misalignment to -1.\n");
879 SET_DR_MISALIGNMENT (dr
, -1);
883 /* Function vect_verify_datarefs_alignment
885 Return TRUE if all data references in the loop can be
886 handled with respect to alignment. */
889 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
891 vec
<data_reference_p
> datarefs
;
892 struct data_reference
*dr
;
893 enum dr_alignment_support supportable_dr_alignment
;
897 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
899 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
901 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
903 gimple stmt
= DR_STMT (dr
);
904 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
906 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
909 /* For interleaving, only the alignment of the first access matters.
910 Skip statements marked as not vectorizable. */
911 if ((STMT_VINFO_GROUPED_ACCESS (stmt_info
)
912 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
913 || !STMT_VINFO_VECTORIZABLE (stmt_info
))
916 /* Strided loads perform only component accesses, alignment is
917 irrelevant for them. */
918 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
921 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
922 if (!supportable_dr_alignment
)
924 if (dump_enabled_p ())
927 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
928 "not vectorized: unsupported unaligned load.");
930 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
931 "not vectorized: unsupported unaligned "
934 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
936 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
940 if (supportable_dr_alignment
!= dr_aligned
&& dump_enabled_p ())
941 dump_printf_loc (MSG_NOTE
, vect_location
,
942 "Vectorizing an unaligned access.\n");
947 /* Given an memory reference EXP return whether its alignment is less
951 not_size_aligned (tree exp
)
953 if (!tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (exp
))))
956 return (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (exp
)))
957 > get_object_alignment (exp
));
960 /* Function vector_alignment_reachable_p
962 Return true if vector alignment for DR is reachable by peeling
963 a few loop iterations. Return false otherwise. */
966 vector_alignment_reachable_p (struct data_reference
*dr
)
968 gimple stmt
= DR_STMT (dr
);
969 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
970 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
972 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
974 /* For interleaved access we peel only if number of iterations in
975 the prolog loop ({VF - misalignment}), is a multiple of the
976 number of the interleaved accesses. */
977 int elem_size
, mis_in_elements
;
978 int nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
980 /* FORNOW: handle only known alignment. */
981 if (!known_alignment_for_access_p (dr
))
984 elem_size
= GET_MODE_SIZE (TYPE_MODE (vectype
)) / nelements
;
985 mis_in_elements
= DR_MISALIGNMENT (dr
) / elem_size
;
987 if ((nelements
- mis_in_elements
) % GROUP_SIZE (stmt_info
))
991 /* If misalignment is known at the compile time then allow peeling
992 only if natural alignment is reachable through peeling. */
993 if (known_alignment_for_access_p (dr
) && !aligned_access_p (dr
))
995 HOST_WIDE_INT elmsize
=
996 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype
)));
997 if (dump_enabled_p ())
999 dump_printf_loc (MSG_NOTE
, vect_location
,
1000 "data size =" HOST_WIDE_INT_PRINT_DEC
, elmsize
);
1001 dump_printf (MSG_NOTE
,
1002 ". misalignment = %d.\n", DR_MISALIGNMENT (dr
));
1004 if (DR_MISALIGNMENT (dr
) % elmsize
)
1006 if (dump_enabled_p ())
1007 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1008 "data size does not divide the misalignment.\n");
1013 if (!known_alignment_for_access_p (dr
))
1015 tree type
= TREE_TYPE (DR_REF (dr
));
1016 bool is_packed
= not_size_aligned (DR_REF (dr
));
1017 if (dump_enabled_p ())
1018 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1019 "Unknown misalignment, is_packed = %d\n",is_packed
);
1020 if ((TYPE_USER_ALIGN (type
) && !is_packed
)
1021 || targetm
.vectorize
.vector_alignment_reachable (type
, is_packed
))
1031 /* Calculate the cost of the memory access represented by DR. */
1034 vect_get_data_access_cost (struct data_reference
*dr
,
1035 unsigned int *inside_cost
,
1036 unsigned int *outside_cost
,
1037 stmt_vector_for_cost
*body_cost_vec
)
1039 gimple stmt
= DR_STMT (dr
);
1040 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1041 int nunits
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
1042 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1043 int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1044 int ncopies
= vf
/ nunits
;
1046 if (DR_IS_READ (dr
))
1047 vect_get_load_cost (dr
, ncopies
, true, inside_cost
, outside_cost
,
1048 NULL
, body_cost_vec
, false);
1050 vect_get_store_cost (dr
, ncopies
, inside_cost
, body_cost_vec
);
1052 if (dump_enabled_p ())
1053 dump_printf_loc (MSG_NOTE
, vect_location
,
1054 "vect_get_data_access_cost: inside_cost = %d, "
1055 "outside_cost = %d.\n", *inside_cost
, *outside_cost
);
1059 /* Insert DR into peeling hash table with NPEEL as key. */
1062 vect_peeling_hash_insert (loop_vec_info loop_vinfo
, struct data_reference
*dr
,
1065 struct _vect_peel_info elem
, *slot
;
1066 _vect_peel_info
**new_slot
;
1067 bool supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1070 slot
= LOOP_VINFO_PEELING_HTAB (loop_vinfo
).find (&elem
);
1075 slot
= XNEW (struct _vect_peel_info
);
1076 slot
->npeel
= npeel
;
1079 new_slot
= LOOP_VINFO_PEELING_HTAB (loop_vinfo
).find_slot (slot
, INSERT
);
1083 if (!supportable_dr_alignment
1084 && unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo
)))
1085 slot
->count
+= VECT_MAX_COST
;
1089 /* Traverse peeling hash table to find peeling option that aligns maximum
1090 number of data accesses. */
1093 vect_peeling_hash_get_most_frequent (_vect_peel_info
**slot
,
1094 _vect_peel_extended_info
*max
)
1096 vect_peel_info elem
= *slot
;
1098 if (elem
->count
> max
->peel_info
.count
1099 || (elem
->count
== max
->peel_info
.count
1100 && max
->peel_info
.npeel
> elem
->npeel
))
1102 max
->peel_info
.npeel
= elem
->npeel
;
1103 max
->peel_info
.count
= elem
->count
;
1104 max
->peel_info
.dr
= elem
->dr
;
1111 /* Traverse peeling hash table and calculate cost for each peeling option.
1112 Find the one with the lowest cost. */
1115 vect_peeling_hash_get_lowest_cost (_vect_peel_info
**slot
,
1116 _vect_peel_extended_info
*min
)
1118 vect_peel_info elem
= *slot
;
1119 int save_misalignment
, dummy
;
1120 unsigned int inside_cost
= 0, outside_cost
= 0, i
;
1121 gimple stmt
= DR_STMT (elem
->dr
);
1122 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1123 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
1124 vec
<data_reference_p
> datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1125 struct data_reference
*dr
;
1126 stmt_vector_for_cost prologue_cost_vec
, body_cost_vec
, epilogue_cost_vec
;
1127 int single_iter_cost
;
1129 prologue_cost_vec
.create (2);
1130 body_cost_vec
.create (2);
1131 epilogue_cost_vec
.create (2);
1133 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1135 stmt
= DR_STMT (dr
);
1136 stmt_info
= vinfo_for_stmt (stmt
);
1137 /* For interleaving, only the alignment of the first access
1139 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1140 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1143 save_misalignment
= DR_MISALIGNMENT (dr
);
1144 vect_update_misalignment_for_peel (dr
, elem
->dr
, elem
->npeel
);
1145 vect_get_data_access_cost (dr
, &inside_cost
, &outside_cost
,
1147 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1150 single_iter_cost
= vect_get_single_scalar_iteration_cost (loop_vinfo
);
1151 outside_cost
+= vect_get_known_peeling_cost (loop_vinfo
, elem
->npeel
,
1152 &dummy
, single_iter_cost
,
1154 &epilogue_cost_vec
);
1156 /* Prologue and epilogue costs are added to the target model later.
1157 These costs depend only on the scalar iteration cost, the
1158 number of peeling iterations finally chosen, and the number of
1159 misaligned statements. So discard the information found here. */
1160 prologue_cost_vec
.release ();
1161 epilogue_cost_vec
.release ();
1163 if (inside_cost
< min
->inside_cost
1164 || (inside_cost
== min
->inside_cost
&& outside_cost
< min
->outside_cost
))
1166 min
->inside_cost
= inside_cost
;
1167 min
->outside_cost
= outside_cost
;
1168 min
->body_cost_vec
.release ();
1169 min
->body_cost_vec
= body_cost_vec
;
1170 min
->peel_info
.dr
= elem
->dr
;
1171 min
->peel_info
.npeel
= elem
->npeel
;
1174 body_cost_vec
.release ();
1180 /* Choose best peeling option by traversing peeling hash table and either
1181 choosing an option with the lowest cost (if cost model is enabled) or the
1182 option that aligns as many accesses as possible. */
1184 static struct data_reference
*
1185 vect_peeling_hash_choose_best_peeling (loop_vec_info loop_vinfo
,
1186 unsigned int *npeel
,
1187 stmt_vector_for_cost
*body_cost_vec
)
1189 struct _vect_peel_extended_info res
;
1191 res
.peel_info
.dr
= NULL
;
1192 res
.body_cost_vec
= stmt_vector_for_cost ();
1194 if (!unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo
)))
1196 res
.inside_cost
= INT_MAX
;
1197 res
.outside_cost
= INT_MAX
;
1198 LOOP_VINFO_PEELING_HTAB (loop_vinfo
)
1199 .traverse
<_vect_peel_extended_info
*,
1200 vect_peeling_hash_get_lowest_cost
> (&res
);
1204 res
.peel_info
.count
= 0;
1205 LOOP_VINFO_PEELING_HTAB (loop_vinfo
)
1206 .traverse
<_vect_peel_extended_info
*,
1207 vect_peeling_hash_get_most_frequent
> (&res
);
1210 *npeel
= res
.peel_info
.npeel
;
1211 *body_cost_vec
= res
.body_cost_vec
;
1212 return res
.peel_info
.dr
;
1216 /* Function vect_enhance_data_refs_alignment
1218 This pass will use loop versioning and loop peeling in order to enhance
1219 the alignment of data references in the loop.
1221 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1222 original loop is to be vectorized. Any other loops that are created by
1223 the transformations performed in this pass - are not supposed to be
1224 vectorized. This restriction will be relaxed.
1226 This pass will require a cost model to guide it whether to apply peeling
1227 or versioning or a combination of the two. For example, the scheme that
1228 intel uses when given a loop with several memory accesses, is as follows:
1229 choose one memory access ('p') which alignment you want to force by doing
1230 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1231 other accesses are not necessarily aligned, or (2) use loop versioning to
1232 generate one loop in which all accesses are aligned, and another loop in
1233 which only 'p' is necessarily aligned.
1235 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1236 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Ximmin Tian, International
1237 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1239 Devising a cost model is the most critical aspect of this work. It will
1240 guide us on which access to peel for, whether to use loop versioning, how
1241 many versions to create, etc. The cost model will probably consist of
1242 generic considerations as well as target specific considerations (on
1243 powerpc for example, misaligned stores are more painful than misaligned
1246 Here are the general steps involved in alignment enhancements:
1248 -- original loop, before alignment analysis:
1249 for (i=0; i<N; i++){
1250 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1251 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1254 -- After vect_compute_data_refs_alignment:
1255 for (i=0; i<N; i++){
1256 x = q[i]; # DR_MISALIGNMENT(q) = 3
1257 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1260 -- Possibility 1: we do loop versioning:
1262 for (i=0; i<N; i++){ # loop 1A
1263 x = q[i]; # DR_MISALIGNMENT(q) = 3
1264 p[i] = y; # DR_MISALIGNMENT(p) = 0
1268 for (i=0; i<N; i++){ # loop 1B
1269 x = q[i]; # DR_MISALIGNMENT(q) = 3
1270 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1274 -- Possibility 2: we do loop peeling:
1275 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1279 for (i = 3; i < N; i++){ # loop 2A
1280 x = q[i]; # DR_MISALIGNMENT(q) = 0
1281 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1284 -- Possibility 3: combination of loop peeling and versioning:
1285 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1290 for (i = 3; i<N; i++){ # loop 3A
1291 x = q[i]; # DR_MISALIGNMENT(q) = 0
1292 p[i] = y; # DR_MISALIGNMENT(p) = 0
1296 for (i = 3; i<N; i++){ # loop 3B
1297 x = q[i]; # DR_MISALIGNMENT(q) = 0
1298 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1302 These loops are later passed to loop_transform to be vectorized. The
1303 vectorizer will use the alignment information to guide the transformation
1304 (whether to generate regular loads/stores, or with special handling for
1308 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo
)
1310 vec
<data_reference_p
> datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
1311 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1312 enum dr_alignment_support supportable_dr_alignment
;
1313 struct data_reference
*dr0
= NULL
, *first_store
= NULL
;
1314 struct data_reference
*dr
;
1316 bool do_peeling
= false;
1317 bool do_versioning
= false;
1320 stmt_vec_info stmt_info
;
1321 unsigned int npeel
= 0;
1322 bool all_misalignments_unknown
= true;
1323 unsigned int vf
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1324 unsigned possible_npeel_number
= 1;
1326 unsigned int nelements
, mis
, same_align_drs_max
= 0;
1327 stmt_vector_for_cost body_cost_vec
= stmt_vector_for_cost ();
1329 if (dump_enabled_p ())
1330 dump_printf_loc (MSG_NOTE
, vect_location
,
1331 "=== vect_enhance_data_refs_alignment ===\n");
1333 /* While cost model enhancements are expected in the future, the high level
1334 view of the code at this time is as follows:
1336 A) If there is a misaligned access then see if peeling to align
1337 this access can make all data references satisfy
1338 vect_supportable_dr_alignment. If so, update data structures
1339 as needed and return true.
1341 B) If peeling wasn't possible and there is a data reference with an
1342 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1343 then see if loop versioning checks can be used to make all data
1344 references satisfy vect_supportable_dr_alignment. If so, update
1345 data structures as needed and return true.
1347 C) If neither peeling nor versioning were successful then return false if
1348 any data reference does not satisfy vect_supportable_dr_alignment.
1350 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1352 Note, Possibility 3 above (which is peeling and versioning together) is not
1353 being done at this time. */
1355 /* (1) Peeling to force alignment. */
1357 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1359 + How many accesses will become aligned due to the peeling
1360 - How many accesses will become unaligned due to the peeling,
1361 and the cost of misaligned accesses.
1362 - The cost of peeling (the extra runtime checks, the increase
1365 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1367 stmt
= DR_STMT (dr
);
1368 stmt_info
= vinfo_for_stmt (stmt
);
1370 if (!STMT_VINFO_RELEVANT_P (stmt_info
))
1373 /* For interleaving, only the alignment of the first access
1375 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1376 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1379 /* For invariant accesses there is nothing to enhance. */
1380 if (integer_zerop (DR_STEP (dr
)))
1383 /* Strided loads perform only component accesses, alignment is
1384 irrelevant for them. */
1385 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1388 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, true);
1389 do_peeling
= vector_alignment_reachable_p (dr
);
1392 if (known_alignment_for_access_p (dr
))
1394 unsigned int npeel_tmp
;
1395 bool negative
= tree_int_cst_compare (DR_STEP (dr
),
1396 size_zero_node
) < 0;
1398 /* Save info about DR in the hash table. */
1399 if (!LOOP_VINFO_PEELING_HTAB (loop_vinfo
).is_created ())
1400 LOOP_VINFO_PEELING_HTAB (loop_vinfo
).create (1);
1402 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1403 nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1404 mis
= DR_MISALIGNMENT (dr
) / GET_MODE_SIZE (TYPE_MODE (
1405 TREE_TYPE (DR_REF (dr
))));
1406 npeel_tmp
= (negative
1407 ? (mis
- nelements
) : (nelements
- mis
))
1410 /* For multiple types, it is possible that the bigger type access
1411 will have more than one peeling option. E.g., a loop with two
1412 types: one of size (vector size / 4), and the other one of
1413 size (vector size / 8). Vectorization factor will 8. If both
1414 access are misaligned by 3, the first one needs one scalar
1415 iteration to be aligned, and the second one needs 5. But the
1416 the first one will be aligned also by peeling 5 scalar
1417 iterations, and in that case both accesses will be aligned.
1418 Hence, except for the immediate peeling amount, we also want
1419 to try to add full vector size, while we don't exceed
1420 vectorization factor.
1421 We do this automtically for cost model, since we calculate cost
1422 for every peeling option. */
1423 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo
)))
1424 possible_npeel_number
= vf
/nelements
;
1426 /* Handle the aligned case. We may decide to align some other
1427 access, making DR unaligned. */
1428 if (DR_MISALIGNMENT (dr
) == 0)
1431 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo
)))
1432 possible_npeel_number
++;
1435 for (j
= 0; j
< possible_npeel_number
; j
++)
1437 gcc_assert (npeel_tmp
<= vf
);
1438 vect_peeling_hash_insert (loop_vinfo
, dr
, npeel_tmp
);
1439 npeel_tmp
+= nelements
;
1442 all_misalignments_unknown
= false;
1443 /* Data-ref that was chosen for the case that all the
1444 misalignments are unknown is not relevant anymore, since we
1445 have a data-ref with known alignment. */
1450 /* If we don't know any misalignment values, we prefer
1451 peeling for data-ref that has the maximum number of data-refs
1452 with the same alignment, unless the target prefers to align
1453 stores over load. */
1454 if (all_misalignments_unknown
)
1456 unsigned same_align_drs
1457 = STMT_VINFO_SAME_ALIGN_REFS (stmt_info
).length ();
1459 || same_align_drs_max
< same_align_drs
)
1461 same_align_drs_max
= same_align_drs
;
1464 /* For data-refs with the same number of related
1465 accesses prefer the one where the misalign
1466 computation will be invariant in the outermost loop. */
1467 else if (same_align_drs_max
== same_align_drs
)
1469 struct loop
*ivloop0
, *ivloop
;
1470 ivloop0
= outermost_invariant_loop_for_expr
1471 (loop
, DR_BASE_ADDRESS (dr0
));
1472 ivloop
= outermost_invariant_loop_for_expr
1473 (loop
, DR_BASE_ADDRESS (dr
));
1474 if ((ivloop
&& !ivloop0
)
1475 || (ivloop
&& ivloop0
1476 && flow_loop_nested_p (ivloop
, ivloop0
)))
1480 if (!first_store
&& DR_IS_WRITE (dr
))
1484 /* If there are both known and unknown misaligned accesses in the
1485 loop, we choose peeling amount according to the known
1487 if (!supportable_dr_alignment
)
1490 if (!first_store
&& DR_IS_WRITE (dr
))
1497 if (!aligned_access_p (dr
))
1499 if (dump_enabled_p ())
1500 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1501 "vector alignment may not be reachable\n");
1507 /* Check if we can possibly peel the loop. */
1508 if (!vect_can_advance_ivs_p (loop_vinfo
)
1509 || !slpeel_can_duplicate_loop_p (loop
, single_exit (loop
)))
1512 if (do_peeling
&& all_misalignments_unknown
1513 && vect_supportable_dr_alignment (dr0
, false))
1516 /* Check if the target requires to prefer stores over loads, i.e., if
1517 misaligned stores are more expensive than misaligned loads (taking
1518 drs with same alignment into account). */
1519 if (first_store
&& DR_IS_READ (dr0
))
1521 unsigned int load_inside_cost
= 0, load_outside_cost
= 0;
1522 unsigned int store_inside_cost
= 0, store_outside_cost
= 0;
1523 unsigned int load_inside_penalty
= 0, load_outside_penalty
= 0;
1524 unsigned int store_inside_penalty
= 0, store_outside_penalty
= 0;
1525 stmt_vector_for_cost dummy
;
1528 vect_get_data_access_cost (dr0
, &load_inside_cost
, &load_outside_cost
,
1530 vect_get_data_access_cost (first_store
, &store_inside_cost
,
1531 &store_outside_cost
, &dummy
);
1535 /* Calculate the penalty for leaving FIRST_STORE unaligned (by
1536 aligning the load DR0). */
1537 load_inside_penalty
= store_inside_cost
;
1538 load_outside_penalty
= store_outside_cost
;
1540 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1541 DR_STMT (first_store
))).iterate (i
, &dr
);
1543 if (DR_IS_READ (dr
))
1545 load_inside_penalty
+= load_inside_cost
;
1546 load_outside_penalty
+= load_outside_cost
;
1550 load_inside_penalty
+= store_inside_cost
;
1551 load_outside_penalty
+= store_outside_cost
;
1554 /* Calculate the penalty for leaving DR0 unaligned (by
1555 aligning the FIRST_STORE). */
1556 store_inside_penalty
= load_inside_cost
;
1557 store_outside_penalty
= load_outside_cost
;
1559 STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (
1560 DR_STMT (dr0
))).iterate (i
, &dr
);
1562 if (DR_IS_READ (dr
))
1564 store_inside_penalty
+= load_inside_cost
;
1565 store_outside_penalty
+= load_outside_cost
;
1569 store_inside_penalty
+= store_inside_cost
;
1570 store_outside_penalty
+= store_outside_cost
;
1573 if (load_inside_penalty
> store_inside_penalty
1574 || (load_inside_penalty
== store_inside_penalty
1575 && load_outside_penalty
> store_outside_penalty
))
1579 /* In case there are only loads with different unknown misalignments, use
1580 peeling only if it may help to align other accesses in the loop. */
1582 && !STMT_VINFO_SAME_ALIGN_REFS (
1583 vinfo_for_stmt (DR_STMT (dr0
))).length ()
1584 && vect_supportable_dr_alignment (dr0
, false)
1585 != dr_unaligned_supported
)
1589 if (do_peeling
&& !dr0
)
1591 /* Peeling is possible, but there is no data access that is not supported
1592 unless aligned. So we try to choose the best possible peeling. */
1594 /* We should get here only if there are drs with known misalignment. */
1595 gcc_assert (!all_misalignments_unknown
);
1597 /* Choose the best peeling from the hash table. */
1598 dr0
= vect_peeling_hash_choose_best_peeling (loop_vinfo
, &npeel
,
1606 stmt
= DR_STMT (dr0
);
1607 stmt_info
= vinfo_for_stmt (stmt
);
1608 vectype
= STMT_VINFO_VECTYPE (stmt_info
);
1609 nelements
= TYPE_VECTOR_SUBPARTS (vectype
);
1611 if (known_alignment_for_access_p (dr0
))
1613 bool negative
= tree_int_cst_compare (DR_STEP (dr0
),
1614 size_zero_node
) < 0;
1617 /* Since it's known at compile time, compute the number of
1618 iterations in the peeled loop (the peeling factor) for use in
1619 updating DR_MISALIGNMENT values. The peeling factor is the
1620 vectorization factor minus the misalignment as an element
1622 mis
= DR_MISALIGNMENT (dr0
);
1623 mis
/= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0
))));
1624 npeel
= ((negative
? mis
- nelements
: nelements
- mis
)
1628 /* For interleaved data access every iteration accesses all the
1629 members of the group, therefore we divide the number of iterations
1630 by the group size. */
1631 stmt_info
= vinfo_for_stmt (DR_STMT (dr0
));
1632 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
))
1633 npeel
/= GROUP_SIZE (stmt_info
);
1635 if (dump_enabled_p ())
1636 dump_printf_loc (MSG_NOTE
, vect_location
,
1637 "Try peeling by %d\n", npeel
);
1640 /* Ensure that all data refs can be vectorized after the peel. */
1641 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1643 int save_misalignment
;
1648 stmt
= DR_STMT (dr
);
1649 stmt_info
= vinfo_for_stmt (stmt
);
1650 /* For interleaving, only the alignment of the first access
1652 if (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1653 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
)
1656 /* Strided loads perform only component accesses, alignment is
1657 irrelevant for them. */
1658 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1661 save_misalignment
= DR_MISALIGNMENT (dr
);
1662 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1663 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1664 SET_DR_MISALIGNMENT (dr
, save_misalignment
);
1666 if (!supportable_dr_alignment
)
1673 if (do_peeling
&& known_alignment_for_access_p (dr0
) && npeel
== 0)
1675 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1680 body_cost_vec
.release ();
1687 unsigned max_allowed_peel
1688 = PARAM_VALUE (PARAM_VECT_MAX_PEELING_FOR_ALIGNMENT
);
1689 if (max_allowed_peel
!= (unsigned)-1)
1691 unsigned max_peel
= npeel
;
1694 gimple dr_stmt
= DR_STMT (dr0
);
1695 stmt_vec_info vinfo
= vinfo_for_stmt (dr_stmt
);
1696 tree vtype
= STMT_VINFO_VECTYPE (vinfo
);
1697 max_peel
= TYPE_VECTOR_SUBPARTS (vtype
) - 1;
1699 if (max_peel
> max_allowed_peel
)
1702 if (dump_enabled_p ())
1703 dump_printf_loc (MSG_NOTE
, vect_location
,
1704 "Disable peeling, max peels reached: %d\n", max_peel
);
1711 stmt_info_for_cost
*si
;
1712 void *data
= LOOP_VINFO_TARGET_COST_DATA (loop_vinfo
);
1714 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1715 If the misalignment of DR_i is identical to that of dr0 then set
1716 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1717 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1718 by the peeling factor times the element size of DR_i (MOD the
1719 vectorization factor times the size). Otherwise, the
1720 misalignment of DR_i must be set to unknown. */
1721 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1723 vect_update_misalignment_for_peel (dr
, dr0
, npeel
);
1725 LOOP_VINFO_UNALIGNED_DR (loop_vinfo
) = dr0
;
1727 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
) = npeel
;
1729 LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo
)
1730 = DR_MISALIGNMENT (dr0
);
1731 SET_DR_MISALIGNMENT (dr0
, 0);
1732 if (dump_enabled_p ())
1734 dump_printf_loc (MSG_NOTE
, vect_location
,
1735 "Alignment of access forced using peeling.\n");
1736 dump_printf_loc (MSG_NOTE
, vect_location
,
1737 "Peeling for alignment will be applied.\n");
1739 /* We've delayed passing the inside-loop peeling costs to the
1740 target cost model until we were sure peeling would happen.
1742 if (body_cost_vec
.exists ())
1744 FOR_EACH_VEC_ELT (body_cost_vec
, i
, si
)
1746 struct _stmt_vec_info
*stmt_info
1747 = si
->stmt
? vinfo_for_stmt (si
->stmt
) : NULL
;
1748 (void) add_stmt_cost (data
, si
->count
, si
->kind
, stmt_info
,
1749 si
->misalign
, vect_body
);
1751 body_cost_vec
.release ();
1754 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1760 body_cost_vec
.release ();
1762 /* (2) Versioning to force alignment. */
1764 /* Try versioning if:
1765 1) optimize loop for speed
1766 2) there is at least one unsupported misaligned data ref with an unknown
1768 3) all misaligned data refs with a known misalignment are supported, and
1769 4) the number of runtime alignment checks is within reason. */
1772 optimize_loop_nest_for_speed_p (loop
)
1773 && (!loop
->inner
); /* FORNOW */
1777 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
1779 stmt
= DR_STMT (dr
);
1780 stmt_info
= vinfo_for_stmt (stmt
);
1782 /* For interleaving, only the alignment of the first access
1784 if (aligned_access_p (dr
)
1785 || (STMT_VINFO_GROUPED_ACCESS (stmt_info
)
1786 && GROUP_FIRST_ELEMENT (stmt_info
) != stmt
))
1789 /* Strided loads perform only component accesses, alignment is
1790 irrelevant for them. */
1791 if (STMT_VINFO_STRIDE_LOAD_P (stmt_info
))
1794 supportable_dr_alignment
= vect_supportable_dr_alignment (dr
, false);
1796 if (!supportable_dr_alignment
)
1802 if (known_alignment_for_access_p (dr
)
1803 || LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
).length ()
1804 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS
))
1806 do_versioning
= false;
1810 stmt
= DR_STMT (dr
);
1811 vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
1812 gcc_assert (vectype
);
1814 /* The rightmost bits of an aligned address must be zeros.
1815 Construct the mask needed for this test. For example,
1816 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1817 mask must be 15 = 0xf. */
1818 mask
= GET_MODE_SIZE (TYPE_MODE (vectype
)) - 1;
1820 /* FORNOW: use the same mask to test all potentially unaligned
1821 references in the loop. The vectorizer currently supports
1822 a single vector size, see the reference to
1823 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1824 vectorization factor is computed. */
1825 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo
)
1826 || LOOP_VINFO_PTR_MASK (loop_vinfo
) == mask
);
1827 LOOP_VINFO_PTR_MASK (loop_vinfo
) = mask
;
1828 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
).safe_push (
1833 /* Versioning requires at least one misaligned data reference. */
1834 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo
))
1835 do_versioning
= false;
1836 else if (!do_versioning
)
1837 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
).truncate (0);
1842 vec
<gimple
> may_misalign_stmts
1843 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo
);
1846 /* It can now be assumed that the data references in the statements
1847 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1848 of the loop being vectorized. */
1849 FOR_EACH_VEC_ELT (may_misalign_stmts
, i
, stmt
)
1851 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
1852 dr
= STMT_VINFO_DATA_REF (stmt_info
);
1853 SET_DR_MISALIGNMENT (dr
, 0);
1854 if (dump_enabled_p ())
1855 dump_printf_loc (MSG_NOTE
, vect_location
,
1856 "Alignment of access forced using versioning.\n");
1859 if (dump_enabled_p ())
1860 dump_printf_loc (MSG_NOTE
, vect_location
,
1861 "Versioning for alignment will be applied.\n");
1863 /* Peeling and versioning can't be done together at this time. */
1864 gcc_assert (! (do_peeling
&& do_versioning
));
1866 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1871 /* This point is reached if neither peeling nor versioning is being done. */
1872 gcc_assert (! (do_peeling
|| do_versioning
));
1874 stat
= vect_verify_datarefs_alignment (loop_vinfo
, NULL
);
1879 /* Function vect_find_same_alignment_drs.
1881 Update group and alignment relations according to the chosen
1882 vectorization factor. */
1885 vect_find_same_alignment_drs (struct data_dependence_relation
*ddr
,
1886 loop_vec_info loop_vinfo
)
1889 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
1890 int vectorization_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
1891 struct data_reference
*dra
= DDR_A (ddr
);
1892 struct data_reference
*drb
= DDR_B (ddr
);
1893 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
1894 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
1895 int dra_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra
))));
1896 int drb_size
= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb
))));
1897 lambda_vector dist_v
;
1898 unsigned int loop_depth
;
1900 if (DDR_ARE_DEPENDENT (ddr
) == chrec_known
)
1906 if (DDR_ARE_DEPENDENT (ddr
) == chrec_dont_know
)
1909 /* Loop-based vectorization and known data dependence. */
1910 if (DDR_NUM_DIST_VECTS (ddr
) == 0)
1913 /* Data-dependence analysis reports a distance vector of zero
1914 for data-references that overlap only in the first iteration
1915 but have different sign step (see PR45764).
1916 So as a sanity check require equal DR_STEP. */
1917 if (!operand_equal_p (DR_STEP (dra
), DR_STEP (drb
), 0))
1920 loop_depth
= index_in_loop_nest (loop
->num
, DDR_LOOP_NEST (ddr
));
1921 FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr
), i
, dist_v
)
1923 int dist
= dist_v
[loop_depth
];
1925 if (dump_enabled_p ())
1926 dump_printf_loc (MSG_NOTE
, vect_location
,
1927 "dependence distance = %d.\n", dist
);
1929 /* Same loop iteration. */
1931 || (dist
% vectorization_factor
== 0 && dra_size
== drb_size
))
1933 /* Two references with distance zero have the same alignment. */
1934 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a
).safe_push (drb
);
1935 STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b
).safe_push (dra
);
1936 if (dump_enabled_p ())
1938 dump_printf_loc (MSG_NOTE
, vect_location
,
1939 "accesses have the same alignment.\n");
1940 dump_printf (MSG_NOTE
,
1941 "dependence distance modulo vf == 0 between ");
1942 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
1943 dump_printf (MSG_NOTE
, " and ");
1944 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
1945 dump_printf (MSG_NOTE
, "\n");
1952 /* Function vect_analyze_data_refs_alignment
1954 Analyze the alignment of the data-references in the loop.
1955 Return FALSE if a data reference is found that cannot be vectorized. */
1958 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo
,
1959 bb_vec_info bb_vinfo
)
1961 if (dump_enabled_p ())
1962 dump_printf_loc (MSG_NOTE
, vect_location
,
1963 "=== vect_analyze_data_refs_alignment ===\n");
1965 /* Mark groups of data references with same alignment using
1966 data dependence information. */
1969 vec
<ddr_p
> ddrs
= LOOP_VINFO_DDRS (loop_vinfo
);
1970 struct data_dependence_relation
*ddr
;
1973 FOR_EACH_VEC_ELT (ddrs
, i
, ddr
)
1974 vect_find_same_alignment_drs (ddr
, loop_vinfo
);
1977 if (!vect_compute_data_refs_alignment (loop_vinfo
, bb_vinfo
))
1979 if (dump_enabled_p ())
1980 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
1981 "not vectorized: can't calculate alignment "
1990 /* Analyze groups of accesses: check that DR belongs to a group of
1991 accesses of legal size, step, etc. Detect gaps, single element
1992 interleaving, and other special cases. Set grouped access info.
1993 Collect groups of strided stores for further use in SLP analysis. */
1996 vect_analyze_group_access (struct data_reference
*dr
)
1998 tree step
= DR_STEP (dr
);
1999 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2000 HOST_WIDE_INT type_size
= TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type
));
2001 gimple stmt
= DR_STMT (dr
);
2002 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2003 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2004 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
2005 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2006 HOST_WIDE_INT groupsize
, last_accessed_element
= 1;
2007 bool slp_impossible
= false;
2008 struct loop
*loop
= NULL
;
2011 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2013 /* For interleaving, GROUPSIZE is STEP counted in elements, i.e., the
2014 size of the interleaving group (including gaps). */
2015 groupsize
= absu_hwi (dr_step
) / type_size
;
2017 /* Not consecutive access is possible only if it is a part of interleaving. */
2018 if (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)))
2020 /* Check if it this DR is a part of interleaving, and is a single
2021 element of the group that is accessed in the loop. */
2023 /* Gaps are supported only for loads. STEP must be a multiple of the type
2024 size. The size of the group must be a power of 2. */
2026 && (dr_step
% type_size
) == 0
2028 && exact_log2 (groupsize
) != -1)
2030 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = stmt
;
2031 GROUP_SIZE (vinfo_for_stmt (stmt
)) = groupsize
;
2032 if (dump_enabled_p ())
2034 dump_printf_loc (MSG_NOTE
, vect_location
,
2035 "Detected single element interleaving ");
2036 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dr
));
2037 dump_printf (MSG_NOTE
, " step ");
2038 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, step
);
2039 dump_printf (MSG_NOTE
, "\n");
2044 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_NOTE
, vect_location
,
2046 "Data access with gaps requires scalar "
2050 if (dump_enabled_p ())
2051 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2052 "Peeling for outer loop is not"
2057 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2063 if (dump_enabled_p ())
2065 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2066 "not consecutive access ");
2067 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
2068 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2073 /* Mark the statement as unvectorizable. */
2074 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2081 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) == stmt
)
2083 /* First stmt in the interleaving chain. Check the chain. */
2084 gimple next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt
));
2085 struct data_reference
*data_ref
= dr
;
2086 unsigned int count
= 1;
2087 tree prev_init
= DR_INIT (data_ref
);
2089 HOST_WIDE_INT diff
, gaps
= 0;
2090 unsigned HOST_WIDE_INT count_in_bytes
;
2094 /* Skip same data-refs. In case that two or more stmts share
2095 data-ref (supported only for loads), we vectorize only the first
2096 stmt, and the rest get their vectorized loads from the first
2098 if (!tree_int_cst_compare (DR_INIT (data_ref
),
2099 DR_INIT (STMT_VINFO_DATA_REF (
2100 vinfo_for_stmt (next
)))))
2102 if (DR_IS_WRITE (data_ref
))
2104 if (dump_enabled_p ())
2105 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2106 "Two store stmts share the same dr.\n");
2110 /* For load use the same data-ref load. */
2111 GROUP_SAME_DR_STMT (vinfo_for_stmt (next
)) = prev
;
2114 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2119 data_ref
= STMT_VINFO_DATA_REF (vinfo_for_stmt (next
));
2121 /* All group members have the same STEP by construction. */
2122 gcc_checking_assert (operand_equal_p (DR_STEP (data_ref
), step
, 0));
2124 /* Check that the distance between two accesses is equal to the type
2125 size. Otherwise, we have gaps. */
2126 diff
= (TREE_INT_CST_LOW (DR_INIT (data_ref
))
2127 - TREE_INT_CST_LOW (prev_init
)) / type_size
;
2130 /* FORNOW: SLP of accesses with gaps is not supported. */
2131 slp_impossible
= true;
2132 if (DR_IS_WRITE (data_ref
))
2134 if (dump_enabled_p ())
2135 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2136 "interleaved store with gaps\n");
2143 last_accessed_element
+= diff
;
2145 /* Store the gap from the previous member of the group. If there is no
2146 gap in the access, GROUP_GAP is always 1. */
2147 GROUP_GAP (vinfo_for_stmt (next
)) = diff
;
2149 prev_init
= DR_INIT (data_ref
);
2150 next
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next
));
2151 /* Count the number of data-refs in the chain. */
2155 /* COUNT is the number of accesses found, we multiply it by the size of
2156 the type to get COUNT_IN_BYTES. */
2157 count_in_bytes
= type_size
* count
;
2159 /* Check that the size of the interleaving (including gaps) is not
2160 greater than STEP. */
2162 && absu_hwi (dr_step
) < count_in_bytes
+ gaps
* type_size
)
2164 if (dump_enabled_p ())
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2167 "interleaving size is greater than step for ");
2168 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2170 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2175 /* Check that the size of the interleaving is equal to STEP for stores,
2176 i.e., that there are no gaps. */
2178 && absu_hwi (dr_step
) != count_in_bytes
)
2180 if (DR_IS_READ (dr
))
2182 slp_impossible
= true;
2183 /* There is a gap after the last load in the group. This gap is a
2184 difference between the groupsize and the number of elements.
2185 When there is no gap, this difference should be 0. */
2186 GROUP_GAP (vinfo_for_stmt (stmt
)) = groupsize
- count
;
2190 if (dump_enabled_p ())
2191 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2192 "interleaved store with gaps\n");
2197 /* Check that STEP is a multiple of type size. */
2199 && (dr_step
% type_size
) != 0)
2201 if (dump_enabled_p ())
2203 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2204 "step is not a multiple of type size: step ");
2205 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, step
);
2206 dump_printf (MSG_MISSED_OPTIMIZATION
, " size ");
2207 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
,
2208 TYPE_SIZE_UNIT (scalar_type
));
2209 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
2217 GROUP_SIZE (vinfo_for_stmt (stmt
)) = groupsize
;
2218 if (dump_enabled_p ())
2219 dump_printf_loc (MSG_NOTE
, vect_location
,
2220 "Detected interleaving of size %d\n", (int)groupsize
);
2222 /* SLP: create an SLP data structure for every interleaving group of
2223 stores for further analysis in vect_analyse_slp. */
2224 if (DR_IS_WRITE (dr
) && !slp_impossible
)
2227 LOOP_VINFO_GROUPED_STORES (loop_vinfo
).safe_push (stmt
);
2229 BB_VINFO_GROUPED_STORES (bb_vinfo
).safe_push (stmt
);
2232 /* There is a gap in the end of the group. */
2233 if (groupsize
- last_accessed_element
> 0 && loop_vinfo
)
2235 if (dump_enabled_p ())
2236 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2237 "Data access with gaps requires scalar "
2241 if (dump_enabled_p ())
2242 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2243 "Peeling for outer loop is not supported\n");
2247 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo
) = true;
2255 /* Analyze the access pattern of the data-reference DR.
2256 In case of non-consecutive accesses call vect_analyze_group_access() to
2257 analyze groups of accesses. */
2260 vect_analyze_data_ref_access (struct data_reference
*dr
)
2262 tree step
= DR_STEP (dr
);
2263 tree scalar_type
= TREE_TYPE (DR_REF (dr
));
2264 gimple stmt
= DR_STMT (dr
);
2265 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2266 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
2267 struct loop
*loop
= NULL
;
2270 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2272 if (loop_vinfo
&& !step
)
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2276 "bad data-ref access in loop\n");
2280 /* Allow invariant loads in not nested loops. */
2281 if (loop_vinfo
&& integer_zerop (step
))
2283 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2284 if (nested_in_vect_loop_p (loop
, stmt
))
2286 if (dump_enabled_p ())
2287 dump_printf_loc (MSG_NOTE
, vect_location
,
2288 "zero step in inner loop of nest\n");
2291 return DR_IS_READ (dr
);
2294 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2296 /* Interleaved accesses are not yet supported within outer-loop
2297 vectorization for references in the inner-loop. */
2298 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2300 /* For the rest of the analysis we use the outer-loop step. */
2301 step
= STMT_VINFO_DR_STEP (stmt_info
);
2302 if (integer_zerop (step
))
2304 if (dump_enabled_p ())
2305 dump_printf_loc (MSG_NOTE
, vect_location
,
2306 "zero step in outer loop.\n");
2307 if (DR_IS_READ (dr
))
2315 if (TREE_CODE (step
) == INTEGER_CST
)
2317 HOST_WIDE_INT dr_step
= TREE_INT_CST_LOW (step
);
2318 if (!tree_int_cst_compare (step
, TYPE_SIZE_UNIT (scalar_type
))
2320 && !compare_tree_int (TYPE_SIZE_UNIT (scalar_type
), -dr_step
)))
2322 /* Mark that it is not interleaving. */
2323 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
)) = NULL
;
2328 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
2330 if (dump_enabled_p ())
2331 dump_printf_loc (MSG_NOTE
, vect_location
,
2332 "grouped access in outer loop.\n");
2336 /* Assume this is a DR handled by non-constant strided load case. */
2337 if (TREE_CODE (step
) != INTEGER_CST
)
2338 return STMT_VINFO_STRIDE_LOAD_P (stmt_info
);
2340 /* Not consecutive access - check if it's a part of interleaving group. */
2341 return vect_analyze_group_access (dr
);
2346 /* A helper function used in the comparator function to sort data
2347 references. T1 and T2 are two data references to be compared.
2348 The function returns -1, 0, or 1. */
2351 compare_tree (tree t1
, tree t2
)
2354 enum tree_code code
;
2365 if (TREE_CODE (t1
) != TREE_CODE (t2
))
2366 return TREE_CODE (t1
) < TREE_CODE (t2
) ? -1 : 1;
2368 code
= TREE_CODE (t1
);
2371 /* For const values, we can just use hash values for comparisons. */
2379 hashval_t h1
= iterative_hash_expr (t1
, 0);
2380 hashval_t h2
= iterative_hash_expr (t2
, 0);
2382 return h1
< h2
? -1 : 1;
2387 cmp
= compare_tree (SSA_NAME_VAR (t1
), SSA_NAME_VAR (t2
));
2391 if (SSA_NAME_VERSION (t1
) != SSA_NAME_VERSION (t2
))
2392 return SSA_NAME_VERSION (t1
) < SSA_NAME_VERSION (t2
) ? -1 : 1;
2396 tclass
= TREE_CODE_CLASS (code
);
2398 /* For var-decl, we could compare their UIDs. */
2399 if (tclass
== tcc_declaration
)
2401 if (DECL_UID (t1
) != DECL_UID (t2
))
2402 return DECL_UID (t1
) < DECL_UID (t2
) ? -1 : 1;
2406 /* For expressions with operands, compare their operands recursively. */
2407 for (i
= TREE_OPERAND_LENGTH (t1
) - 1; i
>= 0; --i
)
2409 cmp
= compare_tree (TREE_OPERAND (t1
, i
), TREE_OPERAND (t2
, i
));
2419 /* Compare two data-references DRA and DRB to group them into chunks
2420 suitable for grouping. */
2423 dr_group_sort_cmp (const void *dra_
, const void *drb_
)
2425 data_reference_p dra
= *(data_reference_p
*)const_cast<void *>(dra_
);
2426 data_reference_p drb
= *(data_reference_p
*)const_cast<void *>(drb_
);
2429 /* Stabilize sort. */
2433 /* Ordering of DRs according to base. */
2434 if (!operand_equal_p (DR_BASE_ADDRESS (dra
), DR_BASE_ADDRESS (drb
), 0))
2436 cmp
= compare_tree (DR_BASE_ADDRESS (dra
), DR_BASE_ADDRESS (drb
));
2441 /* And according to DR_OFFSET. */
2442 if (!dr_equal_offsets_p (dra
, drb
))
2444 cmp
= compare_tree (DR_OFFSET (dra
), DR_OFFSET (drb
));
2449 /* Put reads before writes. */
2450 if (DR_IS_READ (dra
) != DR_IS_READ (drb
))
2451 return DR_IS_READ (dra
) ? -1 : 1;
2453 /* Then sort after access size. */
2454 if (!operand_equal_p (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))),
2455 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))), 0))
2457 cmp
= compare_tree (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
))),
2458 TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
))));
2463 /* And after step. */
2464 if (!operand_equal_p (DR_STEP (dra
), DR_STEP (drb
), 0))
2466 cmp
= compare_tree (DR_STEP (dra
), DR_STEP (drb
));
2471 /* Then sort after DR_INIT. In case of identical DRs sort after stmt UID. */
2472 cmp
= tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
));
2474 return gimple_uid (DR_STMT (dra
)) < gimple_uid (DR_STMT (drb
)) ? -1 : 1;
2478 /* Function vect_analyze_data_ref_accesses.
2480 Analyze the access pattern of all the data references in the loop.
2482 FORNOW: the only access pattern that is considered vectorizable is a
2483 simple step 1 (consecutive) access.
2485 FORNOW: handle only arrays and pointer accesses. */
2488 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo
, bb_vec_info bb_vinfo
)
2491 vec
<data_reference_p
> datarefs
;
2492 struct data_reference
*dr
;
2494 if (dump_enabled_p ())
2495 dump_printf_loc (MSG_NOTE
, vect_location
,
2496 "=== vect_analyze_data_ref_accesses ===\n");
2499 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
2501 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
2503 if (datarefs
.is_empty ())
2506 /* Sort the array of datarefs to make building the interleaving chains
2507 linear. Don't modify the original vector's order, it is needed for
2508 determining what dependencies are reversed. */
2509 vec
<data_reference_p
> datarefs_copy
= datarefs
.copy ();
2510 qsort (datarefs_copy
.address (), datarefs_copy
.length (),
2511 sizeof (data_reference_p
), dr_group_sort_cmp
);
2513 /* Build the interleaving chains. */
2514 for (i
= 0; i
< datarefs_copy
.length () - 1;)
2516 data_reference_p dra
= datarefs_copy
[i
];
2517 stmt_vec_info stmtinfo_a
= vinfo_for_stmt (DR_STMT (dra
));
2518 stmt_vec_info lastinfo
= NULL
;
2519 for (i
= i
+ 1; i
< datarefs_copy
.length (); ++i
)
2521 data_reference_p drb
= datarefs_copy
[i
];
2522 stmt_vec_info stmtinfo_b
= vinfo_for_stmt (DR_STMT (drb
));
2524 /* ??? Imperfect sorting (non-compatible types, non-modulo
2525 accesses, same accesses) can lead to a group to be artificially
2526 split here as we don't just skip over those. If it really
2527 matters we can push those to a worklist and re-iterate
2528 over them. The we can just skip ahead to the next DR here. */
2530 /* Check that the data-refs have same first location (except init)
2531 and they are both either store or load (not load and store). */
2532 if (DR_IS_READ (dra
) != DR_IS_READ (drb
)
2533 || !operand_equal_p (DR_BASE_ADDRESS (dra
),
2534 DR_BASE_ADDRESS (drb
), 0)
2535 || !dr_equal_offsets_p (dra
, drb
))
2538 /* Check that the data-refs have the same constant size and step. */
2539 tree sza
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra
)));
2540 tree szb
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb
)));
2541 if (!tree_fits_uhwi_p (sza
)
2542 || !tree_fits_uhwi_p (szb
)
2543 || !tree_int_cst_equal (sza
, szb
)
2544 || !tree_fits_shwi_p (DR_STEP (dra
))
2545 || !tree_fits_shwi_p (DR_STEP (drb
))
2546 || !tree_int_cst_equal (DR_STEP (dra
), DR_STEP (drb
)))
2549 /* Do not place the same access in the interleaving chain twice. */
2550 if (tree_int_cst_compare (DR_INIT (dra
), DR_INIT (drb
)) == 0)
2553 /* Check the types are compatible.
2554 ??? We don't distinguish this during sorting. */
2555 if (!types_compatible_p (TREE_TYPE (DR_REF (dra
)),
2556 TREE_TYPE (DR_REF (drb
))))
2559 /* Sorting has ensured that DR_INIT (dra) <= DR_INIT (drb). */
2560 HOST_WIDE_INT init_a
= TREE_INT_CST_LOW (DR_INIT (dra
));
2561 HOST_WIDE_INT init_b
= TREE_INT_CST_LOW (DR_INIT (drb
));
2562 gcc_assert (init_a
< init_b
);
2564 /* If init_b == init_a + the size of the type * k, we have an
2565 interleaving, and DRA is accessed before DRB. */
2566 HOST_WIDE_INT type_size_a
= tree_to_uhwi (sza
);
2567 if ((init_b
- init_a
) % type_size_a
!= 0)
2570 /* The step (if not zero) is greater than the difference between
2571 data-refs' inits. This splits groups into suitable sizes. */
2572 HOST_WIDE_INT step
= tree_to_shwi (DR_STEP (dra
));
2573 if (step
!= 0 && step
<= (init_b
- init_a
))
2576 if (dump_enabled_p ())
2578 dump_printf_loc (MSG_NOTE
, vect_location
,
2579 "Detected interleaving ");
2580 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (dra
));
2581 dump_printf (MSG_NOTE
, " and ");
2582 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_REF (drb
));
2583 dump_printf (MSG_NOTE
, "\n");
2586 /* Link the found element into the group list. */
2587 if (!GROUP_FIRST_ELEMENT (stmtinfo_a
))
2589 GROUP_FIRST_ELEMENT (stmtinfo_a
) = DR_STMT (dra
);
2590 lastinfo
= stmtinfo_a
;
2592 GROUP_FIRST_ELEMENT (stmtinfo_b
) = DR_STMT (dra
);
2593 GROUP_NEXT_ELEMENT (lastinfo
) = DR_STMT (drb
);
2594 lastinfo
= stmtinfo_b
;
2598 FOR_EACH_VEC_ELT (datarefs_copy
, i
, dr
)
2599 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
)))
2600 && !vect_analyze_data_ref_access (dr
))
2602 if (dump_enabled_p ())
2603 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
2604 "not vectorized: complicated access pattern.\n");
2608 /* Mark the statement as not vectorizable. */
2609 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
2614 datarefs_copy
.release ();
2619 datarefs_copy
.release ();
2624 /* Operator == between two dr_with_seg_len objects.
2626 This equality operator is used to make sure two data refs
2627 are the same one so that we will consider to combine the
2628 aliasing checks of those two pairs of data dependent data
2632 operator == (const dr_with_seg_len
& d1
,
2633 const dr_with_seg_len
& d2
)
2635 return operand_equal_p (DR_BASE_ADDRESS (d1
.dr
),
2636 DR_BASE_ADDRESS (d2
.dr
), 0)
2637 && compare_tree (d1
.offset
, d2
.offset
) == 0
2638 && compare_tree (d1
.seg_len
, d2
.seg_len
) == 0;
2641 /* Function comp_dr_with_seg_len_pair.
2643 Comparison function for sorting objects of dr_with_seg_len_pair_t
2644 so that we can combine aliasing checks in one scan. */
2647 comp_dr_with_seg_len_pair (const void *p1_
, const void *p2_
)
2649 const dr_with_seg_len_pair_t
* p1
= (const dr_with_seg_len_pair_t
*) p1_
;
2650 const dr_with_seg_len_pair_t
* p2
= (const dr_with_seg_len_pair_t
*) p2_
;
2652 const dr_with_seg_len
&p11
= p1
->first
,
2657 /* For DR pairs (a, b) and (c, d), we only consider to merge the alias checks
2658 if a and c have the same basic address snd step, and b and d have the same
2659 address and step. Therefore, if any a&c or b&d don't have the same address
2660 and step, we don't care the order of those two pairs after sorting. */
2663 if ((comp_res
= compare_tree (DR_BASE_ADDRESS (p11
.dr
),
2664 DR_BASE_ADDRESS (p21
.dr
))) != 0)
2666 if ((comp_res
= compare_tree (DR_BASE_ADDRESS (p12
.dr
),
2667 DR_BASE_ADDRESS (p22
.dr
))) != 0)
2669 if ((comp_res
= compare_tree (DR_STEP (p11
.dr
), DR_STEP (p21
.dr
))) != 0)
2671 if ((comp_res
= compare_tree (DR_STEP (p12
.dr
), DR_STEP (p22
.dr
))) != 0)
2673 if ((comp_res
= compare_tree (p11
.offset
, p21
.offset
)) != 0)
2675 if ((comp_res
= compare_tree (p12
.offset
, p22
.offset
)) != 0)
/* Template helper: exchange the values of A and B.  Used below to
   canonicalize the order of a dr_with_seg_len pair.  */

template <class T> static void
swap (T &a, T &b)
{
  T c (a);
  a = b;
  b = c;
}
2689 /* Function vect_vfa_segment_size.
2691 Create an expression that computes the size of segment
2692 that will be accessed for a data reference. The functions takes into
2693 account that realignment loads may access one more vector.
2696 DR: The data reference.
2697 LENGTH_FACTOR: segment length to consider.
2699 Return an expression whose value is the size of segment which will be
2703 vect_vfa_segment_size (struct data_reference
*dr
, tree length_factor
)
2705 tree segment_length
;
2707 if (integer_zerop (DR_STEP (dr
)))
2708 segment_length
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
2710 segment_length
= size_binop (MULT_EXPR
,
2711 fold_convert (sizetype
, DR_STEP (dr
)),
2712 fold_convert (sizetype
, length_factor
));
2714 if (vect_supportable_dr_alignment (dr
, false)
2715 == dr_explicit_realign_optimized
)
2717 tree vector_size
= TYPE_SIZE_UNIT
2718 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr
))));
2720 segment_length
= size_binop (PLUS_EXPR
, segment_length
, vector_size
);
2722 return segment_length
;
2725 /* Function vect_prune_runtime_alias_test_list.
2727 Prune a list of ddrs to be tested at run-time by versioning for alias.
2728 Merge several alias checks into one if possible.
2729 Return FALSE if resulting list of ddrs is longer then allowed by
2730 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
2733 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo
)
2735 vec
<ddr_p
> may_alias_ddrs
=
2736 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo
);
2737 vec
<dr_with_seg_len_pair_t
>& comp_alias_ddrs
=
2738 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo
);
2739 int vect_factor
= LOOP_VINFO_VECT_FACTOR (loop_vinfo
);
2740 tree scalar_loop_iters
= LOOP_VINFO_NITERS (loop_vinfo
);
2746 if (dump_enabled_p ())
2747 dump_printf_loc (MSG_NOTE
, vect_location
,
2748 "=== vect_prune_runtime_alias_test_list ===\n");
2750 if (may_alias_ddrs
.is_empty ())
2753 /* Basically, for each pair of dependent data refs store_ptr_0
2754 and load_ptr_0, we create an expression:
2756 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2757 || (load_ptr_0 + load_segment_length_0) <= store_ptr_0))
2759 for aliasing checks. However, in some cases we can decrease
2760 the number of checks by combining two checks into one. For
2761 example, suppose we have another pair of data refs store_ptr_0
2762 and load_ptr_1, and if the following condition is satisfied:
2764 load_ptr_0 < load_ptr_1 &&
2765 load_ptr_1 - load_ptr_0 - load_segment_length_0 < store_segment_length_0
2767 (this condition means, in each iteration of vectorized loop,
2768 the accessed memory of store_ptr_0 cannot be between the memory
2769 of load_ptr_0 and load_ptr_1.)
2771 we then can use only the following expression to finish the
2772 alising checks between store_ptr_0 & load_ptr_0 and
2773 store_ptr_0 & load_ptr_1:
2775 ((store_ptr_0 + store_segment_length_0) <= load_ptr_0)
2776 || (load_ptr_1 + load_segment_length_1 <= store_ptr_0))
2778 Note that we only consider that load_ptr_0 and load_ptr_1 have the
2779 same basic address. */
2781 comp_alias_ddrs
.create (may_alias_ddrs
.length ());
2783 /* First, we collect all data ref pairs for aliasing checks. */
2784 FOR_EACH_VEC_ELT (may_alias_ddrs
, i
, ddr
)
2786 struct data_reference
*dr_a
, *dr_b
;
2787 gimple dr_group_first_a
, dr_group_first_b
;
2788 tree segment_length_a
, segment_length_b
;
2789 gimple stmt_a
, stmt_b
;
2792 stmt_a
= DR_STMT (DDR_A (ddr
));
2793 dr_group_first_a
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_a
));
2794 if (dr_group_first_a
)
2796 stmt_a
= dr_group_first_a
;
2797 dr_a
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a
));
2801 stmt_b
= DR_STMT (DDR_B (ddr
));
2802 dr_group_first_b
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt_b
));
2803 if (dr_group_first_b
)
2805 stmt_b
= dr_group_first_b
;
2806 dr_b
= STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b
));
2809 if (!operand_equal_p (DR_STEP (dr_a
), DR_STEP (dr_b
), 0))
2810 length_factor
= scalar_loop_iters
;
2812 length_factor
= size_int (vect_factor
);
2813 segment_length_a
= vect_vfa_segment_size (dr_a
, length_factor
);
2814 segment_length_b
= vect_vfa_segment_size (dr_b
, length_factor
);
2816 dr_with_seg_len_pair_t dr_with_seg_len_pair
2817 (dr_with_seg_len (dr_a
, segment_length_a
),
2818 dr_with_seg_len (dr_b
, segment_length_b
));
2820 if (compare_tree (DR_BASE_ADDRESS (dr_a
), DR_BASE_ADDRESS (dr_b
)) > 0)
2821 swap (dr_with_seg_len_pair
.first
, dr_with_seg_len_pair
.second
);
2823 comp_alias_ddrs
.safe_push (dr_with_seg_len_pair
);
2826 /* Second, we sort the collected data ref pairs so that we can scan
2827 them once to combine all possible aliasing checks. */
2828 comp_alias_ddrs
.qsort (comp_dr_with_seg_len_pair
);
2830 /* Third, we scan the sorted dr pairs and check if we can combine
2831 alias checks of two neighbouring dr pairs. */
2832 for (size_t i
= 1; i
< comp_alias_ddrs
.length (); ++i
)
2834 /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */
2835 dr_with_seg_len
*dr_a1
= &comp_alias_ddrs
[i
-1].first
,
2836 *dr_b1
= &comp_alias_ddrs
[i
-1].second
,
2837 *dr_a2
= &comp_alias_ddrs
[i
].first
,
2838 *dr_b2
= &comp_alias_ddrs
[i
].second
;
2840 /* Remove duplicate data ref pairs. */
2841 if (*dr_a1
== *dr_a2
&& *dr_b1
== *dr_b2
)
2843 if (dump_enabled_p ())
2845 dump_printf_loc (MSG_NOTE
, vect_location
,
2846 "found equal ranges ");
2847 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2848 DR_REF (dr_a1
->dr
));
2849 dump_printf (MSG_NOTE
, ", ");
2850 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2851 DR_REF (dr_b1
->dr
));
2852 dump_printf (MSG_NOTE
, " and ");
2853 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2854 DR_REF (dr_a2
->dr
));
2855 dump_printf (MSG_NOTE
, ", ");
2856 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2857 DR_REF (dr_b2
->dr
));
2858 dump_printf (MSG_NOTE
, "\n");
2861 comp_alias_ddrs
.ordered_remove (i
--);
2865 if (*dr_a1
== *dr_a2
|| *dr_b1
== *dr_b2
)
2867 /* We consider the case that DR_B1 and DR_B2 are same memrefs,
2868 and DR_A1 and DR_A2 are two consecutive memrefs. */
2869 if (*dr_a1
== *dr_a2
)
2871 swap (dr_a1
, dr_b1
);
2872 swap (dr_a2
, dr_b2
);
2875 if (!operand_equal_p (DR_BASE_ADDRESS (dr_a1
->dr
),
2876 DR_BASE_ADDRESS (dr_a2
->dr
),
2878 || !tree_fits_shwi_p (dr_a1
->offset
)
2879 || !tree_fits_shwi_p (dr_a2
->offset
))
2882 HOST_WIDE_INT diff
= (tree_to_shwi (dr_a2
->offset
)
2883 - tree_to_shwi (dr_a1
->offset
));
2886 /* Now we check if the following condition is satisfied:
2888 DIFF - SEGMENT_LENGTH_A < SEGMENT_LENGTH_B
2890 where DIFF = DR_A2->OFFSET - DR_A1->OFFSET. However,
2891 SEGMENT_LENGTH_A or SEGMENT_LENGTH_B may not be constant so we
2892 have to make a best estimation. We can get the minimum value
2893 of SEGMENT_LENGTH_B as a constant, represented by MIN_SEG_LEN_B,
2894 then either of the following two conditions can guarantee the
2897 1: DIFF <= MIN_SEG_LEN_B
2898 2: DIFF - SEGMENT_LENGTH_A < MIN_SEG_LEN_B
2902 HOST_WIDE_INT min_seg_len_b
= (tree_fits_shwi_p (dr_b1
->seg_len
)
2903 ? tree_to_shwi (dr_b1
->seg_len
)
2906 if (diff
<= min_seg_len_b
2907 || (tree_fits_shwi_p (dr_a1
->seg_len
)
2908 && diff
- tree_to_shwi (dr_a1
->seg_len
) < min_seg_len_b
))
2910 if (dump_enabled_p ())
2912 dump_printf_loc (MSG_NOTE
, vect_location
,
2913 "merging ranges for ");
2914 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2915 DR_REF (dr_a1
->dr
));
2916 dump_printf (MSG_NOTE
, ", ");
2917 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2918 DR_REF (dr_b1
->dr
));
2919 dump_printf (MSG_NOTE
, " and ");
2920 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2921 DR_REF (dr_a2
->dr
));
2922 dump_printf (MSG_NOTE
, ", ");
2923 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
2924 DR_REF (dr_b2
->dr
));
2925 dump_printf (MSG_NOTE
, "\n");
2928 dr_a1
->seg_len
= size_binop (PLUS_EXPR
,
2929 dr_a2
->seg_len
, size_int (diff
));
2930 comp_alias_ddrs
.ordered_remove (i
--);
2935 dump_printf_loc (MSG_NOTE
, vect_location
,
2936 "improved number of alias checks from %d to %d\n",
2937 may_alias_ddrs
.length (), comp_alias_ddrs
.length ());
2938 if ((int) comp_alias_ddrs
.length () >
2939 PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS
))
2945 /* Check whether a non-affine read in stmt is suitable for gather load
2946 and if so, return a builtin decl for that operation. */
2949 vect_check_gather (gimple stmt
, loop_vec_info loop_vinfo
, tree
*basep
,
2950 tree
*offp
, int *scalep
)
2952 HOST_WIDE_INT scale
= 1, pbitpos
, pbitsize
;
2953 struct loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
2954 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
2955 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
2956 tree offtype
= NULL_TREE
;
2957 tree decl
, base
, off
;
2958 enum machine_mode pmode
;
2959 int punsignedp
, pvolatilep
;
2962 /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,
2963 see if we can use the def stmt of the address. */
2964 if (is_gimple_call (stmt
)
2965 && gimple_call_internal_p (stmt
)
2966 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
2967 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
)
2968 && TREE_CODE (base
) == MEM_REF
2969 && TREE_CODE (TREE_OPERAND (base
, 0)) == SSA_NAME
2970 && integer_zerop (TREE_OPERAND (base
, 1))
2971 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (base
, 0)))
2973 gimple def_stmt
= SSA_NAME_DEF_STMT (TREE_OPERAND (base
, 0));
2974 if (is_gimple_assign (def_stmt
)
2975 && gimple_assign_rhs_code (def_stmt
) == ADDR_EXPR
)
2976 base
= TREE_OPERAND (gimple_assign_rhs1 (def_stmt
), 0);
2979 /* The gather builtins need address of the form
2980 loop_invariant + vector * {1, 2, 4, 8}
2982 loop_invariant + sign_extend (vector) * { 1, 2, 4, 8 }.
2983 Unfortunately DR_BASE_ADDRESS/DR_OFFSET can be a mixture
2984 of loop invariants/SSA_NAMEs defined in the loop, with casts,
2985 multiplications and additions in it. To get a vector, we need
2986 a single SSA_NAME that will be defined in the loop and will
2987 contain everything that is not loop invariant and that can be
2988 vectorized. The following code attempts to find such a preexistng
2989 SSA_NAME OFF and put the loop invariants into a tree BASE
2990 that can be gimplified before the loop. */
2991 base
= get_inner_reference (base
, &pbitsize
, &pbitpos
, &off
,
2992 &pmode
, &punsignedp
, &pvolatilep
, false);
2993 gcc_assert (base
!= NULL_TREE
&& (pbitpos
% BITS_PER_UNIT
) == 0);
2995 if (TREE_CODE (base
) == MEM_REF
)
2997 if (!integer_zerop (TREE_OPERAND (base
, 1)))
2999 if (off
== NULL_TREE
)
3001 offset_int moff
= mem_ref_offset (base
);
3002 off
= wide_int_to_tree (sizetype
, moff
);
3005 off
= size_binop (PLUS_EXPR
, off
,
3006 fold_convert (sizetype
, TREE_OPERAND (base
, 1)));
3008 base
= TREE_OPERAND (base
, 0);
3011 base
= build_fold_addr_expr (base
);
3013 if (off
== NULL_TREE
)
3014 off
= size_zero_node
;
3016 /* If base is not loop invariant, either off is 0, then we start with just
3017 the constant offset in the loop invariant BASE and continue with base
3018 as OFF, otherwise give up.
3019 We could handle that case by gimplifying the addition of base + off
3020 into some SSA_NAME and use that as off, but for now punt. */
3021 if (!expr_invariant_in_loop_p (loop
, base
))
3023 if (!integer_zerop (off
))
3026 base
= size_int (pbitpos
/ BITS_PER_UNIT
);
3028 /* Otherwise put base + constant offset into the loop invariant BASE
3029 and continue with OFF. */
3032 base
= fold_convert (sizetype
, base
);
3033 base
= size_binop (PLUS_EXPR
, base
, size_int (pbitpos
/ BITS_PER_UNIT
));
3036 /* OFF at this point may be either a SSA_NAME or some tree expression
3037 from get_inner_reference. Try to peel off loop invariants from it
3038 into BASE as long as possible. */
3040 while (offtype
== NULL_TREE
)
3042 enum tree_code code
;
3043 tree op0
, op1
, add
= NULL_TREE
;
3045 if (TREE_CODE (off
) == SSA_NAME
)
3047 gimple def_stmt
= SSA_NAME_DEF_STMT (off
);
3049 if (expr_invariant_in_loop_p (loop
, off
))
3052 if (gimple_code (def_stmt
) != GIMPLE_ASSIGN
)
3055 op0
= gimple_assign_rhs1 (def_stmt
);
3056 code
= gimple_assign_rhs_code (def_stmt
);
3057 op1
= gimple_assign_rhs2 (def_stmt
);
3061 if (get_gimple_rhs_class (TREE_CODE (off
)) == GIMPLE_TERNARY_RHS
)
3063 code
= TREE_CODE (off
);
3064 extract_ops_from_tree (off
, &code
, &op0
, &op1
);
3068 case POINTER_PLUS_EXPR
:
3070 if (expr_invariant_in_loop_p (loop
, op0
))
3075 add
= fold_convert (sizetype
, add
);
3077 add
= size_binop (MULT_EXPR
, add
, size_int (scale
));
3078 base
= size_binop (PLUS_EXPR
, base
, add
);
3081 if (expr_invariant_in_loop_p (loop
, op1
))
3089 if (expr_invariant_in_loop_p (loop
, op1
))
3091 add
= fold_convert (sizetype
, op1
);
3092 add
= size_binop (MINUS_EXPR
, size_zero_node
, add
);
3098 if (scale
== 1 && tree_fits_shwi_p (op1
))
3100 scale
= tree_to_shwi (op1
);
3109 if (!POINTER_TYPE_P (TREE_TYPE (op0
))
3110 && !INTEGRAL_TYPE_P (TREE_TYPE (op0
)))
3112 if (TYPE_PRECISION (TREE_TYPE (op0
))
3113 == TYPE_PRECISION (TREE_TYPE (off
)))
3118 if (TYPE_PRECISION (TREE_TYPE (op0
))
3119 < TYPE_PRECISION (TREE_TYPE (off
)))
3122 offtype
= TREE_TYPE (off
);
3133 /* If at the end OFF still isn't a SSA_NAME or isn't
3134 defined in the loop, punt. */
3135 if (TREE_CODE (off
) != SSA_NAME
3136 || expr_invariant_in_loop_p (loop
, off
))
3139 if (offtype
== NULL_TREE
)
3140 offtype
= TREE_TYPE (off
);
3142 decl
= targetm
.vectorize
.builtin_gather (STMT_VINFO_VECTYPE (stmt_info
),
3144 if (decl
== NULL_TREE
)
3156 /* Function vect_analyze_data_refs.
3158 Find all the data references in the loop or basic block.
3160 The general structure of the analysis of data refs in the vectorizer is as
3162 1- vect_analyze_data_refs(loop/bb): call
3163 compute_data_dependences_for_loop/bb to find and analyze all data-refs
3164 in the loop/bb and their dependences.
3165 2- vect_analyze_dependences(): apply dependence testing using ddrs.
3166 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
3167 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
3172 vect_analyze_data_refs (loop_vec_info loop_vinfo
,
3173 bb_vec_info bb_vinfo
,
3174 int *min_vf
, unsigned *n_stmts
)
3176 struct loop
*loop
= NULL
;
3177 basic_block bb
= NULL
;
3179 vec
<data_reference_p
> datarefs
;
3180 struct data_reference
*dr
;
3183 if (dump_enabled_p ())
3184 dump_printf_loc (MSG_NOTE
, vect_location
,
3185 "=== vect_analyze_data_refs ===\n");
3189 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
3191 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3192 datarefs
= LOOP_VINFO_DATAREFS (loop_vinfo
);
3193 if (!find_loop_nest (loop
, &LOOP_VINFO_LOOP_NEST (loop_vinfo
)))
3195 if (dump_enabled_p ())
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3197 "not vectorized: loop contains function calls"
3198 " or data references that cannot be analyzed\n");
3202 for (i
= 0; i
< loop
->num_nodes
; i
++)
3204 gimple_stmt_iterator gsi
;
3206 for (gsi
= gsi_start_bb (bbs
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
3208 gimple stmt
= gsi_stmt (gsi
);
3209 if (is_gimple_debug (stmt
))
3212 if (!find_data_references_in_stmt (loop
, stmt
, &datarefs
))
3214 if (is_gimple_call (stmt
) && loop
->safelen
)
3216 tree fndecl
= gimple_call_fndecl (stmt
), op
;
3217 if (fndecl
!= NULL_TREE
)
3219 struct cgraph_node
*node
= cgraph_get_node (fndecl
);
3220 if (node
!= NULL
&& node
->simd_clones
!= NULL
)
3222 unsigned int j
, n
= gimple_call_num_args (stmt
);
3223 for (j
= 0; j
< n
; j
++)
3225 op
= gimple_call_arg (stmt
, j
);
3227 || (REFERENCE_CLASS_P (op
)
3228 && get_base_address (op
)))
3231 op
= gimple_call_lhs (stmt
);
3232 /* Ignore #pragma omp declare simd functions
3233 if they don't have data references in the
3234 call stmt itself. */
3238 || (REFERENCE_CLASS_P (op
)
3239 && get_base_address (op
)))))
3244 LOOP_VINFO_DATAREFS (loop_vinfo
) = datarefs
;
3245 if (dump_enabled_p ())
3246 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3247 "not vectorized: loop contains function "
3248 "calls or data references that cannot "
3255 LOOP_VINFO_DATAREFS (loop_vinfo
) = datarefs
;
3259 gimple_stmt_iterator gsi
;
3261 bb
= BB_VINFO_BB (bb_vinfo
);
3262 for (gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
3264 gimple stmt
= gsi_stmt (gsi
);
3265 if (is_gimple_debug (stmt
))
3268 if (!find_data_references_in_stmt (NULL
, stmt
,
3269 &BB_VINFO_DATAREFS (bb_vinfo
)))
3271 /* Mark the rest of the basic-block as unvectorizable. */
3272 for (; !gsi_end_p (gsi
); gsi_next (&gsi
))
3274 stmt
= gsi_stmt (gsi
);
3275 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt
)) = false;
3281 datarefs
= BB_VINFO_DATAREFS (bb_vinfo
);
3284 /* Go through the data-refs, check that the analysis succeeded. Update
3285 pointer from stmt_vec_info struct to DR and vectype. */
3287 FOR_EACH_VEC_ELT (datarefs
, i
, dr
)
3290 stmt_vec_info stmt_info
;
3291 tree base
, offset
, init
;
3292 bool gather
= false;
3293 bool simd_lane_access
= false;
3297 if (!dr
|| !DR_REF (dr
))
3299 if (dump_enabled_p ())
3300 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3301 "not vectorized: unhandled data-ref\n");
3305 stmt
= DR_STMT (dr
);
3306 stmt_info
= vinfo_for_stmt (stmt
);
3308 /* Discard clobbers from the dataref vector. We will remove
3309 clobber stmts during vectorization. */
3310 if (gimple_clobber_p (stmt
))
3313 if (i
== datarefs
.length () - 1)
3318 datarefs
.ordered_remove (i
);
3323 /* Check that analysis of the data-ref succeeded. */
3324 if (!DR_BASE_ADDRESS (dr
) || !DR_OFFSET (dr
) || !DR_INIT (dr
)
3329 && !TREE_THIS_VOLATILE (DR_REF (dr
))
3330 && targetm
.vectorize
.builtin_gather
!= NULL
;
3331 bool maybe_simd_lane_access
3332 = loop_vinfo
&& loop
->simduid
;
3334 /* If target supports vector gather loads, or if this might be
3335 a SIMD lane access, see if they can't be used. */
3337 && (maybe_gather
|| maybe_simd_lane_access
)
3338 && !nested_in_vect_loop_p (loop
, stmt
))
3340 struct data_reference
*newdr
3341 = create_data_ref (NULL
, loop_containing_stmt (stmt
),
3342 DR_REF (dr
), stmt
, true);
3343 gcc_assert (newdr
!= NULL
&& DR_REF (newdr
));
3344 if (DR_BASE_ADDRESS (newdr
)
3345 && DR_OFFSET (newdr
)
3348 && integer_zerop (DR_STEP (newdr
)))
3350 if (maybe_simd_lane_access
)
3352 tree off
= DR_OFFSET (newdr
);
3354 if (TREE_CODE (DR_INIT (newdr
)) == INTEGER_CST
3355 && TREE_CODE (off
) == MULT_EXPR
3356 && tree_fits_uhwi_p (TREE_OPERAND (off
, 1)))
3358 tree step
= TREE_OPERAND (off
, 1);
3359 off
= TREE_OPERAND (off
, 0);
3361 if (CONVERT_EXPR_P (off
)
3362 && TYPE_PRECISION (TREE_TYPE (TREE_OPERAND (off
,
3364 < TYPE_PRECISION (TREE_TYPE (off
)))
3365 off
= TREE_OPERAND (off
, 0);
3366 if (TREE_CODE (off
) == SSA_NAME
)
3368 gimple def
= SSA_NAME_DEF_STMT (off
);
3369 tree reft
= TREE_TYPE (DR_REF (newdr
));
3370 if (is_gimple_call (def
)
3371 && gimple_call_internal_p (def
)
3372 && (gimple_call_internal_fn (def
)
3373 == IFN_GOMP_SIMD_LANE
))
3375 tree arg
= gimple_call_arg (def
, 0);
3376 gcc_assert (TREE_CODE (arg
) == SSA_NAME
);
3377 arg
= SSA_NAME_VAR (arg
);
3378 if (arg
== loop
->simduid
3380 && tree_int_cst_equal
3381 (TYPE_SIZE_UNIT (reft
),
3384 DR_OFFSET (newdr
) = ssize_int (0);
3385 DR_STEP (newdr
) = step
;
3386 DR_ALIGNED_TO (newdr
)
3387 = size_int (BIGGEST_ALIGNMENT
);
3389 simd_lane_access
= true;
3395 if (!simd_lane_access
&& maybe_gather
)
3401 if (!gather
&& !simd_lane_access
)
3402 free_data_ref (newdr
);
3405 if (!gather
&& !simd_lane_access
)
3407 if (dump_enabled_p ())
3409 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3410 "not vectorized: data ref analysis "
3412 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3413 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3423 if (TREE_CODE (DR_BASE_ADDRESS (dr
)) == INTEGER_CST
)
3425 if (dump_enabled_p ())
3426 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3427 "not vectorized: base addr of dr is a "
3433 if (gather
|| simd_lane_access
)
3438 if (TREE_THIS_VOLATILE (DR_REF (dr
)))
3440 if (dump_enabled_p ())
3442 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3443 "not vectorized: volatile type ");
3444 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3445 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3454 if (stmt_can_throw_internal (stmt
))
3456 if (dump_enabled_p ())
3458 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3459 "not vectorized: statement can throw an "
3461 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3462 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3468 if (gather
|| simd_lane_access
)
3473 if (TREE_CODE (DR_REF (dr
)) == COMPONENT_REF
3474 && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr
), 1)))
3476 if (dump_enabled_p ())
3478 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3479 "not vectorized: statement is bitfield "
3481 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3482 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3488 if (gather
|| simd_lane_access
)
3493 base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3494 offset
= unshare_expr (DR_OFFSET (dr
));
3495 init
= unshare_expr (DR_INIT (dr
));
3497 if (is_gimple_call (stmt
)
3498 && (!gimple_call_internal_p (stmt
)
3499 || (gimple_call_internal_fn (stmt
) != IFN_MASK_LOAD
3500 && gimple_call_internal_fn (stmt
) != IFN_MASK_STORE
)))
3502 if (dump_enabled_p ())
3504 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3505 "not vectorized: dr in a call ");
3506 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3507 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3513 if (gather
|| simd_lane_access
)
3518 /* Update DR field in stmt_vec_info struct. */
3520 /* If the dataref is in an inner-loop of the loop that is considered for
3521 for vectorization, we also want to analyze the access relative to
3522 the outer-loop (DR contains information only relative to the
3523 inner-most enclosing loop). We do that by building a reference to the
3524 first location accessed by the inner-loop, and analyze it relative to
3526 if (loop
&& nested_in_vect_loop_p (loop
, stmt
))
3528 tree outer_step
, outer_base
, outer_init
;
3529 HOST_WIDE_INT pbitsize
, pbitpos
;
3531 enum machine_mode pmode
;
3532 int punsignedp
, pvolatilep
;
3533 affine_iv base_iv
, offset_iv
;
3536 /* Build a reference to the first location accessed by the
3537 inner-loop: *(BASE+INIT). (The first location is actually
3538 BASE+INIT+OFFSET, but we add OFFSET separately later). */
3539 tree inner_base
= build_fold_indirect_ref
3540 (fold_build_pointer_plus (base
, init
));
3542 if (dump_enabled_p ())
3544 dump_printf_loc (MSG_NOTE
, vect_location
,
3545 "analyze in outer-loop: ");
3546 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, inner_base
);
3547 dump_printf (MSG_NOTE
, "\n");
3550 outer_base
= get_inner_reference (inner_base
, &pbitsize
, &pbitpos
,
3551 &poffset
, &pmode
, &punsignedp
, &pvolatilep
, false);
3552 gcc_assert (outer_base
!= NULL_TREE
);
3554 if (pbitpos
% BITS_PER_UNIT
!= 0)
3556 if (dump_enabled_p ())
3557 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3558 "failed: bit offset alignment.\n");
3562 outer_base
= build_fold_addr_expr (outer_base
);
3563 if (!simple_iv (loop
, loop_containing_stmt (stmt
), outer_base
,
3566 if (dump_enabled_p ())
3567 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3568 "failed: evolution of base is not affine.\n");
3575 poffset
= fold_build2 (PLUS_EXPR
, TREE_TYPE (offset
), offset
,
3583 offset_iv
.base
= ssize_int (0);
3584 offset_iv
.step
= ssize_int (0);
3586 else if (!simple_iv (loop
, loop_containing_stmt (stmt
), poffset
,
3589 if (dump_enabled_p ())
3590 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3591 "evolution of offset is not affine.\n");
3595 outer_init
= ssize_int (pbitpos
/ BITS_PER_UNIT
);
3596 split_constant_offset (base_iv
.base
, &base_iv
.base
, &dinit
);
3597 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3598 split_constant_offset (offset_iv
.base
, &offset_iv
.base
, &dinit
);
3599 outer_init
= size_binop (PLUS_EXPR
, outer_init
, dinit
);
3601 outer_step
= size_binop (PLUS_EXPR
,
3602 fold_convert (ssizetype
, base_iv
.step
),
3603 fold_convert (ssizetype
, offset_iv
.step
));
3605 STMT_VINFO_DR_STEP (stmt_info
) = outer_step
;
3606 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
3607 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
) = base_iv
.base
;
3608 STMT_VINFO_DR_INIT (stmt_info
) = outer_init
;
3609 STMT_VINFO_DR_OFFSET (stmt_info
) =
3610 fold_convert (ssizetype
, offset_iv
.base
);
3611 STMT_VINFO_DR_ALIGNED_TO (stmt_info
) =
3612 size_int (highest_pow2_factor (offset_iv
.base
));
3614 if (dump_enabled_p ())
3616 dump_printf_loc (MSG_NOTE
, vect_location
,
3617 "\touter base_address: ");
3618 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3619 STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
3620 dump_printf (MSG_NOTE
, "\n\touter offset from base address: ");
3621 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3622 STMT_VINFO_DR_OFFSET (stmt_info
));
3623 dump_printf (MSG_NOTE
,
3624 "\n\touter constant offset from base address: ");
3625 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3626 STMT_VINFO_DR_INIT (stmt_info
));
3627 dump_printf (MSG_NOTE
, "\n\touter step: ");
3628 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3629 STMT_VINFO_DR_STEP (stmt_info
));
3630 dump_printf (MSG_NOTE
, "\n\touter aligned to: ");
3631 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3632 STMT_VINFO_DR_ALIGNED_TO (stmt_info
));
3633 dump_printf (MSG_NOTE
, "\n");
3637 if (STMT_VINFO_DATA_REF (stmt_info
))
3639 if (dump_enabled_p ())
3641 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3642 "not vectorized: more than one data ref "
3644 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3645 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3651 if (gather
|| simd_lane_access
)
3656 STMT_VINFO_DATA_REF (stmt_info
) = dr
;
3657 if (simd_lane_access
)
3659 STMT_VINFO_SIMD_LANE_ACCESS_P (stmt_info
) = true;
3660 free_data_ref (datarefs
[i
]);
3664 /* Set vectype for STMT. */
3665 scalar_type
= TREE_TYPE (DR_REF (dr
));
3666 STMT_VINFO_VECTYPE (stmt_info
)
3667 = get_vectype_for_scalar_type (scalar_type
);
3668 if (!STMT_VINFO_VECTYPE (stmt_info
))
3670 if (dump_enabled_p ())
3672 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3673 "not vectorized: no vectype for stmt: ");
3674 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3675 dump_printf (MSG_MISSED_OPTIMIZATION
, " scalar_type: ");
3676 dump_generic_expr (MSG_MISSED_OPTIMIZATION
, TDF_DETAILS
,
3678 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3684 if (gather
|| simd_lane_access
)
3686 STMT_VINFO_DATA_REF (stmt_info
) = NULL
;
3694 if (dump_enabled_p ())
3696 dump_printf_loc (MSG_NOTE
, vect_location
,
3697 "got vectype for stmt: ");
3698 dump_gimple_stmt (MSG_NOTE
, TDF_SLIM
, stmt
, 0);
3699 dump_generic_expr (MSG_NOTE
, TDF_SLIM
,
3700 STMT_VINFO_VECTYPE (stmt_info
));
3701 dump_printf (MSG_NOTE
, "\n");
3705 /* Adjust the minimal vectorization factor according to the
3707 vf
= TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info
));
3715 gather
= 0 != vect_check_gather (stmt
, loop_vinfo
, NULL
, &off
, NULL
);
3717 && get_vectype_for_scalar_type (TREE_TYPE (off
)) == NULL_TREE
)
3721 STMT_VINFO_DATA_REF (stmt_info
) = NULL
;
3723 if (dump_enabled_p ())
3725 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3726 "not vectorized: not suitable for gather "
3728 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3729 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3735 STMT_VINFO_GATHER_P (stmt_info
) = true;
3738 && TREE_CODE (DR_STEP (dr
)) != INTEGER_CST
)
3740 if (nested_in_vect_loop_p (loop
, stmt
)
3741 || !DR_IS_READ (dr
))
3743 if (dump_enabled_p ())
3745 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
3746 "not vectorized: not suitable for strided "
3748 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION
, TDF_SLIM
, stmt
, 0);
3749 dump_printf (MSG_MISSED_OPTIMIZATION
, "\n");
3753 STMT_VINFO_STRIDE_LOAD_P (stmt_info
) = true;
3757 /* If we stopped analysis at the first dataref we could not analyze
3758 when trying to vectorize a basic-block mark the rest of the datarefs
3759 as not vectorizable and truncate the vector of datarefs. That
3760 avoids spending useless time in analyzing their dependence. */
3761 if (i
!= datarefs
.length ())
3763 gcc_assert (bb_vinfo
!= NULL
);
3764 for (unsigned j
= i
; j
< datarefs
.length (); ++j
)
3766 data_reference_p dr
= datarefs
[j
];
3767 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr
))) = false;
3770 datarefs
.truncate (i
);
3777 /* Function vect_get_new_vect_var.
3779 Returns a name for a new variable. The current naming scheme appends the
3780 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
3781 the name of vectorizer generated variables, and appends that to NAME if
3785 vect_get_new_vect_var (tree type
, enum vect_var_kind var_kind
, const char *name
)
3792 case vect_simple_var
:
3795 case vect_scalar_var
:
3798 case vect_pointer_var
:
3807 char* tmp
= concat (prefix
, "_", name
, NULL
);
3808 new_vect_var
= create_tmp_reg (type
, tmp
);
3812 new_vect_var
= create_tmp_reg (type
, prefix
);
3814 return new_vect_var
;
3818 /* Function vect_create_addr_base_for_vector_ref.
3820 Create an expression that computes the address of the first memory location
3821 that will be accessed for a data reference.
3824 STMT: The statement containing the data reference.
3825 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
3826 OFFSET: Optional. If supplied, it is be added to the initial address.
3827 LOOP: Specify relative to which loop-nest should the address be computed.
3828 For example, when the dataref is in an inner-loop nested in an
3829 outer-loop that is now being vectorized, LOOP can be either the
3830 outer-loop, or the inner-loop. The first memory location accessed
3831 by the following dataref ('in' points to short):
3838 if LOOP=i_loop: &in (relative to i_loop)
3839 if LOOP=j_loop: &in+i*2B (relative to j_loop)
3842 1. Return an SSA_NAME whose value is the address of the memory location of
3843 the first vector of the data reference.
3844 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
3845 these statement(s) which define the returned SSA_NAME.
3847 FORNOW: We are only handling array accesses with step 1. */
3850 vect_create_addr_base_for_vector_ref (gimple stmt
,
3851 gimple_seq
*new_stmt_list
,
3855 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3856 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
3858 const char *base_name
;
3861 gimple_seq seq
= NULL
;
3865 tree step
= TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr
)));
3866 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3868 if (loop_vinfo
&& loop
&& loop
!= (gimple_bb (stmt
))->loop_father
)
3870 struct loop
*outer_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
3872 gcc_assert (nested_in_vect_loop_p (outer_loop
, stmt
));
3874 data_ref_base
= unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info
));
3875 base_offset
= unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info
));
3876 init
= unshare_expr (STMT_VINFO_DR_INIT (stmt_info
));
3880 data_ref_base
= unshare_expr (DR_BASE_ADDRESS (dr
));
3881 base_offset
= unshare_expr (DR_OFFSET (dr
));
3882 init
= unshare_expr (DR_INIT (dr
));
3886 base_name
= get_name (data_ref_base
);
3889 base_offset
= ssize_int (0);
3890 init
= ssize_int (0);
3891 base_name
= get_name (DR_REF (dr
));
3894 /* Create base_offset */
3895 base_offset
= size_binop (PLUS_EXPR
,
3896 fold_convert (sizetype
, base_offset
),
3897 fold_convert (sizetype
, init
));
3901 offset
= fold_build2 (MULT_EXPR
, sizetype
,
3902 fold_convert (sizetype
, offset
), step
);
3903 base_offset
= fold_build2 (PLUS_EXPR
, sizetype
,
3904 base_offset
, offset
);
3907 /* base + base_offset */
3909 addr_base
= fold_build_pointer_plus (data_ref_base
, base_offset
);
3912 addr_base
= build1 (ADDR_EXPR
,
3913 build_pointer_type (TREE_TYPE (DR_REF (dr
))),
3914 unshare_expr (DR_REF (dr
)));
3917 vect_ptr_type
= build_pointer_type (STMT_VINFO_VECTYPE (stmt_info
));
3918 addr_base
= fold_convert (vect_ptr_type
, addr_base
);
3919 dest
= vect_get_new_vect_var (vect_ptr_type
, vect_pointer_var
, base_name
);
3920 addr_base
= force_gimple_operand (addr_base
, &seq
, false, dest
);
3921 gimple_seq_add_seq (new_stmt_list
, seq
);
3923 if (DR_PTR_INFO (dr
)
3924 && TREE_CODE (addr_base
) == SSA_NAME
)
3926 duplicate_ssa_name_ptr_info (addr_base
, DR_PTR_INFO (dr
));
3928 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr_base
));
3931 if (dump_enabled_p ())
3933 dump_printf_loc (MSG_NOTE
, vect_location
, "created ");
3934 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, addr_base
);
3935 dump_printf (MSG_NOTE
, "\n");
3942 /* Function vect_create_data_ref_ptr.
3944 Create a new pointer-to-AGGR_TYPE variable (ap), that points to the first
3945 location accessed in the loop by STMT, along with the def-use update
3946 chain to appropriately advance the pointer through the loop iterations.
3947 Also set aliasing information for the pointer. This pointer is used by
3948 the callers to this function to create a memory reference expression for
3949 vector load/store access.
3952 1. STMT: a stmt that references memory. Expected to be of the form
3953 GIMPLE_ASSIGN <name, data-ref> or
3954 GIMPLE_ASSIGN <data-ref, name>.
3955 2. AGGR_TYPE: the type of the reference, which should be either a vector
3957 3. AT_LOOP: the loop where the vector memref is to be created.
3958 4. OFFSET (optional): an offset to be added to the initial address accessed
3959 by the data-ref in STMT.
3960 5. BSI: location where the new stmts are to be placed if there is no loop
3961 6. ONLY_INIT: indicate if ap is to be updated in the loop, or remain
3962 pointing to the initial address.
3965 1. Declare a new ptr to vector_type, and have it point to the base of the
3966 data reference (initial addressed accessed by the data reference).
3967 For example, for vector of type V8HI, the following code is generated:
3970 ap = (v8hi *)initial_address;
3972 if OFFSET is not supplied:
3973 initial_address = &a[init];
3974 if OFFSET is supplied:
3975 initial_address = &a[init + OFFSET];
3977 Return the initial_address in INITIAL_ADDRESS.
3979 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
3980 update the pointer in each iteration of the loop.
3982 Return the increment stmt that updates the pointer in PTR_INCR.
3984 3. Set INV_P to true if the access pattern of the data reference in the
3985 vectorized loop is invariant. Set it to false otherwise.
3987 4. Return the pointer. */
3990 vect_create_data_ref_ptr (gimple stmt
, tree aggr_type
, struct loop
*at_loop
,
3991 tree offset
, tree
*initial_address
,
3992 gimple_stmt_iterator
*gsi
, gimple
*ptr_incr
,
3993 bool only_init
, bool *inv_p
)
3995 const char *base_name
;
3996 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
3997 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
3998 struct loop
*loop
= NULL
;
3999 bool nested_in_vect_loop
= false;
4000 struct loop
*containing_loop
= NULL
;
4005 gimple_seq new_stmt_list
= NULL
;
4009 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4011 gimple_stmt_iterator incr_gsi
;
4013 tree indx_before_incr
, indx_after_incr
;
4016 bb_vec_info bb_vinfo
= STMT_VINFO_BB_VINFO (stmt_info
);
4018 gcc_assert (TREE_CODE (aggr_type
) == ARRAY_TYPE
4019 || TREE_CODE (aggr_type
) == VECTOR_TYPE
);
4023 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4024 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4025 containing_loop
= (gimple_bb (stmt
))->loop_father
;
4026 pe
= loop_preheader_edge (loop
);
4030 gcc_assert (bb_vinfo
);
4035 /* Check the step (evolution) of the load in LOOP, and record
4036 whether it's invariant. */
4037 if (nested_in_vect_loop
)
4038 step
= STMT_VINFO_DR_STEP (stmt_info
);
4040 step
= DR_STEP (STMT_VINFO_DATA_REF (stmt_info
));
4042 if (integer_zerop (step
))
4047 /* Create an expression for the first address accessed by this load
4049 base_name
= get_name (DR_BASE_ADDRESS (dr
));
4051 if (dump_enabled_p ())
4053 tree dr_base_type
= TREE_TYPE (DR_BASE_OBJECT (dr
));
4054 dump_printf_loc (MSG_NOTE
, vect_location
,
4055 "create %s-pointer variable to type: ",
4056 get_tree_code_name (TREE_CODE (aggr_type
)));
4057 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, aggr_type
);
4058 if (TREE_CODE (dr_base_type
) == ARRAY_TYPE
)
4059 dump_printf (MSG_NOTE
, " vectorizing an array ref: ");
4060 else if (TREE_CODE (dr_base_type
) == VECTOR_TYPE
)
4061 dump_printf (MSG_NOTE
, " vectorizing a vector ref: ");
4062 else if (TREE_CODE (dr_base_type
) == RECORD_TYPE
)
4063 dump_printf (MSG_NOTE
, " vectorizing a record based array ref: ");
4065 dump_printf (MSG_NOTE
, " vectorizing a pointer ref: ");
4066 dump_generic_expr (MSG_NOTE
, TDF_SLIM
, DR_BASE_OBJECT (dr
));
4067 dump_printf (MSG_NOTE
, "\n");
4070 /* (1) Create the new aggregate-pointer variable.
4071 Vector and array types inherit the alias set of their component
4072 type by default so we need to use a ref-all pointer if the data
4073 reference does not conflict with the created aggregated data
4074 reference because it is not addressable. */
4075 bool need_ref_all
= false;
4076 if (!alias_sets_conflict_p (get_alias_set (aggr_type
),
4077 get_alias_set (DR_REF (dr
))))
4078 need_ref_all
= true;
4079 /* Likewise for any of the data references in the stmt group. */
4080 else if (STMT_VINFO_GROUP_SIZE (stmt_info
) > 1)
4082 gimple orig_stmt
= STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info
);
4085 stmt_vec_info sinfo
= vinfo_for_stmt (orig_stmt
);
4086 struct data_reference
*sdr
= STMT_VINFO_DATA_REF (sinfo
);
4087 if (!alias_sets_conflict_p (get_alias_set (aggr_type
),
4088 get_alias_set (DR_REF (sdr
))))
4090 need_ref_all
= true;
4093 orig_stmt
= STMT_VINFO_GROUP_NEXT_ELEMENT (sinfo
);
4097 aggr_ptr_type
= build_pointer_type_for_mode (aggr_type
, ptr_mode
,
4099 aggr_ptr
= vect_get_new_vect_var (aggr_ptr_type
, vect_pointer_var
, base_name
);
4102 /* Note: If the dataref is in an inner-loop nested in LOOP, and we are
4103 vectorizing LOOP (i.e., outer-loop vectorization), we need to create two
4104 def-use update cycles for the pointer: one relative to the outer-loop
4105 (LOOP), which is what steps (3) and (4) below do. The other is relative
4106 to the inner-loop (which is the inner-most loop containing the dataref),
4107 and this is done be step (5) below.
4109 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
4110 inner-most loop, and so steps (3),(4) work the same, and step (5) is
4111 redundant. Steps (3),(4) create the following:
4114 LOOP: vp1 = phi(vp0,vp2)
4120 If there is an inner-loop nested in loop, then step (5) will also be
4121 applied, and an additional update in the inner-loop will be created:
4124 LOOP: vp1 = phi(vp0,vp2)
4126 inner: vp3 = phi(vp1,vp4)
4127 vp4 = vp3 + inner_step
4133 /* (2) Calculate the initial address of the aggregate-pointer, and set
4134 the aggregate-pointer to point to it before the loop. */
4136 /* Create: (&(base[init_val+offset]) in the loop preheader. */
4138 new_temp
= vect_create_addr_base_for_vector_ref (stmt
, &new_stmt_list
,
4144 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, new_stmt_list
);
4145 gcc_assert (!new_bb
);
4148 gsi_insert_seq_before (gsi
, new_stmt_list
, GSI_SAME_STMT
);
4151 *initial_address
= new_temp
;
4153 /* Create: p = (aggr_type *) initial_base */
4154 if (TREE_CODE (new_temp
) != SSA_NAME
4155 || !useless_type_conversion_p (aggr_ptr_type
, TREE_TYPE (new_temp
)))
4157 vec_stmt
= gimple_build_assign (aggr_ptr
,
4158 fold_convert (aggr_ptr_type
, new_temp
));
4159 aggr_ptr_init
= make_ssa_name (aggr_ptr
, vec_stmt
);
4160 /* Copy the points-to information if it exists. */
4161 if (DR_PTR_INFO (dr
))
4162 duplicate_ssa_name_ptr_info (aggr_ptr_init
, DR_PTR_INFO (dr
));
4163 gimple_assign_set_lhs (vec_stmt
, aggr_ptr_init
);
4166 new_bb
= gsi_insert_on_edge_immediate (pe
, vec_stmt
);
4167 gcc_assert (!new_bb
);
4170 gsi_insert_before (gsi
, vec_stmt
, GSI_SAME_STMT
);
4173 aggr_ptr_init
= new_temp
;
4175 /* (3) Handle the updating of the aggregate-pointer inside the loop.
4176 This is needed when ONLY_INIT is false, and also when AT_LOOP is the
4177 inner-loop nested in LOOP (during outer-loop vectorization). */
4179 /* No update in loop is required. */
4180 if (only_init
&& (!loop_vinfo
|| at_loop
== loop
))
4181 aptr
= aggr_ptr_init
;
4184 /* The step of the aggregate pointer is the type size. */
4185 tree iv_step
= TYPE_SIZE_UNIT (aggr_type
);
4186 /* One exception to the above is when the scalar step of the load in
4187 LOOP is zero. In this case the step here is also zero. */
4189 iv_step
= size_zero_node
;
4190 else if (tree_int_cst_sgn (step
) == -1)
4191 iv_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (iv_step
), iv_step
);
4193 standard_iv_increment_position (loop
, &incr_gsi
, &insert_after
);
4195 create_iv (aggr_ptr_init
,
4196 fold_convert (aggr_ptr_type
, iv_step
),
4197 aggr_ptr
, loop
, &incr_gsi
, insert_after
,
4198 &indx_before_incr
, &indx_after_incr
);
4199 incr
= gsi_stmt (incr_gsi
);
4200 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4202 /* Copy the points-to information if it exists. */
4203 if (DR_PTR_INFO (dr
))
4205 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
4206 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
4211 aptr
= indx_before_incr
;
4214 if (!nested_in_vect_loop
|| only_init
)
4218 /* (4) Handle the updating of the aggregate-pointer inside the inner-loop
4219 nested in LOOP, if exists. */
4221 gcc_assert (nested_in_vect_loop
);
4224 standard_iv_increment_position (containing_loop
, &incr_gsi
,
4226 create_iv (aptr
, fold_convert (aggr_ptr_type
, DR_STEP (dr
)), aggr_ptr
,
4227 containing_loop
, &incr_gsi
, insert_after
, &indx_before_incr
,
4229 incr
= gsi_stmt (incr_gsi
);
4230 set_vinfo_for_stmt (incr
, new_stmt_vec_info (incr
, loop_vinfo
, NULL
));
4232 /* Copy the points-to information if it exists. */
4233 if (DR_PTR_INFO (dr
))
4235 duplicate_ssa_name_ptr_info (indx_before_incr
, DR_PTR_INFO (dr
));
4236 duplicate_ssa_name_ptr_info (indx_after_incr
, DR_PTR_INFO (dr
));
4241 return indx_before_incr
;
4248 /* Function bump_vector_ptr
4250 Increment a pointer (to a vector type) by vector-size. If requested,
4251 i.e. if PTR-INCR is given, then also connect the new increment stmt
4252 to the existing def-use update-chain of the pointer, by modifying
4253 the PTR_INCR as illustrated below:
4255 The pointer def-use update-chain before this function:
4256 DATAREF_PTR = phi (p_0, p_2)
4258 PTR_INCR: p_2 = DATAREF_PTR + step
4260 The pointer def-use update-chain after this function:
4261 DATAREF_PTR = phi (p_0, p_2)
4263 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
4265 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
4268 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
4270 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
4271 the loop. The increment amount across iterations is expected
4273 BSI - location where the new update stmt is to be placed.
4274 STMT - the original scalar memory-access stmt that is being vectorized.
4275 BUMP - optional. The offset by which to bump the pointer. If not given,
4276 the offset is assumed to be vector_size.
4278 Output: Return NEW_DATAREF_PTR as illustrated above.
4283 bump_vector_ptr (tree dataref_ptr
, gimple ptr_incr
, gimple_stmt_iterator
*gsi
,
4284 gimple stmt
, tree bump
)
4286 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4287 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4288 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4289 tree update
= TYPE_SIZE_UNIT (vectype
);
4292 use_operand_p use_p
;
4293 tree new_dataref_ptr
;
4298 new_dataref_ptr
= copy_ssa_name (dataref_ptr
, NULL
);
4299 incr_stmt
= gimple_build_assign_with_ops (POINTER_PLUS_EXPR
, new_dataref_ptr
,
4300 dataref_ptr
, update
);
4301 vect_finish_stmt_generation (stmt
, incr_stmt
, gsi
);
4303 /* Copy the points-to information if it exists. */
4304 if (DR_PTR_INFO (dr
))
4306 duplicate_ssa_name_ptr_info (new_dataref_ptr
, DR_PTR_INFO (dr
));
4307 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (new_dataref_ptr
));
4311 return new_dataref_ptr
;
4313 /* Update the vector-pointer's cross-iteration increment. */
4314 FOR_EACH_SSA_USE_OPERAND (use_p
, ptr_incr
, iter
, SSA_OP_USE
)
4316 tree use
= USE_FROM_PTR (use_p
);
4318 if (use
== dataref_ptr
)
4319 SET_USE (use_p
, new_dataref_ptr
);
4321 gcc_assert (tree_int_cst_compare (use
, update
) == 0);
4324 return new_dataref_ptr
;
4328 /* Function vect_create_destination_var.
4330 Create a new temporary of type VECTYPE. */
4333 vect_create_destination_var (tree scalar_dest
, tree vectype
)
4339 enum vect_var_kind kind
;
4341 kind
= vectype
? vect_simple_var
: vect_scalar_var
;
4342 type
= vectype
? vectype
: TREE_TYPE (scalar_dest
);
4344 gcc_assert (TREE_CODE (scalar_dest
) == SSA_NAME
);
4346 name
= get_name (scalar_dest
);
4348 asprintf (&new_name
, "%s_%u", name
, SSA_NAME_VERSION (scalar_dest
));
4350 asprintf (&new_name
, "_%u", SSA_NAME_VERSION (scalar_dest
));
4351 vec_dest
= vect_get_new_vect_var (type
, kind
, new_name
);
4357 /* Function vect_grouped_store_supported.
4359 Returns TRUE if interleave high and interleave low permutations
4360 are supported, and FALSE otherwise. */
4363 vect_grouped_store_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
4365 enum machine_mode mode
= TYPE_MODE (vectype
);
4367 /* vect_permute_store_chain requires the group size to be a power of two. */
4368 if (exact_log2 (count
) == -1)
4370 if (dump_enabled_p ())
4371 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4372 "the size of the group of accesses"
4373 " is not a power of 2\n");
4377 /* Check that the permutation is supported. */
4378 if (VECTOR_MODE_P (mode
))
4380 unsigned int i
, nelt
= GET_MODE_NUNITS (mode
);
4381 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
4382 for (i
= 0; i
< nelt
/ 2; i
++)
4385 sel
[i
* 2 + 1] = i
+ nelt
;
4387 if (can_vec_perm_p (mode
, false, sel
))
4389 for (i
= 0; i
< nelt
; i
++)
4391 if (can_vec_perm_p (mode
, false, sel
))
4396 if (dump_enabled_p ())
4397 dump_printf (MSG_MISSED_OPTIMIZATION
,
4398 "interleave op not supported by target.\n");
4403 /* Return TRUE if vec_store_lanes is available for COUNT vectors of
4407 vect_store_lanes_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
4409 return vect_lanes_optab_supported_p ("vec_store_lanes",
4410 vec_store_lanes_optab
,
4415 /* Function vect_permute_store_chain.
4417 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
4418 a power of 2, generate interleave_high/low stmts to reorder the data
4419 correctly for the stores. Return the final references for stores in
4422 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4423 The input is 4 vectors each containing 8 elements. We assign a number to
4424 each element, the input sequence is:
4426 1st vec: 0 1 2 3 4 5 6 7
4427 2nd vec: 8 9 10 11 12 13 14 15
4428 3rd vec: 16 17 18 19 20 21 22 23
4429 4th vec: 24 25 26 27 28 29 30 31
4431 The output sequence should be:
4433 1st vec: 0 8 16 24 1 9 17 25
4434 2nd vec: 2 10 18 26 3 11 19 27
4435 3rd vec: 4 12 20 28 5 13 21 30
4436 4th vec: 6 14 22 30 7 15 23 31
4438 i.e., we interleave the contents of the four vectors in their order.
4440 We use interleave_high/low instructions to create such output. The input of
4441 each interleave_high/low operation is two vectors:
4444 the even elements of the result vector are obtained left-to-right from the
4445 high/low elements of the first vector. The odd elements of the result are
4446 obtained left-to-right from the high/low elements of the second vector.
4447 The output of interleave_high will be: 0 4 1 5
4448 and of interleave_low: 2 6 3 7
4451 The permutation is done in log LENGTH stages. In each stage interleave_high
4452 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
4453 where the first argument is taken from the first half of DR_CHAIN and the
4454 second argument from it's second half.
4457 I1: interleave_high (1st vec, 3rd vec)
4458 I2: interleave_low (1st vec, 3rd vec)
4459 I3: interleave_high (2nd vec, 4th vec)
4460 I4: interleave_low (2nd vec, 4th vec)
4462 The output for the first stage is:
4464 I1: 0 16 1 17 2 18 3 19
4465 I2: 4 20 5 21 6 22 7 23
4466 I3: 8 24 9 25 10 26 11 27
4467 I4: 12 28 13 29 14 30 15 31
4469 The output of the second stage, i.e. the final result is:
4471 I1: 0 8 16 24 1 9 17 25
4472 I2: 2 10 18 26 3 11 19 27
4473 I3: 4 12 20 28 5 13 21 30
4474 I4: 6 14 22 30 7 15 23 31. */
4477 vect_permute_store_chain (vec
<tree
> dr_chain
,
4478 unsigned int length
,
4480 gimple_stmt_iterator
*gsi
,
4481 vec
<tree
> *result_chain
)
4483 tree vect1
, vect2
, high
, low
;
4485 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
4486 tree perm_mask_low
, perm_mask_high
;
4488 unsigned int j
, nelt
= TYPE_VECTOR_SUBPARTS (vectype
);
4489 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
4491 result_chain
->quick_grow (length
);
4492 memcpy (result_chain
->address (), dr_chain
.address (),
4493 length
* sizeof (tree
));
4495 for (i
= 0, n
= nelt
/ 2; i
< n
; i
++)
4498 sel
[i
* 2 + 1] = i
+ nelt
;
4500 perm_mask_high
= vect_gen_perm_mask (vectype
, sel
);
4501 gcc_assert (perm_mask_high
!= NULL
);
4503 for (i
= 0; i
< nelt
; i
++)
4505 perm_mask_low
= vect_gen_perm_mask (vectype
, sel
);
4506 gcc_assert (perm_mask_low
!= NULL
);
4508 for (i
= 0, n
= exact_log2 (length
); i
< n
; i
++)
4510 for (j
= 0; j
< length
/2; j
++)
4512 vect1
= dr_chain
[j
];
4513 vect2
= dr_chain
[j
+length
/2];
4515 /* Create interleaving stmt:
4516 high = VEC_PERM_EXPR <vect1, vect2, {0, nelt, 1, nelt+1, ...}> */
4517 high
= make_temp_ssa_name (vectype
, NULL
, "vect_inter_high");
4519 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, high
,
4520 vect1
, vect2
, perm_mask_high
);
4521 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4522 (*result_chain
)[2*j
] = high
;
4524 /* Create interleaving stmt:
4525 low = VEC_PERM_EXPR <vect1, vect2, {nelt/2, nelt*3/2, nelt/2+1,
4526 nelt*3/2+1, ...}> */
4527 low
= make_temp_ssa_name (vectype
, NULL
, "vect_inter_low");
4529 = gimple_build_assign_with_ops (VEC_PERM_EXPR
, low
,
4530 vect1
, vect2
, perm_mask_low
);
4531 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
4532 (*result_chain
)[2*j
+1] = low
;
4534 memcpy (dr_chain
.address (), result_chain
->address (),
4535 length
* sizeof (tree
));
4539 /* Function vect_setup_realignment
4541 This function is called when vectorizing an unaligned load using
4542 the dr_explicit_realign[_optimized] scheme.
4543 This function generates the following code at the loop prolog:
4546 x msq_init = *(floor(p)); # prolog load
4547 realignment_token = call target_builtin;
4549 x msq = phi (msq_init, ---)
4551 The stmts marked with x are generated only for the case of
4552 dr_explicit_realign_optimized.
4554 The code above sets up a new (vector) pointer, pointing to the first
4555 location accessed by STMT, and a "floor-aligned" load using that pointer.
4556 It also generates code to compute the "realignment-token" (if the relevant
4557 target hook was defined), and creates a phi-node at the loop-header bb
4558 whose arguments are the result of the prolog-load (created by this
4559 function) and the result of a load that takes place in the loop (to be
4560 created by the caller to this function).
4562 For the case of dr_explicit_realign_optimized:
4563 The caller to this function uses the phi-result (msq) to create the
4564 realignment code inside the loop, and sets up the missing phi argument,
4567 msq = phi (msq_init, lsq)
4568 lsq = *(floor(p')); # load in loop
4569 result = realign_load (msq, lsq, realignment_token);
4571 For the case of dr_explicit_realign:
4573 msq = *(floor(p)); # load in loop
4575 lsq = *(floor(p')); # load in loop
4576 result = realign_load (msq, lsq, realignment_token);
4579 STMT - (scalar) load stmt to be vectorized. This load accesses
4580 a memory location that may be unaligned.
4581 BSI - place where new code is to be inserted.
4582 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
4586 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
4587 target hook, if defined.
4588 Return value - the result of the loop-header phi node. */
4591 vect_setup_realignment (gimple stmt
, gimple_stmt_iterator
*gsi
,
4592 tree
*realignment_token
,
4593 enum dr_alignment_support alignment_support_scheme
,
4595 struct loop
**at_loop
)
4597 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
4598 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
4599 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
4600 struct data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
4601 struct loop
*loop
= NULL
;
4603 tree scalar_dest
= gimple_assign_lhs (stmt
);
4610 tree msq_init
= NULL_TREE
;
4613 tree msq
= NULL_TREE
;
4614 gimple_seq stmts
= NULL
;
4616 bool compute_in_loop
= false;
4617 bool nested_in_vect_loop
= false;
4618 struct loop
*containing_loop
= (gimple_bb (stmt
))->loop_father
;
4619 struct loop
*loop_for_initial_load
= NULL
;
4623 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
4624 nested_in_vect_loop
= nested_in_vect_loop_p (loop
, stmt
);
4627 gcc_assert (alignment_support_scheme
== dr_explicit_realign
4628 || alignment_support_scheme
== dr_explicit_realign_optimized
);
4630 /* We need to generate three things:
4631 1. the misalignment computation
4632 2. the extra vector load (for the optimized realignment scheme).
4633 3. the phi node for the two vectors from which the realignment is
4634 done (for the optimized realignment scheme). */
4636 /* 1. Determine where to generate the misalignment computation.
4638 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
4639 calculation will be generated by this function, outside the loop (in the
4640 preheader). Otherwise, INIT_ADDR had already been computed for us by the
4641 caller, inside the loop.
4643 Background: If the misalignment remains fixed throughout the iterations of
4644 the loop, then both realignment schemes are applicable, and also the
4645 misalignment computation can be done outside LOOP. This is because we are
4646 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
4647 are a multiple of VS (the Vector Size), and therefore the misalignment in
4648 different vectorized LOOP iterations is always the same.
4649 The problem arises only if the memory access is in an inner-loop nested
4650 inside LOOP, which is now being vectorized using outer-loop vectorization.
4651 This is the only case when the misalignment of the memory access may not
4652 remain fixed throughout the iterations of the inner-loop (as explained in
4653 detail in vect_supportable_dr_alignment). In this case, not only is the
4654 optimized realignment scheme not applicable, but also the misalignment
4655 computation (and generation of the realignment token that is passed to
4656 REALIGN_LOAD) have to be done inside the loop.
4658 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
4659 or not, which in turn determines if the misalignment is computed inside
4660 the inner-loop, or outside LOOP. */
4662 if (init_addr
!= NULL_TREE
|| !loop_vinfo
)
4664 compute_in_loop
= true;
4665 gcc_assert (alignment_support_scheme
== dr_explicit_realign
);
4669 /* 2. Determine where to generate the extra vector load.
4671 For the optimized realignment scheme, instead of generating two vector
4672 loads in each iteration, we generate a single extra vector load in the
4673 preheader of the loop, and in each iteration reuse the result of the
4674 vector load from the previous iteration. In case the memory access is in
4675 an inner-loop nested inside LOOP, which is now being vectorized using
4676 outer-loop vectorization, we need to determine whether this initial vector
4677 load should be generated at the preheader of the inner-loop, or can be
4678 generated at the preheader of LOOP. If the memory access has no evolution
4679 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
4680 to be generated inside LOOP (in the preheader of the inner-loop). */
4682 if (nested_in_vect_loop
)
4684 tree outerloop_step
= STMT_VINFO_DR_STEP (stmt_info
);
4685 bool invariant_in_outerloop
=
4686 (tree_int_cst_compare (outerloop_step
, size_zero_node
) == 0);
4687 loop_for_initial_load
= (invariant_in_outerloop
? loop
: loop
->inner
);
4690 loop_for_initial_load
= loop
;
4692 *at_loop
= loop_for_initial_load
;
4694 if (loop_for_initial_load
)
4695 pe
= loop_preheader_edge (loop_for_initial_load
);
4697 /* 3. For the case of the optimized realignment, create the first vector
4698 load at the loop preheader. */
4700 if (alignment_support_scheme
== dr_explicit_realign_optimized
)
4702 /* Create msq_init = *(floor(p1)) in the loop preheader */
4704 gcc_assert (!compute_in_loop
);
4705 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4706 ptr
= vect_create_data_ref_ptr (stmt
, vectype
, loop_for_initial_load
,
4707 NULL_TREE
, &init_addr
, NULL
, &inc
,
4709 new_temp
= copy_ssa_name (ptr
, NULL
);
4710 new_stmt
= gimple_build_assign_with_ops
4711 (BIT_AND_EXPR
, new_temp
, ptr
,
4712 build_int_cst (TREE_TYPE (ptr
),
4713 -(HOST_WIDE_INT
)TYPE_ALIGN_UNIT (vectype
)));
4714 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
4715 gcc_assert (!new_bb
);
4717 = build2 (MEM_REF
, TREE_TYPE (vec_dest
), new_temp
,
4718 build_int_cst (reference_alias_ptr_type (DR_REF (dr
)), 0));
4719 new_stmt
= gimple_build_assign (vec_dest
, data_ref
);
4720 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4721 gimple_assign_set_lhs (new_stmt
, new_temp
);
4724 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
4725 gcc_assert (!new_bb
);
4728 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4730 msq_init
= gimple_assign_lhs (new_stmt
);
4733 /* 4. Create realignment token using a target builtin, if available.
4734 It is done either inside the containing loop, or before LOOP (as
4735 determined above). */
4737 if (targetm
.vectorize
.builtin_mask_for_load
)
4741 /* Compute INIT_ADDR - the initial addressed accessed by this memref. */
4744 /* Generate the INIT_ADDR computation outside LOOP. */
4745 init_addr
= vect_create_addr_base_for_vector_ref (stmt
, &stmts
,
4749 pe
= loop_preheader_edge (loop
);
4750 new_bb
= gsi_insert_seq_on_edge_immediate (pe
, stmts
);
4751 gcc_assert (!new_bb
);
4754 gsi_insert_seq_before (gsi
, stmts
, GSI_SAME_STMT
);
4757 builtin_decl
= targetm
.vectorize
.builtin_mask_for_load ();
4758 new_stmt
= gimple_build_call (builtin_decl
, 1, init_addr
);
4760 vect_create_destination_var (scalar_dest
,
4761 gimple_call_return_type (new_stmt
));
4762 new_temp
= make_ssa_name (vec_dest
, new_stmt
);
4763 gimple_call_set_lhs (new_stmt
, new_temp
);
4765 if (compute_in_loop
)
4766 gsi_insert_before (gsi
, new_stmt
, GSI_SAME_STMT
);
4769 /* Generate the misalignment computation outside LOOP. */
4770 pe
= loop_preheader_edge (loop
);
4771 new_bb
= gsi_insert_on_edge_immediate (pe
, new_stmt
);
4772 gcc_assert (!new_bb
);
4775 *realignment_token
= gimple_call_lhs (new_stmt
);
4777 /* The result of the CALL_EXPR to this builtin is determined from
4778 the value of the parameter and no global variables are touched
4779 which makes the builtin a "const" function. Requiring the
4780 builtin to have the "const" attribute makes it unnecessary
4781 to call mark_call_clobbered. */
4782 gcc_assert (TREE_READONLY (builtin_decl
));
4785 if (alignment_support_scheme
== dr_explicit_realign
)
4788 gcc_assert (!compute_in_loop
);
4789 gcc_assert (alignment_support_scheme
== dr_explicit_realign_optimized
);
4792 /* 5. Create msq = phi <msq_init, lsq> in loop */
4794 pe
= loop_preheader_edge (containing_loop
);
4795 vec_dest
= vect_create_destination_var (scalar_dest
, vectype
);
4796 msq
= make_ssa_name (vec_dest
, NULL
);
4797 phi_stmt
= create_phi_node (msq
, containing_loop
->header
);
4798 add_phi_arg (phi_stmt
, msq_init
, pe
, UNKNOWN_LOCATION
);
4804 /* Function vect_grouped_load_supported.
4806 Returns TRUE if even and odd permutations are supported,
4807 and FALSE otherwise. */
4810 vect_grouped_load_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
4812 enum machine_mode mode
= TYPE_MODE (vectype
);
4814 /* vect_permute_load_chain requires the group size to be equal to 3 or
4815 be a power of two. */
4816 if (count
!= 3 && exact_log2 (count
) == -1)
4818 if (dump_enabled_p ())
4819 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4820 "the size of the group of accesses"
4821 " is not a power of 2 or not equal to 3\n");
4825 /* Check that the permutation is supported. */
4826 if (VECTOR_MODE_P (mode
))
4828 unsigned int i
, j
, nelt
= GET_MODE_NUNITS (mode
);
4829 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
4834 for (k
= 0; k
< 3; k
++)
4836 for (i
= 0; i
< nelt
; i
++)
4837 if (3 * i
+ k
< 2 * nelt
)
4841 if (!can_vec_perm_p (mode
, false, sel
))
4843 if (dump_enabled_p ())
4844 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4845 "shuffle of 3 loads is not supported by"
4849 for (i
= 0, j
= 0; i
< nelt
; i
++)
4850 if (3 * i
+ k
< 2 * nelt
)
4853 sel
[i
] = nelt
+ ((nelt
+ k
) % 3) + 3 * (j
++);
4854 if (!can_vec_perm_p (mode
, false, sel
))
4856 if (dump_enabled_p ())
4857 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4858 "shuffle of 3 loads is not supported by"
4867 /* If length is not equal to 3 then only power of 2 is supported. */
4868 gcc_assert (exact_log2 (count
) != -1);
4869 for (i
= 0; i
< nelt
; i
++)
4871 if (can_vec_perm_p (mode
, false, sel
))
4873 for (i
= 0; i
< nelt
; i
++)
4875 if (can_vec_perm_p (mode
, false, sel
))
4881 if (dump_enabled_p ())
4882 dump_printf_loc (MSG_MISSED_OPTIMIZATION
, vect_location
,
4883 "extract even/odd not supported by target\n");
4887 /* Return TRUE if vec_load_lanes is available for COUNT vectors of
4891 vect_load_lanes_supported (tree vectype
, unsigned HOST_WIDE_INT count
)
4893 return vect_lanes_optab_supported_p ("vec_load_lanes",
4894 vec_load_lanes_optab
,
4898 /* Function vect_permute_load_chain.
4900 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
4901 a power of 2 or equal to 3, generate extract_even/odd stmts to reorder
4902 the input data correctly. Return the final references for loads in
4905 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
4906 The input is 4 vectors each containing 8 elements. We assign a number to each
4907 element, the input sequence is:
4909 1st vec: 0 1 2 3 4 5 6 7
4910 2nd vec: 8 9 10 11 12 13 14 15
4911 3rd vec: 16 17 18 19 20 21 22 23
4912 4th vec: 24 25 26 27 28 29 30 31
4914 The output sequence should be:
4916 1st vec: 0 4 8 12 16 20 24 28
4917 2nd vec: 1 5 9 13 17 21 25 29
4918 3rd vec: 2 6 10 14 18 22 26 30
4919 4th vec: 3 7 11 15 19 23 27 31
4921 i.e., the first output vector should contain the first elements of each
4922 interleaving group, etc.
4924 We use extract_even/odd instructions to create such output. The input of
4925 each extract_even/odd operation is two vectors
4929 and the output is the vector of extracted even/odd elements. The output of
4930 extract_even will be: 0 2 4 6
4931 and of extract_odd: 1 3 5 7
4934 The permutation is done in log LENGTH stages. In each stage extract_even
4935 and extract_odd stmts are created for each pair of vectors in DR_CHAIN in
4936 their order. In our example,
4938 E1: extract_even (1st vec, 2nd vec)
4939 E2: extract_odd (1st vec, 2nd vec)
4940 E3: extract_even (3rd vec, 4th vec)
4941 E4: extract_odd (3rd vec, 4th vec)
4943 The output for the first stage will be:
4945 E1: 0 2 4 6 8 10 12 14
4946 E2: 1 3 5 7 9 11 13 15
4947 E3: 16 18 20 22 24 26 28 30
4948 E4: 17 19 21 23 25 27 29 31
4950 In order to proceed and create the correct sequence for the next stage (or
4951 for the correct output, if the second stage is the last one, as in our
4952 example), we first put the output of extract_even operation and then the
4953 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
4954 The input for the second stage is:
4956 1st vec (E1): 0 2 4 6 8 10 12 14
4957 2nd vec (E3): 16 18 20 22 24 26 28 30
4958 3rd vec (E2): 1 3 5 7 9 11 13 15
4959 4th vec (E4): 17 19 21 23 25 27 29 31
4961 The output of the second stage:
4963 E1: 0 4 8 12 16 20 24 28
4964 E2: 2 6 10 14 18 22 26 30
4965 E3: 1 5 9 13 17 21 25 29
4966 E4: 3 7 11 15 19 23 27 31
4968 And RESULT_CHAIN after reordering:
4970 1st vec (E1): 0 4 8 12 16 20 24 28
4971 2nd vec (E3): 1 5 9 13 17 21 25 29
4972 3rd vec (E2): 2 6 10 14 18 22 26 30
4973 4th vec (E4): 3 7 11 15 19 23 27 31. */
4976 vect_permute_load_chain (vec
<tree
> dr_chain
,
4977 unsigned int length
,
4979 gimple_stmt_iterator
*gsi
,
4980 vec
<tree
> *result_chain
)
4982 tree data_ref
, first_vect
, second_vect
;
4983 tree perm_mask_even
, perm_mask_odd
;
4984 tree perm3_mask_low
, perm3_mask_high
;
4986 tree vectype
= STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt
));
4987 unsigned int i
, j
, log_length
= exact_log2 (length
);
4988 unsigned nelt
= TYPE_VECTOR_SUBPARTS (vectype
);
4989 unsigned char *sel
= XALLOCAVEC (unsigned char, nelt
);
4991 result_chain
->quick_grow (length
);
4992 memcpy (result_chain
->address (), dr_chain
.address (),
4993 length
* sizeof (tree
));
4999 for (k
= 0; k
< 3; k
++)
5001 for (i
= 0; i
< nelt
; i
++)
5002 if (3 * i
+ k
< 2 * nelt
)
5006 perm3_mask_low
= vect_gen_perm_mask (vectype
, sel
);
5007 gcc_assert (perm3_mask_low
!= NULL
);
5009 for (i
= 0, j
= 0; i
< nelt
; i
++)
5010 if (3 * i
+ k
< 2 * nelt
)
5013 sel
[i
] = nelt
+ ((nelt
+ k
) % 3) + 3 * (j
++);
5015 perm3_mask_high
= vect_gen_perm_mask (vectype
, sel
);
5016 gcc_assert (perm3_mask_high
!= NULL
);
5018 first_vect
= dr_chain
[0];
5019 second_vect
= dr_chain
[1];
5021 /* Create interleaving stmt (low part of):
5022 low = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
5024 data_ref
= make_temp_ssa_name (vectype
, NULL
, "vect_suffle3_low");
5025 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
5026 first_vect
, second_vect
,
5028 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5030 /* Create interleaving stmt (high part of):
5031 high = VEC_PERM_EXPR <first_vect, second_vect2, {k, 3 + k, 6 + k,
5033 first_vect
= data_ref
;
5034 second_vect
= dr_chain
[2];
5035 data_ref
= make_temp_ssa_name (vectype
, NULL
, "vect_suffle3_high");
5036 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
5037 first_vect
, second_vect
,
5039 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5040 (*result_chain
)[k
] = data_ref
;
5045 /* If length is not equal to 3 then only power of 2 is supported. */
5046 gcc_assert (exact_log2 (length
) != -1);
5048 for (i
= 0; i
< nelt
; ++i
)
5050 perm_mask_even
= vect_gen_perm_mask (vectype
, sel
);
5051 gcc_assert (perm_mask_even
!= NULL
);
5053 for (i
= 0; i
< nelt
; ++i
)
5055 perm_mask_odd
= vect_gen_perm_mask (vectype
, sel
);
5056 gcc_assert (perm_mask_odd
!= NULL
);
5058 for (i
= 0; i
< log_length
; i
++)
5060 for (j
= 0; j
< length
; j
+= 2)
5062 first_vect
= dr_chain
[j
];
5063 second_vect
= dr_chain
[j
+1];
5065 /* data_ref = permute_even (first_data_ref, second_data_ref); */
5066 data_ref
= make_temp_ssa_name (vectype
, NULL
, "vect_perm_even");
5067 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
5068 first_vect
, second_vect
,
5070 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5071 (*result_chain
)[j
/2] = data_ref
;
5073 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
5074 data_ref
= make_temp_ssa_name (vectype
, NULL
, "vect_perm_odd");
5075 perm_stmt
= gimple_build_assign_with_ops (VEC_PERM_EXPR
, data_ref
,
5076 first_vect
, second_vect
,
5078 vect_finish_stmt_generation (stmt
, perm_stmt
, gsi
);
5079 (*result_chain
)[j
/2+length
/2] = data_ref
;
5081 memcpy (dr_chain
.address (), result_chain
->address (),
5082 length
* sizeof (tree
));
5087 /* Function vect_transform_grouped_load.
5089 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
5090 to perform their permutation and ascribe the result vectorized statements to
5091 the scalar statements.
5095 vect_transform_grouped_load (gimple stmt
, vec
<tree
> dr_chain
, int size
,
5096 gimple_stmt_iterator
*gsi
)
5098 vec
<tree
> result_chain
= vNULL
;
5100 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
5101 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
5102 vectors, that are ready for vector computation. */
5103 result_chain
.create (size
);
5104 vect_permute_load_chain (dr_chain
, size
, stmt
, gsi
, &result_chain
);
5105 vect_record_grouped_load_vectors (stmt
, result_chain
);
5106 result_chain
.release ();
5109 /* RESULT_CHAIN contains the output of a group of grouped loads that were
5110 generated as part of the vectorization of STMT. Assign the statement
5111 for each vector to the associated scalar statement. */
5114 vect_record_grouped_load_vectors (gimple stmt
, vec
<tree
> result_chain
)
5116 gimple first_stmt
= GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt
));
5117 gimple next_stmt
, new_stmt
;
5118 unsigned int i
, gap_count
;
5121 /* Put a permuted data-ref in the VECTORIZED_STMT field.
5122 Since we scan the chain starting from its first node, their order
5123 corresponds the order of data-refs in RESULT_CHAIN. */
5124 next_stmt
= first_stmt
;
5126 FOR_EACH_VEC_ELT (result_chain
, i
, tmp_data_ref
)
5131 /* Skip the gaps. Loads created for the gaps will be removed by dead
5132 code elimination pass later. No need to check for the first stmt in
5133 the group, since it always exists.
5134 GROUP_GAP is the number of steps in elements from the previous
5135 access (if there is no gap GROUP_GAP is 1). We skip loads that
5136 correspond to the gaps. */
5137 if (next_stmt
!= first_stmt
5138 && gap_count
< GROUP_GAP (vinfo_for_stmt (next_stmt
)))
5146 new_stmt
= SSA_NAME_DEF_STMT (tmp_data_ref
);
5147 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
5148 copies, and we put the new vector statement in the first available
5150 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)))
5151 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
)) = new_stmt
;
5154 if (!GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
5157 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt
));
5159 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
));
5162 prev_stmt
= rel_stmt
;
5164 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt
));
5167 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt
)) =
5172 next_stmt
= GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt
));
5174 /* If NEXT_STMT accesses the same DR as the previous statement,
5175 put the same TMP_DATA_REF as its vectorized statement; otherwise
5176 get the next data-ref from RESULT_CHAIN. */
5177 if (!next_stmt
|| !GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt
)))
5183 /* Function vect_force_dr_alignment_p.
5185 Returns whether the alignment of a DECL can be forced to be aligned
5186 on ALIGNMENT bit boundary. */
5189 vect_can_force_dr_alignment_p (const_tree decl
, unsigned int alignment
)
5191 if (TREE_CODE (decl
) != VAR_DECL
)
5194 /* We cannot change alignment of common or external symbols as another
5195 translation unit may contain a definition with lower alignment.
5196 The rules of common symbol linking mean that the definition
5197 will override the common symbol. The same is true for constant
5198 pool entries which may be shared and are not properly merged
5200 if (DECL_EXTERNAL (decl
)
5201 || DECL_COMMON (decl
)
5202 || DECL_IN_CONSTANT_POOL (decl
))
5205 if (TREE_ASM_WRITTEN (decl
))
5208 /* Do not override the alignment as specified by the ABI when the used
5209 attribute is set. */
5210 if (DECL_PRESERVE_P (decl
))
5213 /* Do not override explicit alignment set by the user when an explicit
5214 section name is also used. This is a common idiom used by many
5215 software projects. */
5216 if (DECL_SECTION_NAME (decl
) != NULL_TREE
5217 && !DECL_HAS_IMPLICIT_SECTION_NAME_P (decl
))
5220 if (TREE_STATIC (decl
))
5221 return (alignment
<= MAX_OFILE_ALIGNMENT
);
5223 return (alignment
<= MAX_STACK_ALIGNMENT
);
5227 /* Return whether the data reference DR is supported with respect to its
5229 If CHECK_ALIGNED_ACCESSES is TRUE, check if the access is supported even
5230 it is aligned, i.e., check if it is possible to vectorize it with different
5233 enum dr_alignment_support
5234 vect_supportable_dr_alignment (struct data_reference
*dr
,
5235 bool check_aligned_accesses
)
5237 gimple stmt
= DR_STMT (dr
);
5238 stmt_vec_info stmt_info
= vinfo_for_stmt (stmt
);
5239 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5240 enum machine_mode mode
= TYPE_MODE (vectype
);
5241 loop_vec_info loop_vinfo
= STMT_VINFO_LOOP_VINFO (stmt_info
);
5242 struct loop
*vect_loop
= NULL
;
5243 bool nested_in_vect_loop
= false;
5245 if (aligned_access_p (dr
) && !check_aligned_accesses
)
5248 /* For now assume all conditional loads/stores support unaligned
5249 access without any special code. */
5250 if (is_gimple_call (stmt
)
5251 && gimple_call_internal_p (stmt
)
5252 && (gimple_call_internal_fn (stmt
) == IFN_MASK_LOAD
5253 || gimple_call_internal_fn (stmt
) == IFN_MASK_STORE
))
5254 return dr_unaligned_supported
;
5258 vect_loop
= LOOP_VINFO_LOOP (loop_vinfo
);
5259 nested_in_vect_loop
= nested_in_vect_loop_p (vect_loop
, stmt
);
5262 /* Possibly unaligned access. */
5264 /* We can choose between using the implicit realignment scheme (generating
5265 a misaligned_move stmt) and the explicit realignment scheme (generating
5266 aligned loads with a REALIGN_LOAD). There are two variants to the
5267 explicit realignment scheme: optimized, and unoptimized.
5268 We can optimize the realignment only if the step between consecutive
5269 vector loads is equal to the vector size. Since the vector memory
5270 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
5271 is guaranteed that the misalignment amount remains the same throughout the
5272 execution of the vectorized loop. Therefore, we can create the
5273 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
5274 at the loop preheader.
5276 However, in the case of outer-loop vectorization, when vectorizing a
5277 memory access in the inner-loop nested within the LOOP that is now being
5278 vectorized, while it is guaranteed that the misalignment of the
5279 vectorized memory access will remain the same in different outer-loop
5280 iterations, it is *not* guaranteed that is will remain the same throughout
5281 the execution of the inner-loop. This is because the inner-loop advances
5282 with the original scalar step (and not in steps of VS). If the inner-loop
5283 step happens to be a multiple of VS, then the misalignment remains fixed
5284 and we can use the optimized realignment scheme. For example:
5290 When vectorizing the i-loop in the above example, the step between
5291 consecutive vector loads is 1, and so the misalignment does not remain
5292 fixed across the execution of the inner-loop, and the realignment cannot
5293 be optimized (as illustrated in the following pseudo vectorized loop):
5295 for (i=0; i<N; i+=4)
5296 for (j=0; j<M; j++){
5297 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
5298 // when j is {0,1,2,3,4,5,6,7,...} respectively.
5299 // (assuming that we start from an aligned address).
5302 We therefore have to use the unoptimized realignment scheme:
5304 for (i=0; i<N; i+=4)
5305 for (j=k; j<M; j+=4)
5306 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
5307 // that the misalignment of the initial address is
5310 The loop can then be vectorized as follows:
5312 for (k=0; k<4; k++){
5313 rt = get_realignment_token (&vp[k]);
5314 for (i=0; i<N; i+=4){
5316 for (j=k; j<M; j+=4){
5318 va = REALIGN_LOAD <v1,v2,rt>;
5325 if (DR_IS_READ (dr
))
5327 bool is_packed
= false;
5328 tree type
= (TREE_TYPE (DR_REF (dr
)));
5330 if (optab_handler (vec_realign_load_optab
, mode
) != CODE_FOR_nothing
5331 && (!targetm
.vectorize
.builtin_mask_for_load
5332 || targetm
.vectorize
.builtin_mask_for_load ()))
5334 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
5335 if ((nested_in_vect_loop
5336 && (TREE_INT_CST_LOW (DR_STEP (dr
))
5337 != GET_MODE_SIZE (TYPE_MODE (vectype
))))
5339 return dr_explicit_realign
;
5341 return dr_explicit_realign_optimized
;
5343 if (!known_alignment_for_access_p (dr
))
5344 is_packed
= not_size_aligned (DR_REF (dr
));
5346 if ((TYPE_USER_ALIGN (type
) && !is_packed
)
5347 || targetm
.vectorize
.
5348 support_vector_misalignment (mode
, type
,
5349 DR_MISALIGNMENT (dr
), is_packed
))
5350 /* Can't software pipeline the loads, but can at least do them. */
5351 return dr_unaligned_supported
;
5355 bool is_packed
= false;
5356 tree type
= (TREE_TYPE (DR_REF (dr
)));
5358 if (!known_alignment_for_access_p (dr
))
5359 is_packed
= not_size_aligned (DR_REF (dr
));
5361 if ((TYPE_USER_ALIGN (type
) && !is_packed
)
5362 || targetm
.vectorize
.
5363 support_vector_misalignment (mode
, type
,
5364 DR_MISALIGNMENT (dr
), is_packed
))
5365 return dr_unaligned_supported
;
5369 return dr_unaligned_unsupported
;