1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
3 Free Software Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "tree-pretty-print.h"
32 #include "gimple-pretty-print.h"
33 #include "tree-flow.h"
34 #include "tree-dump.h"
35 #include "cfgloop.h"
36 #include "tree-chrec.h"
37 #include "tree-scalar-evolution.h"
38 #include "tree-vectorizer.h"
39 #include "toplev.h"
40
41 /* Need to include rtl.h, expr.h, etc. for optabs. */
42 #include "expr.h"
43 #include "optabs.h"
44
45 /* Return the smallest scalar part of STMT.
46 This is used to determine the vectype of the stmt. We generally set the
47 vectype according to the type of the result (lhs). For stmts whose
48 result-type is different than the type of the arguments (e.g., demotion,
49 promotion), vectype will be reset appropriately (later). Note that we have
50 to visit the smallest datatype in this function, because that determines the
51 VF. If the smallest datatype in the loop is present only as the rhs of a
52       promotion operation, we'd miss it.
53 Such a case, where a variable of this datatype does not appear in the lhs
54 anywhere in the loop, can only occur if it's an invariant: e.g.:
55 'int_x = (int) short_inv', which we'd expect to have been optimized away by
56 invariant motion. However, we cannot rely on invariant motion to always take
57 invariants out of the loop, and so in the case of promotion we also have to
58 check the rhs.
59 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
60 types. */
61
62 tree
63 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
64 HOST_WIDE_INT *rhs_size_unit)
65 {
66 tree scalar_type = gimple_expr_type (stmt);
67 HOST_WIDE_INT lhs, rhs;
68
69 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
70
71 if (is_gimple_assign (stmt)
72 && (gimple_assign_cast_p (stmt)
73 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
74 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
75 {
76 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
77
78 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
79 if (rhs < lhs)
80 scalar_type = rhs_type;
81 }
82
83 *lhs_size_unit = lhs;
84 *rhs_size_unit = rhs;
85 return scalar_type;
86 }
87
88
89 /* Find the place of the data-ref in STMT in the interleaving chain that starts
90 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
91
92 int
93 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
94 {
95 gimple next_stmt = first_stmt;
96 int result = 0;
97
98 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
99 return -1;
100
101 while (next_stmt && next_stmt != stmt)
102 {
103 result++;
104 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
105 }
106
107 if (next_stmt)
108 return result;
109 else
110 return -1;
111 }
112
113
114 /* Function vect_insert_into_interleaving_chain.
115
116 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
117
118 static void
119 vect_insert_into_interleaving_chain (struct data_reference *dra,
120 struct data_reference *drb)
121 {
122 gimple prev, next;
123 tree next_init;
124 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
125 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
126
127 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
128 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
129 while (next)
130 {
131 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
132 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
133 {
134 /* Insert here. */
135 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
136 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
137 return;
138 }
139 prev = next;
140 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
141 }
142
143 /* We got to the end of the list. Insert here. */
144 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
145 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
146 }
147
148
149 /* Function vect_update_interleaving_chain.
150
151    For two data-refs DRA and DRB that are part of a chain of interleaved data
152    accesses, update the interleaving chain.  DRB's INIT is smaller than DRA's.
153
154 There are four possible cases:
155 1. New stmts - both DRA and DRB are not a part of any chain:
156 FIRST_DR = DRB
157 NEXT_DR (DRB) = DRA
158 2. DRB is a part of a chain and DRA is not:
159 no need to update FIRST_DR
160 no need to insert DRB
161 insert DRA according to init
162 3. DRA is a part of a chain and DRB is not:
163 if (init of FIRST_DR > init of DRB)
164 FIRST_DR = DRB
165 NEXT(FIRST_DR) = previous FIRST_DR
166 else
167 insert DRB according to its init
168 4. both DRA and DRB are in some interleaving chains:
169 choose the chain with the smallest init of FIRST_DR
170 insert the nodes of the second chain into the first one. */
171
172 static void
173 vect_update_interleaving_chain (struct data_reference *drb,
174 struct data_reference *dra)
175 {
176 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
177 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
178 tree next_init, init_dra_chain, init_drb_chain;
179 gimple first_a, first_b;
180 tree node_init;
181 gimple node, prev, next, first_stmt;
182
183 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
184 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
185 {
186 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
187 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
188 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
189 return;
190 }
191
192 /* 2. DRB is a part of a chain and DRA is not. */
193 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
194 {
195 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
196 /* Insert DRA into the chain of DRB. */
197 vect_insert_into_interleaving_chain (dra, drb);
198 return;
199 }
200
201 /* 3. DRA is a part of a chain and DRB is not. */
202 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
203 {
204 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
205 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
206 old_first_stmt)));
207 gimple tmp;
208
209 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
210 {
211 /* DRB's init is smaller than the init of the stmt previously marked
212 as the first stmt of the interleaving chain of DRA. Therefore, we
213            update FIRST_STMT and put DRB at the head of the list.  */
214 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
215 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
216
217 /* Update all the stmts in the list to point to the new FIRST_STMT. */
218 tmp = old_first_stmt;
219 while (tmp)
220 {
221 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
222 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
223 }
224 }
225 else
226 {
227 /* Insert DRB in the list of DRA. */
228 vect_insert_into_interleaving_chain (drb, dra);
229 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
230 }
231 return;
232 }
233
234 /* 4. both DRA and DRB are in some interleaving chains. */
235 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
236 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
237 if (first_a == first_b)
238 return;
239 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
240 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
241
242 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
243 {
244       /* Insert the nodes of the DRA chain into the DRB chain.
245          After inserting a node, continue from this node of the DRB chain (don't
246          start from the beginning).  */
247 node = DR_GROUP_FIRST_DR (stmtinfo_a);
248 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
249 first_stmt = first_b;
250 }
251 else
252 {
253       /* Insert the nodes of the DRB chain into the DRA chain.
254          After inserting a node, continue from this node of the DRA chain (don't
255          start from the beginning).  */
256 node = DR_GROUP_FIRST_DR (stmtinfo_b);
257 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
258 first_stmt = first_a;
259 }
260
261 while (node)
262 {
263 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
264 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
265 while (next)
266 {
267 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
268 if (tree_int_cst_compare (next_init, node_init) > 0)
269 {
270 /* Insert here. */
271 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
272 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
273 prev = node;
274 break;
275 }
276 prev = next;
277 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
278 }
279 if (!next)
280 {
281 /* We got to the end of the list. Insert here. */
282 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
283 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
284 prev = node;
285 }
286 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
287 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
288 }
289 }
290
291
292 /* Function vect_equal_offsets.
293
294 Check if OFFSET1 and OFFSET2 are identical expressions. */
295
296 static bool
297 vect_equal_offsets (tree offset1, tree offset2)
298 {
299 bool res;
300
301 STRIP_NOPS (offset1);
302 STRIP_NOPS (offset2);
303
304 if (offset1 == offset2)
305 return true;
306
307 if (TREE_CODE (offset1) != TREE_CODE (offset2)
308 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
309 return false;
310
311 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
312 TREE_OPERAND (offset2, 0));
313
314 if (!res || !BINARY_CLASS_P (offset1))
315 return res;
316
317 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
318 TREE_OPERAND (offset2, 1));
319
320 return res;
321 }
322
323
324 /* Function vect_check_interleaving.
325
326    Check whether DRA and DRB are part of an interleaving group.  In case they
327    are, insert DRA and DRB into an interleaving chain.  */
328
329 static bool
330 vect_check_interleaving (struct data_reference *dra,
331 struct data_reference *drb)
332 {
333 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
334
335   /* Check that the data-refs have the same first location (except init) and
336      that both are either stores or loads (not one load and one store).  */
337 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
338 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
339 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
340 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
341 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
342 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
343 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
344 || DR_IS_READ (dra) != DR_IS_READ (drb))
345 return false;
346
347 /* Check:
348 1. data-refs are of the same type
349 2. their steps are equal
350 3. the step (if greater than zero) is greater than the difference between
351 data-refs' inits. */
352 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
353 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
354
355 if (type_size_a != type_size_b
356 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
357 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
358 TREE_TYPE (DR_REF (drb))))
359 return false;
360
361 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
362 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
363 step = TREE_INT_CST_LOW (DR_STEP (dra));
364
365 if (init_a > init_b)
366 {
367 /* If init_a == init_b + the size of the type * k, we have an interleaving,
368 and DRB is accessed before DRA. */
369 diff_mod_size = (init_a - init_b) % type_size_a;
370
371 if (step && (init_a - init_b) > step)
372 return false;
373
374 if (diff_mod_size == 0)
375 {
376 vect_update_interleaving_chain (drb, dra);
377 if (vect_print_dump_info (REPORT_DR_DETAILS))
378 {
379 fprintf (vect_dump, "Detected interleaving ");
380 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
381 fprintf (vect_dump, " and ");
382 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
383 }
384 return true;
385 }
386 }
387 else
388 {
389 /* If init_b == init_a + the size of the type * k, we have an
390 interleaving, and DRA is accessed before DRB. */
391 diff_mod_size = (init_b - init_a) % type_size_a;
392
393 if (step && (init_b - init_a) > step)
394 return false;
395
396 if (diff_mod_size == 0)
397 {
398 vect_update_interleaving_chain (dra, drb);
399 if (vect_print_dump_info (REPORT_DR_DETAILS))
400 {
401 fprintf (vect_dump, "Detected interleaving ");
402 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
403 fprintf (vect_dump, " and ");
404 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
405 }
406 return true;
407 }
408 }
409
410 return false;
411 }
412
413 /* Check whether the data references pointed to by DR_I and DR_J are the same
414    or belong to the same interleaving group.  Return FALSE if the drs are
415    different, otherwise return TRUE.  */
416
417 static bool
418 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
419 {
420 gimple stmt_i = DR_STMT (dr_i);
421 gimple stmt_j = DR_STMT (dr_j);
422
423 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
424 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
425 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
426 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
427 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
428 return true;
429 else
430 return false;
431 }
432
433 /* If address ranges represented by DDR_I and DDR_J are equal,
434 return TRUE, otherwise return FALSE. */
435
436 static bool
437 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
438 {
439 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
440 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
441 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
442 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
443 return true;
444 else
445 return false;
446 }
447
448 /* Insert DDR into LOOP_VINFO's list of ddrs that may alias and need to be
449    tested at run-time.  Return TRUE if DDR was successfully inserted.
450    Return FALSE if versioning is not supported.  */
451
452 static bool
453 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
454 {
455 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
456
457 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
458 return false;
459
460 if (vect_print_dump_info (REPORT_DR_DETAILS))
461 {
462 fprintf (vect_dump, "mark for run-time aliasing test between ");
463 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
464 fprintf (vect_dump, " and ");
465 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
466 }
467
468 if (optimize_loop_nest_for_size_p (loop))
469 {
470 if (vect_print_dump_info (REPORT_DR_DETAILS))
471 fprintf (vect_dump, "versioning not supported when optimizing for size.");
472 return false;
473 }
474
475 /* FORNOW: We don't support versioning with outer-loop vectorization. */
476 if (loop->inner)
477 {
478 if (vect_print_dump_info (REPORT_DR_DETAILS))
479 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
480 return false;
481 }
482
483 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
484 return true;
485 }
486
487
488 /* Function vect_analyze_data_ref_dependence.
489
490 Return TRUE if there (might) exist a dependence between a memory-reference
491    DRA and a memory-reference DRB.  When the dependence can instead be checked
492    at run time by versioning for alias, return FALSE.  Adjust *MAX_VF according
493    to the data dependence.  */
494
495 static bool
496 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
497 loop_vec_info loop_vinfo, int *max_vf)
498 {
499 unsigned int i;
500 struct loop *loop = NULL;
501 struct data_reference *dra = DDR_A (ddr);
502 struct data_reference *drb = DDR_B (ddr);
503 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
504 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
505 lambda_vector dist_v;
506 unsigned int loop_depth;
507
508 /* Don't bother to analyze statements marked as unvectorizable. */
509 if (!STMT_VINFO_VECTORIZABLE (stmtinfo_a)
510 || !STMT_VINFO_VECTORIZABLE (stmtinfo_b))
511 return false;
512
513 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
514 {
515 /* Independent data accesses. */
516 vect_check_interleaving (dra, drb);
517 return false;
518 }
519
520 if (loop_vinfo)
521 loop = LOOP_VINFO_LOOP (loop_vinfo);
522
523 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
524 return false;
525
526 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
527 {
528 if (loop_vinfo)
529 {
530 if (vect_print_dump_info (REPORT_DR_DETAILS))
531 {
532 fprintf (vect_dump, "versioning for alias required: "
533 "can't determine dependence between ");
534 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
535 fprintf (vect_dump, " and ");
536 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
537 }
538
539 /* Add to list of ddrs that need to be tested at run-time. */
540 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
541 }
542
543       /* When vectorizing a basic block, an unknown dependence can still mean
544          a strided access.  */
545 if (vect_check_interleaving (dra, drb))
546 return false;
547
548 if (vect_print_dump_info (REPORT_DR_DETAILS))
549 {
550 fprintf (vect_dump, "can't determine dependence between ");
551 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
552 fprintf (vect_dump, " and ");
553 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
554 }
555
556 /* Mark the statements as unvectorizable. */
557 STMT_VINFO_VECTORIZABLE (stmtinfo_a) = false;
558 STMT_VINFO_VECTORIZABLE (stmtinfo_b) = false;
559
560 return false;
561 }
562
563   /* Versioning for alias is not yet supported for basic block SLP, and
564      dependence distance is not applicable; hence, in case of a known data
565      dependence, basic block vectorization is impossible for now.  */
566 if (!loop_vinfo)
567 {
568 if (dra != drb && vect_check_interleaving (dra, drb))
569 return false;
570
571 if (vect_print_dump_info (REPORT_DR_DETAILS))
572 {
573 fprintf (vect_dump, "determined dependence between ");
574 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
575 fprintf (vect_dump, " and ");
576 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
577 }
578
579 return true;
580 }
581
582 /* Loop-based vectorization and known data dependence. */
583 if (DDR_NUM_DIST_VECTS (ddr) == 0)
584 {
585 if (vect_print_dump_info (REPORT_DR_DETAILS))
586 {
587 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
588 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
589 fprintf (vect_dump, " and ");
590 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
591 }
592 /* Add to list of ddrs that need to be tested at run-time. */
593 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
594 }
595
596 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
597 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
598 {
599 int dist = dist_v[loop_depth];
600
601 if (vect_print_dump_info (REPORT_DR_DETAILS))
602 fprintf (vect_dump, "dependence distance = %d.", dist);
603
604 if (dist == 0)
605 {
606 if (vect_print_dump_info (REPORT_DR_DETAILS))
607 {
608 fprintf (vect_dump, "dependence distance == 0 between ");
609 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610 fprintf (vect_dump, " and ");
611 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
612 }
613
614           /* For interleaving, mark that there is a read-write dependency if
615              necessary.  We have already checked that one of the data-refs is a store.  */
616 if (DR_IS_READ (dra))
617 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
618 else
619 {
620 if (DR_IS_READ (drb))
621 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
622 }
623
624 continue;
625 }
626
627 if (dist > 0 && DDR_REVERSED_P (ddr))
628 {
629 /* If DDR_REVERSED_P the order of the data-refs in DDR was
630 reversed (to make distance vector positive), and the actual
631 distance is negative. */
632 if (vect_print_dump_info (REPORT_DR_DETAILS))
633 fprintf (vect_dump, "dependence distance negative.");
634 continue;
635 }
636
637 if (abs (dist) >= 2
638 && abs (dist) < *max_vf)
639 {
640 /* The dependence distance requires reduction of the maximal
641 vectorization factor. */
642 *max_vf = abs (dist);
643 if (vect_print_dump_info (REPORT_DR_DETAILS))
644 fprintf (vect_dump, "adjusting maximal vectorization factor to %i",
645 *max_vf);
646 }
647
648 if (abs (dist) >= *max_vf)
649 {
650 /* Dependence distance does not create dependence, as far as
651 vectorization is concerned, in this case. */
652 if (vect_print_dump_info (REPORT_DR_DETAILS))
653 fprintf (vect_dump, "dependence distance >= VF.");
654 continue;
655 }
656
657 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
658 {
659 fprintf (vect_dump, "not vectorized, possible dependence "
660 "between data-refs ");
661 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
662 fprintf (vect_dump, " and ");
663 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
664 }
665
666 return true;
667 }
668
669 return false;
670 }
671
672 /* Function vect_analyze_data_ref_dependences.
673
674 Examine all the data references in the loop, and make sure there do not
675 exist any data dependences between them. Set *MAX_VF according to
676 the maximum vectorization factor the data dependences allow. */
677
678 bool
679 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
680 bb_vec_info bb_vinfo, int *max_vf)
681 {
682 unsigned int i;
683 VEC (ddr_p, heap) *ddrs = NULL;
684 struct data_dependence_relation *ddr;
685
686 if (vect_print_dump_info (REPORT_DETAILS))
687 fprintf (vect_dump, "=== vect_analyze_dependences ===");
688
689 if (loop_vinfo)
690 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
691 else
692 ddrs = BB_VINFO_DDRS (bb_vinfo);
693
694 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
695 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo, max_vf))
696 return false;
697
698 return true;
699 }
700
701
702 /* Function vect_compute_data_ref_alignment
703
704 Compute the misalignment of the data reference DR.
705
706 Output:
707 1. If during the misalignment computation it is found that the data reference
708 cannot be vectorized then false is returned.
709 2. DR_MISALIGNMENT (DR) is defined.
710
711 FOR NOW: No analysis is actually performed. Misalignment is calculated
712 only for trivial cases. TODO. */
713
714 static bool
715 vect_compute_data_ref_alignment (struct data_reference *dr)
716 {
717 gimple stmt = DR_STMT (dr);
718 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
719 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
720 struct loop *loop = NULL;
721 tree ref = DR_REF (dr);
722 tree vectype;
723 tree base, base_addr;
724 bool base_aligned;
725 tree misalign;
726 tree aligned_to, alignment;
727
728 if (vect_print_dump_info (REPORT_DETAILS))
729 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
730
731 if (loop_vinfo)
732 loop = LOOP_VINFO_LOOP (loop_vinfo);
733
734 /* Initialize misalignment to unknown. */
735 SET_DR_MISALIGNMENT (dr, -1);
736
737 misalign = DR_INIT (dr);
738 aligned_to = DR_ALIGNED_TO (dr);
739 base_addr = DR_BASE_ADDRESS (dr);
740 vectype = STMT_VINFO_VECTYPE (stmt_info);
741
742 /* In case the dataref is in an inner-loop of the loop that is being
743 vectorized (LOOP), we use the base and misalignment information
744 relative to the outer-loop (LOOP). This is ok only if the misalignment
745 stays the same throughout the execution of the inner-loop, which is why
746      we have to check that the stride of the dataref in the inner-loop is
747      evenly divisible by the vector size.  */
748 if (loop && nested_in_vect_loop_p (loop, stmt))
749 {
750 tree step = DR_STEP (dr);
751 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
752
753 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
754 {
755 if (vect_print_dump_info (REPORT_ALIGNMENT))
756 fprintf (vect_dump, "inner step divides the vector-size.");
757 misalign = STMT_VINFO_DR_INIT (stmt_info);
758 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
759 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
760 }
761 else
762 {
763 if (vect_print_dump_info (REPORT_ALIGNMENT))
764 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
765 misalign = NULL_TREE;
766 }
767 }
768
769 base = build_fold_indirect_ref (base_addr);
770 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
771
772 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
773 || !misalign)
774 {
775 if (vect_print_dump_info (REPORT_ALIGNMENT))
776 {
777 fprintf (vect_dump, "Unknown alignment for access: ");
778 print_generic_expr (vect_dump, base, TDF_SLIM);
779 }
780 return true;
781 }
782
783 if ((DECL_P (base)
784 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
785 alignment) >= 0)
786 || (TREE_CODE (base_addr) == SSA_NAME
787 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
788 TREE_TYPE (base_addr)))),
789 alignment) >= 0))
790 base_aligned = true;
791 else
792 base_aligned = false;
793
794 if (!base_aligned)
795 {
796 /* Do not change the alignment of global variables if
797 flag_section_anchors is enabled. */
798 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
799 || (TREE_STATIC (base) && flag_section_anchors))
800 {
801 if (vect_print_dump_info (REPORT_DETAILS))
802 {
803 fprintf (vect_dump, "can't force alignment of ref: ");
804 print_generic_expr (vect_dump, ref, TDF_SLIM);
805 }
806 return true;
807 }
808
809 /* Force the alignment of the decl.
810 NOTE: This is the only change to the code we make during
811 the analysis phase, before deciding to vectorize the loop. */
812 if (vect_print_dump_info (REPORT_DETAILS))
813 fprintf (vect_dump, "force alignment");
814 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
815 DECL_USER_ALIGN (base) = 1;
816 }
817
818 /* At this point we assume that the base is aligned. */
819 gcc_assert (base_aligned
820 || (TREE_CODE (base) == VAR_DECL
821 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
822
823 /* Modulo alignment. */
824 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
825
826 if (!host_integerp (misalign, 1))
827 {
828 /* Negative or overflowed misalignment value. */
829 if (vect_print_dump_info (REPORT_DETAILS))
830 fprintf (vect_dump, "unexpected misalign value");
831 return false;
832 }
833
834 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
835
836 if (vect_print_dump_info (REPORT_DETAILS))
837 {
838 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
839 print_generic_expr (vect_dump, ref, TDF_SLIM);
840 }
841
842 return true;
843 }
844
845
846 /* Function vect_compute_data_refs_alignment
847
848 Compute the misalignment of data references in the loop.
849 Return FALSE if a data reference is found that cannot be vectorized. */
850
851 static bool
852 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
853 bb_vec_info bb_vinfo)
854 {
855 VEC (data_reference_p, heap) *datarefs;
856 struct data_reference *dr;
857 unsigned int i;
858
859 if (loop_vinfo)
860 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
861 else
862 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
863
864 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
865 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
866 && !vect_compute_data_ref_alignment (dr))
867 {
868 if (bb_vinfo)
869 {
870 /* Mark unsupported statement as unvectorizable. */
871 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
872 continue;
873 }
874 else
875 return false;
876 }
877
878 return true;
879 }
880
881
882 /* Function vect_update_misalignment_for_peel
883
884 DR - the data reference whose misalignment is to be adjusted.
885 DR_PEEL - the data reference whose misalignment is being made
886 zero in the vector loop by the peel.
887 NPEEL - the number of iterations in the peel loop if the misalignment
888 of DR_PEEL is known at compile time. */
889
890 static void
891 vect_update_misalignment_for_peel (struct data_reference *dr,
892 struct data_reference *dr_peel, int npeel)
893 {
894 unsigned int i;
895 VEC(dr_p,heap) *same_align_drs;
896 struct data_reference *current_dr;
897 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
898 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
899 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
900 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
901
902 /* For interleaved data accesses the step in the loop must be multiplied by
903 the size of the interleaving group. */
904 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
905 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
906 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
907 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
908
909 /* It can be assumed that the data refs with the same alignment as dr_peel
910 are aligned in the vector loop. */
911 same_align_drs
912 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
913 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
914 {
915 if (current_dr != dr)
916 continue;
917 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
918 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
919 SET_DR_MISALIGNMENT (dr, 0);
920 return;
921 }
922
923 if (known_alignment_for_access_p (dr)
924 && known_alignment_for_access_p (dr_peel))
925 {
926 int misal = DR_MISALIGNMENT (dr);
927 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
928 misal += npeel * dr_size;
929 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
930 SET_DR_MISALIGNMENT (dr, misal);
931 return;
932 }
933
934 if (vect_print_dump_info (REPORT_DETAILS))
935 fprintf (vect_dump, "Setting misalignment to -1.");
936 SET_DR_MISALIGNMENT (dr, -1);
937 }
938
939
940 /* Function vect_verify_datarefs_alignment
941
942 Return TRUE if all data references in the loop can be
943 handled with respect to alignment. */
944
945 bool
946 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
947 {
948 VEC (data_reference_p, heap) *datarefs;
949 struct data_reference *dr;
950 enum dr_alignment_support supportable_dr_alignment;
951 unsigned int i;
952
953 if (loop_vinfo)
954 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
955 else
956 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
957
958 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
959 {
960 gimple stmt = DR_STMT (dr);
961 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
962
963 /* For interleaving, only the alignment of the first access matters.
964 Skip statements marked as not vectorizable. */
965 if ((STMT_VINFO_STRIDED_ACCESS (stmt_info)
966 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
967 || !STMT_VINFO_VECTORIZABLE (stmt_info))
968 continue;
969
970 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
971 if (!supportable_dr_alignment)
972 {
973 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
974 {
975 if (DR_IS_READ (dr))
976 fprintf (vect_dump,
977 "not vectorized: unsupported unaligned load.");
978 else
979 fprintf (vect_dump,
980 "not vectorized: unsupported unaligned store.");
981
982 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
983 }
984 return false;
985 }
986 if (supportable_dr_alignment != dr_aligned
987 && vect_print_dump_info (REPORT_ALIGNMENT))
988 fprintf (vect_dump, "Vectorizing an unaligned access.");
989 }
990 return true;
991 }
992
993
994 /* Function vector_alignment_reachable_p
995
996 Return true if vector alignment for DR is reachable by peeling
997 a few loop iterations. Return false otherwise. */
998
999 static bool
1000 vector_alignment_reachable_p (struct data_reference *dr)
1001 {
1002 gimple stmt = DR_STMT (dr);
1003 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1004 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1005
1006 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1007 {
1008       /* For interleaved accesses we peel only if the number of iterations in
1009          the prolog loop (VF - misalignment) is a multiple of the
1010          number of interleaved accesses.  */
1011 int elem_size, mis_in_elements;
1012 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1013
1014 /* FORNOW: handle only known alignment. */
1015 if (!known_alignment_for_access_p (dr))
1016 return false;
1017
1018 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
1019 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
1020
1021 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
1022 return false;
1023 }
1024
1025   /* If the misalignment is known at compile time, allow peeling
1026      only if natural alignment is reachable through peeling.  */
1027 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
1028 {
1029 HOST_WIDE_INT elmsize =
1030 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
1031 if (vect_print_dump_info (REPORT_DETAILS))
1032 {
1033 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1034 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1035 }
1036 if (DR_MISALIGNMENT (dr) % elmsize)
1037 {
1038 if (vect_print_dump_info (REPORT_DETAILS))
1039 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1040 return false;
1041 }
1042 }
1043
1044 if (!known_alignment_for_access_p (dr))
1045 {
1046 tree type = (TREE_TYPE (DR_REF (dr)));
1047 tree ba = DR_BASE_OBJECT (dr);
1048 bool is_packed = false;
1049
1050 if (ba)
1051 is_packed = contains_packed_reference (ba);
1052
1053 if (vect_print_dump_info (REPORT_DETAILS))
1054 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1055 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1056 return true;
1057 else
1058 return false;
1059 }
1060
1061 return true;
1062 }
1063
1064 /* Function vect_enhance_data_refs_alignment
1065
1066 This pass will use loop versioning and loop peeling in order to enhance
1067 the alignment of data references in the loop.
1068
1069 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1070    original loop is to be vectorized; any other loops that are created by
1071    the transformations performed in this pass are not supposed to be
1072 vectorized. This restriction will be relaxed.
1073
1074    This pass will require a cost model to guide it in deciding whether to apply
1075    peeling or versioning or a combination of the two.  For example, the scheme
1076    that Intel uses when given a loop with several memory accesses is as follows:
1077    choose one memory access ('p') whose alignment you want to force by peeling.
1078    Then, either (1) generate a loop in which 'p' is aligned and all
1079 other accesses are not necessarily aligned, or (2) use loop versioning to
1080 generate one loop in which all accesses are aligned, and another loop in
1081 which only 'p' is necessarily aligned.
1082
1083 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1084    Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1085 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1086
1087 Devising a cost model is the most critical aspect of this work. It will
1088 guide us on which access to peel for, whether to use loop versioning, how
1089 many versions to create, etc. The cost model will probably consist of
1090 generic considerations as well as target specific considerations (on
1091 powerpc for example, misaligned stores are more painful than misaligned
1092 loads).
1093
1094 Here are the general steps involved in alignment enhancements:
1095
1096 -- original loop, before alignment analysis:
1097 for (i=0; i<N; i++){
1098 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1099 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1100 }
1101
1102 -- After vect_compute_data_refs_alignment:
1103 for (i=0; i<N; i++){
1104 x = q[i]; # DR_MISALIGNMENT(q) = 3
1105 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1106 }
1107
1108 -- Possibility 1: we do loop versioning:
1109 if (p is aligned) {
1110 for (i=0; i<N; i++){ # loop 1A
1111 x = q[i]; # DR_MISALIGNMENT(q) = 3
1112 p[i] = y; # DR_MISALIGNMENT(p) = 0
1113 }
1114 }
1115 else {
1116 for (i=0; i<N; i++){ # loop 1B
1117 x = q[i]; # DR_MISALIGNMENT(q) = 3
1118 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1119 }
1120 }
1121
1122 -- Possibility 2: we do loop peeling:
1123 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1124 x = q[i];
1125 p[i] = y;
1126 }
1127 for (i = 3; i < N; i++){ # loop 2A
1128 x = q[i]; # DR_MISALIGNMENT(q) = 0
1129 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1130 }
1131
1132 -- Possibility 3: combination of loop peeling and versioning:
1133 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1134 x = q[i];
1135 p[i] = y;
1136 }
1137 if (p is aligned) {
1138 for (i = 3; i<N; i++){ # loop 3A
1139 x = q[i]; # DR_MISALIGNMENT(q) = 0
1140 p[i] = y; # DR_MISALIGNMENT(p) = 0
1141 }
1142 }
1143 else {
1144 for (i = 3; i<N; i++){ # loop 3B
1145 x = q[i]; # DR_MISALIGNMENT(q) = 0
1146 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1147 }
1148 }
1149
1150 These loops are later passed to loop_transform to be vectorized. The
1151 vectorizer will use the alignment information to guide the transformation
1152 (whether to generate regular loads/stores, or with special handling for
1153 misalignment). */
1154
1155 bool
1156 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1157 {
1158 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1159 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1160 enum dr_alignment_support supportable_dr_alignment;
1161 struct data_reference *dr0 = NULL;
1162 struct data_reference *dr;
1163 unsigned int i;
1164 bool do_peeling = false;
1165 bool do_versioning = false;
1166 bool stat;
1167 gimple stmt;
1168 stmt_vec_info stmt_info;
1169 int vect_versioning_for_alias_required;
1170
1171 if (vect_print_dump_info (REPORT_DETAILS))
1172 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1173
1174 /* While cost model enhancements are expected in the future, the high level
1175 view of the code at this time is as follows:
1176
1177 A) If there is a misaligned access then see if peeling to align
1178 this access can make all data references satisfy
1179 vect_supportable_dr_alignment. If so, update data structures
1180 as needed and return true.
1181
1182 B) If peeling wasn't possible and there is a data reference with an
1183 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1184 then see if loop versioning checks can be used to make all data
1185 references satisfy vect_supportable_dr_alignment. If so, update
1186 data structures as needed and return true.
1187
1188 C) If neither peeling nor versioning were successful then return false if
1189 any data reference does not satisfy vect_supportable_dr_alignment.
1190
1191 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1192
1193 Note, Possibility 3 above (which is peeling and versioning together) is not
1194 being done at this time. */
1195
1196 /* (1) Peeling to force alignment. */
1197
1198 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1199 Considerations:
1200 + How many accesses will become aligned due to the peeling
1201 - How many accesses will become unaligned due to the peeling,
1202 and the cost of misaligned accesses.
1203 - The cost of peeling (the extra runtime checks, the increase
1204 in code size).
1205
1206 The scheme we use FORNOW: peel to force the alignment of the first
1207 unsupported misaligned access in the loop.
1208
1209 TODO: Use a cost model. */
1210
1211 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1212 {
1213 stmt = DR_STMT (dr);
1214 stmt_info = vinfo_for_stmt (stmt);
1215
1216 /* For interleaving, only the alignment of the first access
1217 matters. */
1218 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1219 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1220 continue;
1221
1222 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1223 {
1224 do_peeling = vector_alignment_reachable_p (dr);
1225 if (do_peeling)
1226 dr0 = dr;
1227 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1228 fprintf (vect_dump, "vector alignment may not be reachable");
1229 break;
1230 }
1231 }
1232
1233 vect_versioning_for_alias_required
1234 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1235
1236 /* Temporarily, if versioning for alias is required, we disable peeling
1237 until we support peeling and versioning. Often peeling for alignment
1238 will require peeling for loop-bound, which in turn requires that we
1239 know how to adjust the loop ivs after the loop. */
1240 if (vect_versioning_for_alias_required
1241 || !vect_can_advance_ivs_p (loop_vinfo)
1242 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1243 do_peeling = false;
1244
1245 if (do_peeling)
1246 {
1247 int mis;
1248 int npeel = 0;
1249 gimple stmt = DR_STMT (dr0);
1250 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1251 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1252 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1253
1254 if (known_alignment_for_access_p (dr0))
1255 {
1256 /* Since it's known at compile time, compute the number of iterations
1257 in the peeled loop (the peeling factor) for use in updating
1258 DR_MISALIGNMENT values. The peeling factor is the vectorization
1259 factor minus the misalignment as an element count. */
1260 mis = DR_MISALIGNMENT (dr0);
1261 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1262 npeel = nelements - mis;
1263
1264 /* For interleaved data access every iteration accesses all the
1265 members of the group, therefore we divide the number of iterations
1266 by the group size. */
1267 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1268 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1269 npeel /= DR_GROUP_SIZE (stmt_info);
1270
1271 if (vect_print_dump_info (REPORT_DETAILS))
1272 fprintf (vect_dump, "Try peeling by %d", npeel);
1273 }
1274
1275 /* Ensure that all data refs can be vectorized after the peel. */
1276 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1277 {
1278 int save_misalignment;
1279
1280 if (dr == dr0)
1281 continue;
1282
1283 stmt = DR_STMT (dr);
1284 stmt_info = vinfo_for_stmt (stmt);
1285 /* For interleaving, only the alignment of the first access
1286 matters. */
1287 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1288 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1289 continue;
1290
1291 save_misalignment = DR_MISALIGNMENT (dr);
1292 vect_update_misalignment_for_peel (dr, dr0, npeel);
1293 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1294 SET_DR_MISALIGNMENT (dr, save_misalignment);
1295
1296 if (!supportable_dr_alignment)
1297 {
1298 do_peeling = false;
1299 break;
1300 }
1301 }
1302
1303 if (do_peeling)
1304 {
1305 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1306 If the misalignment of DR_i is identical to that of dr0 then set
1307 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1308 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1309 by the peeling factor times the element size of DR_i (MOD the
1310 vectorization factor times the size). Otherwise, the
1311 misalignment of DR_i must be set to unknown. */
1312 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1313 if (dr != dr0)
1314 vect_update_misalignment_for_peel (dr, dr0, npeel);
1315
1316 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1317 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1318 SET_DR_MISALIGNMENT (dr0, 0);
1319 if (vect_print_dump_info (REPORT_ALIGNMENT))
1320 fprintf (vect_dump, "Alignment of access forced using peeling.");
1321
1322 if (vect_print_dump_info (REPORT_DETAILS))
1323 fprintf (vect_dump, "Peeling for alignment will be applied.");
1324
1325 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1326 gcc_assert (stat);
1327 return stat;
1328 }
1329 }
1330
1331
1332 /* (2) Versioning to force alignment. */
1333
1334 /* Try versioning if:
1335 1) flag_tree_vect_loop_version is TRUE
1336 2) optimize loop for speed
1337 3) there is at least one unsupported misaligned data ref with an unknown
1338 misalignment, and
1339 4) all misaligned data refs with a known misalignment are supported, and
1340 5) the number of runtime alignment checks is within reason. */
1341
1342 do_versioning =
1343 flag_tree_vect_loop_version
1344 && optimize_loop_nest_for_speed_p (loop)
1345 && (!loop->inner); /* FORNOW */
1346
1347 if (do_versioning)
1348 {
1349 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1350 {
1351 stmt = DR_STMT (dr);
1352 stmt_info = vinfo_for_stmt (stmt);
1353
1354 /* For interleaving, only the alignment of the first access
1355 matters. */
1356 if (aligned_access_p (dr)
1357 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1358 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1359 continue;
1360
1361 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1362
1363 if (!supportable_dr_alignment)
1364 {
1365 gimple stmt;
1366 int mask;
1367 tree vectype;
1368
1369 if (known_alignment_for_access_p (dr)
1370 || VEC_length (gimple,
1371 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1372 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1373 {
1374 do_versioning = false;
1375 break;
1376 }
1377
1378 stmt = DR_STMT (dr);
1379 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1380 gcc_assert (vectype);
1381
1382 /* The rightmost bits of an aligned address must be zeros.
1383 Construct the mask needed for this test. For example,
1384 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1385 mask must be 15 = 0xf. */
1386 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1387
1388 /* FORNOW: use the same mask to test all potentially unaligned
1389 references in the loop. The vectorizer currently supports
1390 a single vector size, see the reference to
1391 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1392 vectorization factor is computed. */
1393 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1394 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1395 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1396 VEC_safe_push (gimple, heap,
1397 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1398 DR_STMT (dr));
1399 }
1400 }
1401
1402 /* Versioning requires at least one misaligned data reference. */
1403 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1404 do_versioning = false;
1405 else if (!do_versioning)
1406 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1407 }
1408
1409 if (do_versioning)
1410 {
1411 VEC(gimple,heap) *may_misalign_stmts
1412 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1413 gimple stmt;
1414
1415 /* It can now be assumed that the data references in the statements
1416 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1417 of the loop being vectorized. */
1418 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1419 {
1420 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1421 dr = STMT_VINFO_DATA_REF (stmt_info);
1422 SET_DR_MISALIGNMENT (dr, 0);
1423 if (vect_print_dump_info (REPORT_ALIGNMENT))
1424 fprintf (vect_dump, "Alignment of access forced using versioning.");
1425 }
1426
1427 if (vect_print_dump_info (REPORT_DETAILS))
1428 fprintf (vect_dump, "Versioning for alignment will be applied.");
1429
1430 /* Peeling and versioning can't be done together at this time. */
1431 gcc_assert (! (do_peeling && do_versioning));
1432
1433 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1434 gcc_assert (stat);
1435 return stat;
1436 }
1437
1438 /* This point is reached if neither peeling nor versioning is being done. */
1439 gcc_assert (! (do_peeling || do_versioning));
1440
1441 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1442 return stat;
1443 }
1444
1445
1446 /* Function vect_find_same_alignment_drs.
1447
1448 Update group and alignment relations according to the chosen
1449 vectorization factor. */
1450
1451 static void
1452 vect_find_same_alignment_drs (struct data_dependence_relation *ddr,
1453 loop_vec_info loop_vinfo)
1454 {
1455 unsigned int i;
1456 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1457 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1458 struct data_reference *dra = DDR_A (ddr);
1459 struct data_reference *drb = DDR_B (ddr);
1460 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
1461 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
1462 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
1463 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
1464 lambda_vector dist_v;
1465 unsigned int loop_depth;
1466
1467 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
1468 return;
1469
1470 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
1471 return;
1472
1473 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
1474 return;
1475
1476 /* Loop-based vectorization and known data dependence. */
1477 if (DDR_NUM_DIST_VECTS (ddr) == 0)
1478 return;
1479
1480 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
1481 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
1482 {
1483 int dist = dist_v[loop_depth];
1484
1485 if (vect_print_dump_info (REPORT_DR_DETAILS))
1486 fprintf (vect_dump, "dependence distance = %d.", dist);
1487
1488 /* Same loop iteration. */
1489 if (dist == 0
1490 || (dist % vectorization_factor == 0 && dra_size == drb_size))
1491 {
1492 /* Two references with distance zero have the same alignment. */
1493 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
1494 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
1495 if (vect_print_dump_info (REPORT_ALIGNMENT))
1496 fprintf (vect_dump, "accesses have the same alignment.");
1497 if (vect_print_dump_info (REPORT_DR_DETAILS))
1498 {
1499 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
1500 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
1501 fprintf (vect_dump, " and ");
1502 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
1503 }
1504 }
1505 }
1506 }
1507
1508
1509 /* Function vect_analyze_data_refs_alignment
1510
1511 Analyze the alignment of the data-references in the loop.
1512 Return FALSE if a data reference is found that cannot be vectorized. */
1513
1514 bool
1515 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1516 bb_vec_info bb_vinfo)
1517 {
1518 if (vect_print_dump_info (REPORT_DETAILS))
1519 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1520
1521 /* Mark groups of data references with same alignment using
1522 data dependence information. */
1523 if (loop_vinfo)
1524 {
1525 VEC (ddr_p, heap) *ddrs = LOOP_VINFO_DDRS (loop_vinfo);
1526 struct data_dependence_relation *ddr;
1527 unsigned int i;
1528
1529 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
1530 vect_find_same_alignment_drs (ddr, loop_vinfo);
1531 }
1532
1533 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1534 {
1535 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1536 fprintf (vect_dump,
1537 "not vectorized: can't calculate alignment for data ref.");
1538 return false;
1539 }
1540
1541 return true;
1542 }
1543
1544
1545 /* Analyze groups of strided accesses: check that DR belongs to a group of
1546 strided accesses of legal size, step, etc. Detect gaps, single element
1547 interleaving, and other special cases. Set strided access info.
1548 Collect groups of strided stores for further use in SLP analysis. */
1549
1550 static bool
1551 vect_analyze_group_access (struct data_reference *dr)
1552 {
1553 tree step = DR_STEP (dr);
1554 tree scalar_type = TREE_TYPE (DR_REF (dr));
1555 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1556 gimple stmt = DR_STMT (dr);
1557 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1558 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1559 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1560 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1561 HOST_WIDE_INT stride;
1562 bool slp_impossible = false;
1563
1564 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1565 interleaving group (including gaps). */
1566 stride = dr_step / type_size;
1567
1568   /* A non-consecutive access is possible only if it is part of an interleaving group.  */
1569 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1570 {
1571       /* Check whether this DR is part of an interleaving group, and is a single
1572          element of the group that is accessed in the loop.  */
1573
1574 /* Gaps are supported only for loads. STEP must be a multiple of the type
1575 size. The size of the group must be a power of 2. */
1576 if (DR_IS_READ (dr)
1577 && (dr_step % type_size) == 0
1578 && stride > 0
1579 && exact_log2 (stride) != -1)
1580 {
1581 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1582 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1583 if (vect_print_dump_info (REPORT_DR_DETAILS))
1584 {
1585 fprintf (vect_dump, "Detected single element interleaving ");
1586 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1587 fprintf (vect_dump, " step ");
1588 print_generic_expr (vect_dump, step, TDF_SLIM);
1589 }
1590 return true;
1591 }
1592
1593 if (vect_print_dump_info (REPORT_DETAILS))
1594 {
1595 fprintf (vect_dump, "not consecutive access ");
1596 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1597 }
1598
1599 if (bb_vinfo)
1600 {
1601 /* Mark the statement as unvectorizable. */
1602 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1603 return true;
1604 }
1605
1606 return false;
1607 }
1608
1609 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1610 {
1611 /* First stmt in the interleaving chain. Check the chain. */
1612 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1613 struct data_reference *data_ref = dr;
1614 unsigned int count = 1;
1615 tree next_step;
1616 tree prev_init = DR_INIT (data_ref);
1617 gimple prev = stmt;
1618 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1619
1620 while (next)
1621 {
1622           /* Skip same data-refs.  In case two or more stmts share a data-ref
1623              (supported only for loads), we vectorize only the first stmt, and
1624              the rest get their vectorized loads from the first one.  */
1625 if (!tree_int_cst_compare (DR_INIT (data_ref),
1626 DR_INIT (STMT_VINFO_DATA_REF (
1627 vinfo_for_stmt (next)))))
1628 {
1629 if (!DR_IS_READ (data_ref))
1630 {
1631 if (vect_print_dump_info (REPORT_DETAILS))
1632 fprintf (vect_dump, "Two store stmts share the same dr.");
1633 return false;
1634 }
1635
1636               /* Check that there are no load-store dependencies for these loads,
1637                  to prevent a load-store-load sequence to the same location.  */
1638 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1639 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1640 {
1641 if (vect_print_dump_info (REPORT_DETAILS))
1642 fprintf (vect_dump,
1643 "READ_WRITE dependence in interleaving.");
1644 return false;
1645 }
1646
1647 /* For load use the same data-ref load. */
1648 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1649
1650 prev = next;
1651 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1652 continue;
1653 }
1654 prev = next;
1655
1656 /* Check that all the accesses have the same STEP. */
1657 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1658 if (tree_int_cst_compare (step, next_step))
1659 {
1660 if (vect_print_dump_info (REPORT_DETAILS))
1661 fprintf (vect_dump, "not consecutive access in interleaving");
1662 return false;
1663 }
1664
1665 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1666 /* Check that the distance between two accesses is equal to the type
1667 size. Otherwise, we have gaps. */
1668 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1669 - TREE_INT_CST_LOW (prev_init)) / type_size;
1670 if (diff != 1)
1671 {
1672 /* FORNOW: SLP of accesses with gaps is not supported. */
1673 slp_impossible = true;
1674 if (!DR_IS_READ (data_ref))
1675 {
1676 if (vect_print_dump_info (REPORT_DETAILS))
1677 fprintf (vect_dump, "interleaved store with gaps");
1678 return false;
1679 }
1680
1681 gaps += diff - 1;
1682 }
1683
1684 /* Store the gap from the previous member of the group. If there is no
1685 gap in the access, DR_GROUP_GAP is always 1. */
1686 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1687
1688 prev_init = DR_INIT (data_ref);
1689 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1690 /* Count the number of data-refs in the chain. */
1691 count++;
1692 }
1693
1694 /* COUNT is the number of accesses found; we multiply it by the size of
1695 the type to get COUNT_IN_BYTES. */
1696 count_in_bytes = type_size * count;
1697
1698 /* Check that the size of the interleaving (including gaps) is not
1699 greater than STEP. */
1700 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1701 {
1702 if (vect_print_dump_info (REPORT_DETAILS))
1703 {
1704 fprintf (vect_dump, "interleaving size is greater than step for ");
1705 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1706 }
1707 return false;
1708 }
1709
1710 /* Check that the size of the interleaving is equal to STEP for stores,
1711 i.e., that there are no gaps. */
1712 if (dr_step && dr_step != count_in_bytes)
1713 {
1714 if (DR_IS_READ (dr))
1715 {
1716 slp_impossible = true;
1717 /* There is a gap after the last load in the group. This gap is a
1718 difference between the stride and the number of elements. When
1719 there is no gap, this difference should be 0. */
1720 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1721 }
1722 else
1723 {
1724 if (vect_print_dump_info (REPORT_DETAILS))
1725 fprintf (vect_dump, "interleaved store with gaps");
1726 return false;
1727 }
1728 }
1729
1730 /* Check that STEP is a multiple of type size. */
1731 if (dr_step && (dr_step % type_size) != 0)
1732 {
1733 if (vect_print_dump_info (REPORT_DETAILS))
1734 {
1735 fprintf (vect_dump, "step is not a multiple of type size: step ");
1736 print_generic_expr (vect_dump, step, TDF_SLIM);
1737 fprintf (vect_dump, " size ");
1738 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1739 TDF_SLIM);
1740 }
1741 return false;
1742 }
1743
1744 /* FORNOW: we handle only interleaving that is a power of 2.
1745 We don't fail here if it may still be possible to vectorize the
1746 group using SLP. If not, the size of the group will be checked in
1747 vect_analyze_operations, and the vectorization will fail. */
1748 if (exact_log2 (stride) == -1)
1749 {
1750 if (vect_print_dump_info (REPORT_DETAILS))
1751 fprintf (vect_dump, "interleaving is not a power of 2");
1752
1753 if (slp_impossible)
1754 return false;
1755 }
1756
1757 if (stride == 0)
1758 stride = count;
1759
1760 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1761 if (vect_print_dump_info (REPORT_DETAILS))
1762 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1763
1764 /* SLP: create an SLP data structure for every interleaving group of
1765 stores for further analysis in vect_analyze_slp. */
1766 if (!DR_IS_READ (dr) && !slp_impossible)
1767 {
1768 if (loop_vinfo)
1769 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1770 stmt);
1771 if (bb_vinfo)
1772 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1773 stmt);
1774 }
1775 }
1776
1777 return true;
1778 }
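
/* Illustrative sketch (plain C, not part of this file): two of the access
   patterns the analysis above classifies.  The first loop forms an
   interleaved store group of size 2; the second is a single-element
   interleaving load with stride 4, handled by the branch at the top of
   the function.  */

#include <stddef.h>

/* Interleaved store group of size 2: both stores share base and step
   (2 * sizeof (short)), and their DR_INITs differ by one element, so the
   chain walked above ends with count == 2 and no gaps.  */
void
pack_pair (short *out, const short *a, const short *b, size_t n)
{
  for (size_t i = 0; i < n; i++)
    {
      out[2 * i]     = a[i];
      out[2 * i + 1] = b[i];
    }
}

/* Single-element interleaving: only every fourth int is read, so the load
   becomes its own DR_GROUP_FIRST_DR and DR_GROUP_SIZE is set to the
   stride (4).  */
int
sum_every_fourth (const int *in, size_t n)
{
  int s = 0;
  for (size_t i = 0; i < n; i++)
    s += in[4 * i];
  return s;
}
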
1779
1780
1781 /* Analyze the access pattern of the data-reference DR.
1782 In case of non-consecutive accesses call vect_analyze_group_access() to
1783 analyze groups of strided accesses. */
1784
1785 static bool
1786 vect_analyze_data_ref_access (struct data_reference *dr)
1787 {
1788 tree step = DR_STEP (dr);
1789 tree scalar_type = TREE_TYPE (DR_REF (dr));
1790 gimple stmt = DR_STMT (dr);
1791 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1792 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1793 struct loop *loop = NULL;
1794 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1795
1796 if (loop_vinfo)
1797 loop = LOOP_VINFO_LOOP (loop_vinfo);
1798
1799 if (loop_vinfo && !step)
1800 {
1801 if (vect_print_dump_info (REPORT_DETAILS))
1802 fprintf (vect_dump, "bad data-ref access in loop");
1803 return false;
1804 }
1805
1806 /* Don't allow invariant accesses in loops. */
1807 if (loop_vinfo && dr_step == 0)
1808 return false;
1809
1810 if (loop && nested_in_vect_loop_p (loop, stmt))
1811 {
1812 /* Interleaved accesses are not yet supported within outer-loop
1813 vectorization for references in the inner-loop. */
1814 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1815
1816 /* For the rest of the analysis we use the outer-loop step. */
1817 step = STMT_VINFO_DR_STEP (stmt_info);
1818 dr_step = TREE_INT_CST_LOW (step);
1819
1820 if (dr_step == 0)
1821 {
1822 if (vect_print_dump_info (REPORT_ALIGNMENT))
1823 fprintf (vect_dump, "zero step in outer loop.");
1824 if (DR_IS_READ (dr))
1825 return true;
1826 else
1827 return false;
1828 }
1829 }
1830
1831 /* Consecutive? */
1832 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1833 {
1834 /* Mark that it is not interleaving. */
1835 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1836 return true;
1837 }
1838
1839 if (loop && nested_in_vect_loop_p (loop, stmt))
1840 {
1841 if (vect_print_dump_info (REPORT_ALIGNMENT))
1842 fprintf (vect_dump, "strided access in outer loop.");
1843 return false;
1844 }
1845
1846 /* Not consecutive access - check if it's a part of interleaving group. */
1847 return vect_analyze_group_access (dr);
1848 }
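
/* Minimal sketch (plain C, illustrative only) of the step classes
   distinguished above: a consecutive access, an invariant access that is
   rejected for loop vectorization, and a non-consecutive access that is
   handed on to the group analysis.  B must have room for 2 * N ints.  */

#include <stddef.h>

void
step_classes (int *a, int *b, int *acc, size_t n)
{
  for (size_t i = 0; i < n; i++)
    {
      a[i] = (int) i;      /* consecutive: DR_STEP == sizeof (int)              */
      *acc += a[i];        /* invariant store: step 0, rejected above for loops */
      b[2 * i] = (int) i;  /* non-consecutive: analyzed by the group analysis   */
    }
}
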
1849
1850
1851 /* Function vect_analyze_data_ref_accesses.
1852
1853 Analyze the access pattern of all the data references in the loop.
1854
1855 FORNOW: the only access pattern that is considered vectorizable is a
1856 simple step 1 (consecutive) access.
1857
1858 FORNOW: handle only arrays and pointer accesses. */
1859
1860 bool
1861 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1862 {
1863 unsigned int i;
1864 VEC (data_reference_p, heap) *datarefs;
1865 struct data_reference *dr;
1866
1867 if (vect_print_dump_info (REPORT_DETAILS))
1868 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1869
1870 if (loop_vinfo)
1871 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1872 else
1873 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1874
1875 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1876 if (STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr)))
1877 && !vect_analyze_data_ref_access (dr))
1878 {
1879 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1880 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1881
1882 if (bb_vinfo)
1883 {
1884 /* Mark the statement as not vectorizable. */
1885 STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (DR_STMT (dr))) = false;
1886 continue;
1887 }
1888 else
1889 return false;
1890 }
1891
1892 return true;
1893 }
1894
1895 /* Function vect_prune_runtime_alias_test_list.
1896
1897 Prune a list of ddrs to be tested at run-time by versioning for alias.
1898 Return FALSE if the resulting list of ddrs is longer than allowed by
1899 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1900
1901 bool
1902 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1903 {
1904 VEC (ddr_p, heap) * ddrs =
1905 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1906 unsigned i, j;
1907
1908 if (vect_print_dump_info (REPORT_DETAILS))
1909 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1910
1911 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1912 {
1913 bool found;
1914 ddr_p ddr_i;
1915
1916 ddr_i = VEC_index (ddr_p, ddrs, i);
1917 found = false;
1918
1919 for (j = 0; j < i; j++)
1920 {
1921 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1922
1923 if (vect_vfa_range_equal (ddr_i, ddr_j))
1924 {
1925 if (vect_print_dump_info (REPORT_DR_DETAILS))
1926 {
1927 fprintf (vect_dump, "found equal ranges ");
1928 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1929 fprintf (vect_dump, ", ");
1930 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1931 fprintf (vect_dump, " and ");
1932 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1933 fprintf (vect_dump, ", ");
1934 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1935 }
1936 found = true;
1937 break;
1938 }
1939 }
1940
1941 if (found)
1942 {
1943 VEC_ordered_remove (ddr_p, ddrs, i);
1944 continue;
1945 }
1946 i++;
1947 }
1948
1949 if (VEC_length (ddr_p, ddrs) >
1950 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1951 {
1952 if (vect_print_dump_info (REPORT_DR_DETAILS))
1953 {
1954 fprintf (vect_dump,
1955 "disable versioning for alias - max number of generated "
1956 "checks exceeded.");
1957 }
1958
1959 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1960
1961 return false;
1962 }
1963
1964 return true;
1965 }
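
/* Hypothetical sketch of what each ddr kept on this list costs at run time:
   the loop is versioned and the vectorized copy is guarded by one overlap
   check per retained pair, which is why the list is capped by
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS.  Hand-written model, not emitted
   code.  */

#include <stddef.h>

void
add_arrays (int *x, const int *y, size_t n)
{
  if ((const int *) x + n <= y || y + n <= (const int *) x)
    {
      /* Runtime alias check passed: this copy may assume no overlap and
         corresponds to the vectorized version.  */
      for (size_t i = 0; i < n; i++)
        x[i] += y[i];
    }
  else
    {
      /* Fallback: the original scalar loop.  */
      for (size_t i = 0; i < n; i++)
        x[i] += y[i];
    }
}
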
1966
1967
1968 /* Function vect_analyze_data_refs.
1969
1970 Find all the data references in the loop or basic block.
1971
1972 The general structure of the analysis of data refs in the vectorizer is as
1973 follows:
1974 1- vect_analyze_data_refs(loop/bb): call
1975 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1976 in the loop/bb and their dependences.
1977 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1978 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1979 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1980
1981 */
1982
1983 bool
1984 vect_analyze_data_refs (loop_vec_info loop_vinfo,
1985 bb_vec_info bb_vinfo,
1986 int *min_vf)
1987 {
1988 struct loop *loop = NULL;
1989 basic_block bb = NULL;
1990 unsigned int i;
1991 VEC (data_reference_p, heap) *datarefs;
1992 struct data_reference *dr;
1993 tree scalar_type;
1994 bool res;
1995
1996 if (vect_print_dump_info (REPORT_DETAILS))
1997 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1998
1999 if (loop_vinfo)
2000 {
2001 loop = LOOP_VINFO_LOOP (loop_vinfo);
2002 res = compute_data_dependences_for_loop
2003 (loop, true, &LOOP_VINFO_DATAREFS (loop_vinfo),
2004 &LOOP_VINFO_DDRS (loop_vinfo));
2005
2006 if (!res)
2007 {
2008 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2009 fprintf (vect_dump, "not vectorized: loop contains function calls"
2010 " or data references that cannot be analyzed");
2011 return false;
2012 }
2013
2014 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
2015 }
2016 else
2017 {
2018 bb = BB_VINFO_BB (bb_vinfo);
2019 res = compute_data_dependences_for_bb (bb, true,
2020 &BB_VINFO_DATAREFS (bb_vinfo),
2021 &BB_VINFO_DDRS (bb_vinfo));
2022 if (!res)
2023 {
2024 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2025 fprintf (vect_dump, "not vectorized: basic block contains function"
2026 " calls or data references that cannot be analyzed");
2027 return false;
2028 }
2029
2030 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
2031 }
2032
2033 /* Go through the data-refs and check that the analysis succeeded. Update
2034 the pointer from the stmt_vec_info struct to the DR and set the vectype. */
2035
2036 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
2037 {
2038 gimple stmt;
2039 stmt_vec_info stmt_info;
2040 tree base, offset, init;
2041 int vf;
2042
2043 if (!dr || !DR_REF (dr))
2044 {
2045 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2046 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
2047 return false;
2048 }
2049
2050 stmt = DR_STMT (dr);
2051 stmt_info = vinfo_for_stmt (stmt);
2052
2053 /* Check that analysis of the data-ref succeeded. */
2054 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
2055 || !DR_STEP (dr))
2056 {
2057 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2058 {
2059 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
2060 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2061 }
2062
2063 if (bb_vinfo)
2064 {
2065 /* Mark the statement as not vectorizable. */
2066 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2067 continue;
2068 }
2069 else
2070 return false;
2071 }
2072
2073 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
2074 {
2075 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2076 fprintf (vect_dump, "not vectorized: base addr of dr is a "
2077 "constant");
2078 if (bb_vinfo)
2079 {
2080 /* Mark the statement as not vectorizable. */
2081 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2082 continue;
2083 }
2084 else
2085 return false;
2086 }
2087
2088 base = unshare_expr (DR_BASE_ADDRESS (dr));
2089 offset = unshare_expr (DR_OFFSET (dr));
2090 init = unshare_expr (DR_INIT (dr));
2091
2092 /* Update DR field in stmt_vec_info struct. */
2093
2094 /* If the dataref is in an inner-loop of the loop that is considered
2095 for vectorization, we also want to analyze the access relative to
2096 the outer-loop (DR contains information only relative to the
2097 inner-most enclosing loop). We do that by building a reference to the
2098 first location accessed by the inner-loop, and analyze it relative to
2099 the outer-loop. */
2100 if (loop && nested_in_vect_loop_p (loop, stmt))
2101 {
2102 tree outer_step, outer_base, outer_init;
2103 HOST_WIDE_INT pbitsize, pbitpos;
2104 tree poffset;
2105 enum machine_mode pmode;
2106 int punsignedp, pvolatilep;
2107 affine_iv base_iv, offset_iv;
2108 tree dinit;
2109
2110 /* Build a reference to the first location accessed by the
2111 inner-loop: *(BASE+INIT). (The first location is actually
2112 BASE+INIT+OFFSET, but we add OFFSET separately later). */
2113 tree inner_base = build_fold_indirect_ref
2114 (fold_build2 (POINTER_PLUS_EXPR,
2115 TREE_TYPE (base), base,
2116 fold_convert (sizetype, init)));
2117
2118 if (vect_print_dump_info (REPORT_DETAILS))
2119 {
2120 fprintf (vect_dump, "analyze in outer-loop: ");
2121 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
2122 }
2123
2124 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
2125 &poffset, &pmode, &punsignedp, &pvolatilep, false);
2126 gcc_assert (outer_base != NULL_TREE);
2127
2128 if (pbitpos % BITS_PER_UNIT != 0)
2129 {
2130 if (vect_print_dump_info (REPORT_DETAILS))
2131 fprintf (vect_dump, "failed: bit offset alignment.\n");
2132 return false;
2133 }
2134
2135 outer_base = build_fold_addr_expr (outer_base);
2136 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
2137 &base_iv, false))
2138 {
2139 if (vect_print_dump_info (REPORT_DETAILS))
2140 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
2141 return false;
2142 }
2143
2144 if (offset)
2145 {
2146 if (poffset)
2147 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
2148 poffset);
2149 else
2150 poffset = offset;
2151 }
2152
2153 if (!poffset)
2154 {
2155 offset_iv.base = ssize_int (0);
2156 offset_iv.step = ssize_int (0);
2157 }
2158 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
2159 &offset_iv, false))
2160 {
2161 if (vect_print_dump_info (REPORT_DETAILS))
2162 fprintf (vect_dump, "evolution of offset is not affine.\n");
2163 return false;
2164 }
2165
2166 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2167 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2168 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2169 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2170 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2171
2172 outer_step = size_binop (PLUS_EXPR,
2173 fold_convert (ssizetype, base_iv.step),
2174 fold_convert (ssizetype, offset_iv.step));
2175
2176 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2177 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2178 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2179 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2180 STMT_VINFO_DR_OFFSET (stmt_info) =
2181 fold_convert (ssizetype, offset_iv.base);
2182 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2183 size_int (highest_pow2_factor (offset_iv.base));
2184
2185 if (vect_print_dump_info (REPORT_DETAILS))
2186 {
2187 fprintf (vect_dump, "\touter base_address: ");
2188 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2189 fprintf (vect_dump, "\n\touter offset from base address: ");
2190 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2191 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2192 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2193 fprintf (vect_dump, "\n\touter step: ");
2194 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2195 fprintf (vect_dump, "\n\touter aligned to: ");
2196 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2197 }
2198 }
2199
2200 if (STMT_VINFO_DATA_REF (stmt_info))
2201 {
2202 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2203 {
2204 fprintf (vect_dump,
2205 "not vectorized: more than one data ref in stmt: ");
2206 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2207 }
2208 return false;
2209 }
2210
2211 STMT_VINFO_DATA_REF (stmt_info) = dr;
2212
2213 /* Set vectype for STMT. */
2214 scalar_type = TREE_TYPE (DR_REF (dr));
2215 STMT_VINFO_VECTYPE (stmt_info) =
2216 get_vectype_for_scalar_type (scalar_type);
2217 if (!STMT_VINFO_VECTYPE (stmt_info))
2218 {
2219 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2220 {
2221 fprintf (vect_dump,
2222 "not vectorized: no vectype for stmt: ");
2223 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2224 fprintf (vect_dump, " scalar_type: ");
2225 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2226 }
2227
2228 if (bb_vinfo)
2229 {
2230 /* Mark the statement as not vectorizable. */
2231 STMT_VINFO_VECTORIZABLE (stmt_info) = false;
2232 continue;
2233 }
2234 else
2235 return false;
2236 }
2237
2238 /* Adjust the minimal vectorization factor according to the
2239 vector type. */
2240 vf = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
2241 if (vf > *min_vf)
2242 *min_vf = vf;
2243 }
2244
2245 return true;
2246 }
2247
2248
2249 /* Function vect_get_new_vect_var.
2250
2251 Returns a name for a new variable. The current naming scheme prepends
2252 the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
2253 VAR_KIND) to NAME when NAME is provided; otherwise the prefix alone is
2254 used as the variable name. */
2255
2256 tree
2257 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2258 {
2259 const char *prefix;
2260 tree new_vect_var;
2261
2262 switch (var_kind)
2263 {
2264 case vect_simple_var:
2265 prefix = "vect_";
2266 break;
2267 case vect_scalar_var:
2268 prefix = "stmp_";
2269 break;
2270 case vect_pointer_var:
2271 prefix = "vect_p";
2272 break;
2273 default:
2274 gcc_unreachable ();
2275 }
2276
2277 if (name)
2278 {
2279 char* tmp = concat (prefix, name, NULL);
2280 new_vect_var = create_tmp_var (type, tmp);
2281 free (tmp);
2282 }
2283 else
2284 new_vect_var = create_tmp_var (type, prefix);
2285
2286 /* Mark vector typed variable as a gimple register variable. */
2287 if (TREE_CODE (type) == VECTOR_TYPE)
2288 DECL_GIMPLE_REG_P (new_vect_var) = true;
2289
2290 return new_vect_var;
2291 }
2292
2293
2294 /* Function vect_create_addr_base_for_vector_ref.
2295
2296 Create an expression that computes the address of the first memory location
2297 that will be accessed for a data reference.
2298
2299 Input:
2300 STMT: The statement containing the data reference.
2301 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2302 OFFSET: Optional. If supplied, it is added to the initial address.
2303 LOOP: Specify relative to which loop-nest should the address be computed.
2304 For example, when the dataref is in an inner-loop nested in an
2305 outer-loop that is now being vectorized, LOOP can be either the
2306 outer-loop, or the inner-loop. The first memory location accessed
2307 by the following dataref ('in' points to short):
2308
2309 for (i=0; i<N; i++)
2310 for (j=0; j<M; j++)
2311 s += in[i+j]
2312
2313 is as follows:
2314 if LOOP=i_loop: &in (relative to i_loop)
2315 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2316
2317 Output:
2318 1. Return an SSA_NAME whose value is the address of the memory location of
2319 the first vector of the data reference.
2320 2. If new_stmt_list is not NULL_TREE after return then the caller must insert
2321 these statement(s) which define the returned SSA_NAME.
2322
2323 FORNOW: We are only handling array accesses with step 1. */
2324
2325 tree
2326 vect_create_addr_base_for_vector_ref (gimple stmt,
2327 gimple_seq *new_stmt_list,
2328 tree offset,
2329 struct loop *loop)
2330 {
2331 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2332 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2333 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2334 tree base_name;
2335 tree data_ref_base_var;
2336 tree vec_stmt;
2337 tree addr_base, addr_expr;
2338 tree dest;
2339 gimple_seq seq = NULL;
2340 tree base_offset = unshare_expr (DR_OFFSET (dr));
2341 tree init = unshare_expr (DR_INIT (dr));
2342 tree vect_ptr_type;
2343 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2344 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2345 tree base;
2346
2347 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2348 {
2349 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2350
2351 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2352
2353 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2354 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2355 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2356 }
2357
2358 if (loop_vinfo)
2359 base_name = build_fold_indirect_ref (data_ref_base);
2360 else
2361 {
2362 base_offset = ssize_int (0);
2363 init = ssize_int (0);
2364 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2365 }
2366
2367 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2368 add_referenced_var (data_ref_base_var);
2369 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2370 data_ref_base_var);
2371 gimple_seq_add_seq (new_stmt_list, seq);
2372
2373 /* Create base_offset */
2374 base_offset = size_binop (PLUS_EXPR,
2375 fold_convert (sizetype, base_offset),
2376 fold_convert (sizetype, init));
2377 dest = create_tmp_var (sizetype, "base_off");
2378 add_referenced_var (dest);
2379 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2380 gimple_seq_add_seq (new_stmt_list, seq);
2381
2382 if (offset)
2383 {
2384 tree tmp = create_tmp_var (sizetype, "offset");
2385
2386 add_referenced_var (tmp);
2387 offset = fold_build2 (MULT_EXPR, sizetype,
2388 fold_convert (sizetype, offset), step);
2389 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2390 base_offset, offset);
2391 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2392 gimple_seq_add_seq (new_stmt_list, seq);
2393 }
2394
2395 /* base + base_offset */
2396 if (loop_vinfo)
2397 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2398 data_ref_base, base_offset);
2399 else
2400 {
2401 addr_base = build1 (ADDR_EXPR,
2402 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2403 unshare_expr (DR_REF (dr)));
2404 }
2405
2406 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2407 base = get_base_address (DR_REF (dr));
2408 if (base
2409 && INDIRECT_REF_P (base))
2410 vect_ptr_type
2411 = build_qualified_type (vect_ptr_type,
2412 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2413
2414 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2415 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2416 get_name (base_name));
2417 add_referenced_var (addr_expr);
2418 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2419 gimple_seq_add_seq (new_stmt_list, seq);
2420
2421 if (vect_print_dump_info (REPORT_DETAILS))
2422 {
2423 fprintf (vect_dump, "created ");
2424 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2425 }
2426
2427 return vec_stmt;
2428 }
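
/* In plain-C terms the address built above amounts to the computation below
   (a sketch; parameter names are illustrative, not the vectorizer's).  */

#include <stddef.h>

typedef int v4si __attribute__ ((vector_size (16)));

/* addr_base = (vectype *) (base + (DR_OFFSET + DR_INIT) + OFFSET * step),
   where OFFSET is counted in scalar elements and step is the element size.  */
static v4si *
addr_base_for_vector_ref (char *base, size_t dr_offset, size_t dr_init,
                          size_t offset, size_t elem_size)
{
  size_t base_offset = dr_offset + dr_init;   /* "Create base_offset" above    */
  base_offset += offset * elem_size;          /* optional OFFSET contribution  */
  return (v4si *) (base + base_offset);       /* base + base_offset, converted */
}
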
2429
2430
2431 /* Function vect_create_data_ref_ptr.
2432
2433 Create a new pointer to vector type (vp), that points to the first location
2434 accessed in the loop by STMT, along with the def-use update chain to
2435 appropriately advance the pointer through the loop iterations. Also set
2436 aliasing information for the pointer. This vector pointer is used by the
2437 callers to this function to create a memory reference expression for vector
2438 load/store access.
2439
2440 Input:
2441 1. STMT: a stmt that references memory. Expected to be of the form
2442 GIMPLE_ASSIGN <name, data-ref> or
2443 GIMPLE_ASSIGN <data-ref, name>.
2444 2. AT_LOOP: the loop where the vector memref is to be created.
2445 3. OFFSET (optional): an offset to be added to the initial address accessed
2446 by the data-ref in STMT.
2447 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2448 pointing to the initial address.
2449 5. TYPE: if not NULL indicates the required type of the data-ref.
2450
2451 Output:
2452 1. Declare a new ptr to vector_type, and have it point to the base of the
2453 data reference (the initial address accessed by the data reference).
2454 For example, for vector of type V8HI, the following code is generated:
2455
2456 v8hi *vp;
2457 vp = (v8hi *)initial_address;
2458
2459 if OFFSET is not supplied:
2460 initial_address = &a[init];
2461 if OFFSET is supplied:
2462 initial_address = &a[init + OFFSET];
2463
2464 Return the initial_address in INITIAL_ADDRESS.
2465
2466 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2467 update the pointer in each iteration of the loop.
2468
2469 Return the increment stmt that updates the pointer in PTR_INCR.
2470
2471 3. Set INV_P to true if the access pattern of the data reference in the
2472 vectorized loop is invariant. Set it to false otherwise.
2473
2474 4. Return the pointer. */
2475
2476 tree
2477 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2478 tree offset, tree *initial_address, gimple *ptr_incr,
2479 bool only_init, bool *inv_p)
2480 {
2481 tree base_name;
2482 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2483 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2484 struct loop *loop = NULL;
2485 bool nested_in_vect_loop = false;
2486 struct loop *containing_loop = NULL;
2487 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2488 tree vect_ptr_type;
2489 tree vect_ptr;
2490 tree new_temp;
2491 gimple vec_stmt;
2492 gimple_seq new_stmt_list = NULL;
2493 edge pe = NULL;
2494 basic_block new_bb;
2495 tree vect_ptr_init;
2496 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2497 tree vptr;
2498 gimple_stmt_iterator incr_gsi;
2499 bool insert_after;
2500 tree indx_before_incr, indx_after_incr;
2501 gimple incr;
2502 tree step;
2503 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2504 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2505 tree base;
2506
2507 if (loop_vinfo)
2508 {
2509 loop = LOOP_VINFO_LOOP (loop_vinfo);
2510 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2511 containing_loop = (gimple_bb (stmt))->loop_father;
2512 pe = loop_preheader_edge (loop);
2513 }
2514 else
2515 {
2516 gcc_assert (bb_vinfo);
2517 only_init = true;
2518 *ptr_incr = NULL;
2519 }
2520
2521 /* Check the step (evolution) of the load in LOOP, and record
2522 whether it's invariant. */
2523 if (nested_in_vect_loop)
2524 step = STMT_VINFO_DR_STEP (stmt_info);
2525 else
2526 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2527
2528 if (tree_int_cst_compare (step, size_zero_node) == 0)
2529 *inv_p = true;
2530 else
2531 *inv_p = false;
2532
2533 /* Create an expression for the first address accessed by this load
2534 in LOOP. */
2535 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2536
2537 if (vect_print_dump_info (REPORT_DETAILS))
2538 {
2539 tree data_ref_base = base_name;
2540 fprintf (vect_dump, "create vector-pointer variable to type: ");
2541 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2542 if (TREE_CODE (data_ref_base) == VAR_DECL
2543 || TREE_CODE (data_ref_base) == ARRAY_REF)
2544 fprintf (vect_dump, " vectorizing an array ref: ");
2545 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2546 fprintf (vect_dump, " vectorizing a record based array ref: ");
2547 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2548 fprintf (vect_dump, " vectorizing a pointer ref: ");
2549 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2550 }
2551
2552 /** (1) Create the new vector-pointer variable: **/
2553 vect_ptr_type = build_pointer_type (vectype);
2554 base = get_base_address (DR_REF (dr));
2555 if (base
2556 && INDIRECT_REF_P (base))
2557 vect_ptr_type
2558 = build_qualified_type (vect_ptr_type,
2559 TYPE_QUALS (TREE_TYPE (TREE_OPERAND (base, 0))));
2560 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2561 get_name (base_name));
2562
2563 /* Vector types inherit the alias set of their component type by default so
2564 we need to use a ref-all pointer if the data reference does not conflict
2565 with the created vector data reference because it is not addressable. */
2566 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2567 get_alias_set (DR_REF (dr))))
2568 {
2569 vect_ptr_type
2570 = build_pointer_type_for_mode (vectype,
2571 TYPE_MODE (vect_ptr_type), true);
2572 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2573 get_name (base_name));
2574 }
2575
2576 /* Likewise for any of the data references in the stmt group. */
2577 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2578 {
2579 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2580 do
2581 {
2582 tree lhs = gimple_assign_lhs (orig_stmt);
2583 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2584 get_alias_set (lhs)))
2585 {
2586 vect_ptr_type
2587 = build_pointer_type_for_mode (vectype,
2588 TYPE_MODE (vect_ptr_type), true);
2589 vect_ptr
2590 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2591 get_name (base_name));
2592 break;
2593 }
2594
2595 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2596 }
2597 while (orig_stmt);
2598 }
2599
2600 add_referenced_var (vect_ptr);
2601
2602 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2603 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2604 def-use update cycles for the pointer: One relative to the outer-loop
2605 (LOOP), which is what steps (3) and (4) below do. The other is relative
2606 to the inner-loop (which is the inner-most loop containing the dataref),
2607 and this is done by step (5) below.
2608
2609 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2610 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2611 redundant. Steps (3),(4) create the following:
2612
2613 vp0 = &base_addr;
2614 LOOP: vp1 = phi(vp0,vp2)
2615 ...
2616 ...
2617 vp2 = vp1 + step
2618 goto LOOP
2619
2620 If there is an inner-loop nested in loop, then step (5) will also be
2621 applied, and an additional update in the inner-loop will be created:
2622
2623 vp0 = &base_addr;
2624 LOOP: vp1 = phi(vp0,vp2)
2625 ...
2626 inner: vp3 = phi(vp1,vp4)
2627 vp4 = vp3 + inner_step
2628 if () goto inner
2629 ...
2630 vp2 = vp1 + step
2631 if () goto LOOP */
2632
2633 /** (3) Calculate the initial address of the vector-pointer, and set
2634 the vector-pointer to point to it before the loop: **/
2635
2636 /* Create: &(base[init_val+offset]) in the loop preheader. */
2637
2638 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2639 offset, loop);
2640 if (new_stmt_list)
2641 {
2642 if (pe)
2643 {
2644 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2645 gcc_assert (!new_bb);
2646 }
2647 else
2648 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2649 }
2650
2651 *initial_address = new_temp;
2652
2653 /* Create: p = (vectype *) initial_base */
2654 vec_stmt = gimple_build_assign (vect_ptr,
2655 fold_convert (vect_ptr_type, new_temp));
2656 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2657 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2658 if (pe)
2659 {
2660 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2661 gcc_assert (!new_bb);
2662 }
2663 else
2664 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2665
2666 /** (4) Handle the updating of the vector-pointer inside the loop.
2667 This is needed when ONLY_INIT is false, and also when AT_LOOP
2668 is the inner-loop nested in LOOP (during outer-loop vectorization).
2669 **/
2670
2671 /* No update in loop is required. */
2672 if (only_init && (!loop_vinfo || at_loop == loop))
2673 {
2674 /* Copy the points-to information if it exists. */
2675 if (DR_PTR_INFO (dr))
2676 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2677 vptr = vect_ptr_init;
2678 }
2679 else
2680 {
2681 /* The step of the vector pointer is the Vector Size. */
2682 tree step = TYPE_SIZE_UNIT (vectype);
2683 /* One exception to the above is when the scalar step of the load in
2684 LOOP is zero. In this case the step here is also zero. */
2685 if (*inv_p)
2686 step = size_zero_node;
2687
2688 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2689
2690 create_iv (vect_ptr_init,
2691 fold_convert (vect_ptr_type, step),
2692 vect_ptr, loop, &incr_gsi, insert_after,
2693 &indx_before_incr, &indx_after_incr);
2694 incr = gsi_stmt (incr_gsi);
2695 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2696
2697 /* Copy the points-to information if it exists. */
2698 if (DR_PTR_INFO (dr))
2699 {
2700 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2701 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2702 }
2703 if (ptr_incr)
2704 *ptr_incr = incr;
2705
2706 vptr = indx_before_incr;
2707 }
2708
2709 if (!nested_in_vect_loop || only_init)
2710 return vptr;
2711
2712
2713 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2714 nested in LOOP, if one exists: **/
2715
2716 gcc_assert (nested_in_vect_loop);
2717 if (!only_init)
2718 {
2719 standard_iv_increment_position (containing_loop, &incr_gsi,
2720 &insert_after);
2721 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2722 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2723 &indx_after_incr);
2724 incr = gsi_stmt (incr_gsi);
2725 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2726
2727 /* Copy the points-to information if it exists. */
2728 if (DR_PTR_INFO (dr))
2729 {
2730 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2731 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2732 }
2733 if (ptr_incr)
2734 *ptr_incr = incr;
2735
2736 return indx_before_incr;
2737 }
2738 else
2739 gcc_unreachable ();
2740 }
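
/* Minimal model of the pointer induction variable this function sets up,
   written with the GCC vector extension (an illustration assuming the array
   is suitably aligned; this is not the emitted GIMPLE).  */

#include <stddef.h>

typedef int v4si __attribute__ ((vector_size (16)));

v4si
sum_blocks (const int *a, size_t n)   /* n assumed to be a multiple of 4 */
{
  const v4si *vp = (const v4si *) a;  /* vp0 = &base_addr (step 3 above)    */
  v4si acc = { 0, 0, 0, 0 };

  for (size_t i = 0; i < n / 4; i++)
    {
      acc += *vp;                     /* vectorized access through vp       */
      vp++;                           /* vp2 = vp1 + step, step == 16 bytes */
    }
  return acc;
}
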
2741
2742
2743 /* Function bump_vector_ptr
2744
2745 Increment a pointer (to a vector type) by vector-size. If requested,
2746 i.e. if PTR_INCR is given, then also connect the new increment stmt
2747 to the existing def-use update-chain of the pointer, by modifying
2748 the PTR_INCR as illustrated below:
2749
2750 The pointer def-use update-chain before this function:
2751 DATAREF_PTR = phi (p_0, p_2)
2752 ....
2753 PTR_INCR: p_2 = DATAREF_PTR + step
2754
2755 The pointer def-use update-chain after this function:
2756 DATAREF_PTR = phi (p_0, p_2)
2757 ....
2758 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2759 ....
2760 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2761
2762 Input:
2763 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2764 in the loop.
2765 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2766 the loop. The increment amount across iterations is expected
2767 to be vector_size.
2768 BSI - location where the new update stmt is to be placed.
2769 STMT - the original scalar memory-access stmt that is being vectorized.
2770 BUMP - optional. The offset by which to bump the pointer. If not given,
2771 the offset is assumed to be vector_size.
2772
2773 Output: Return NEW_DATAREF_PTR as illustrated above.
2774
2775 */
2776
2777 tree
2778 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2779 gimple stmt, tree bump)
2780 {
2781 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2782 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2783 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2784 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2785 tree update = TYPE_SIZE_UNIT (vectype);
2786 gimple incr_stmt;
2787 ssa_op_iter iter;
2788 use_operand_p use_p;
2789 tree new_dataref_ptr;
2790
2791 if (bump)
2792 update = bump;
2793
2794 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2795 dataref_ptr, update);
2796 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2797 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2798 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2799
2800 /* Copy the points-to information if it exists. */
2801 if (DR_PTR_INFO (dr))
2802 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2803
2804 if (!ptr_incr)
2805 return new_dataref_ptr;
2806
2807 /* Update the vector-pointer's cross-iteration increment. */
2808 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2809 {
2810 tree use = USE_FROM_PTR (use_p);
2811
2812 if (use == dataref_ptr)
2813 SET_USE (use_p, new_dataref_ptr);
2814 else
2815 gcc_assert (tree_int_cst_compare (use, update) == 0);
2816 }
2817
2818 return new_dataref_ptr;
2819 }
2820
2821
2822 /* Function vect_create_destination_var.
2823
2824 Create a new temporary of type VECTYPE. */
2825
2826 tree
2827 vect_create_destination_var (tree scalar_dest, tree vectype)
2828 {
2829 tree vec_dest;
2830 const char *new_name;
2831 tree type;
2832 enum vect_var_kind kind;
2833
2834 kind = vectype ? vect_simple_var : vect_scalar_var;
2835 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2836
2837 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2838
2839 new_name = get_name (scalar_dest);
2840 if (!new_name)
2841 new_name = "var_";
2842 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2843 add_referenced_var (vec_dest);
2844
2845 return vec_dest;
2846 }
2847
2848 /* Function vect_strided_store_supported.
2849
2850 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2851 and FALSE otherwise. */
2852
2853 bool
2854 vect_strided_store_supported (tree vectype)
2855 {
2856 optab interleave_high_optab, interleave_low_optab;
2857 int mode;
2858
2859 mode = (int) TYPE_MODE (vectype);
2860
2861 /* Check that the operation is supported. */
2862 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2863 vectype, optab_default);
2864 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2865 vectype, optab_default);
2866 if (!interleave_high_optab || !interleave_low_optab)
2867 {
2868 if (vect_print_dump_info (REPORT_DETAILS))
2869 fprintf (vect_dump, "no optab for interleave.");
2870 return false;
2871 }
2872
2873 if (optab_handler (interleave_high_optab, mode)->insn_code
2874 == CODE_FOR_nothing
2875 || optab_handler (interleave_low_optab, mode)->insn_code
2876 == CODE_FOR_nothing)
2877 {
2878 if (vect_print_dump_info (REPORT_DETAILS))
2879 fprintf (vect_dump, "interleave op not supported by target.");
2880 return false;
2881 }
2882
2883 return true;
2884 }
2885
2886
2887 /* Function vect_permute_store_chain.
2888
2889 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2890 a power of 2, generate interleave_high/low stmts to reorder the data
2891 correctly for the stores. Return the final references for stores in
2892 RESULT_CHAIN.
2893
2894 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2895 The input is 4 vectors each containing 8 elements. We assign a number to each
2896 element; the input sequence is:
2897
2898 1st vec: 0 1 2 3 4 5 6 7
2899 2nd vec: 8 9 10 11 12 13 14 15
2900 3rd vec: 16 17 18 19 20 21 22 23
2901 4th vec: 24 25 26 27 28 29 30 31
2902
2903 The output sequence should be:
2904
2905 1st vec: 0 8 16 24 1 9 17 25
2906 2nd vec: 2 10 18 26 3 11 19 27
2907 3rd vec: 4 12 20 28 5 13 21 29
2908 4th vec: 6 14 22 30 7 15 23 31
2909
2910 i.e., we interleave the contents of the four vectors in their order.
2911
2912 We use interleave_high/low instructions to create such output. The input of
2913 each interleave_high/low operation is two vectors:
2914 1st vec 2nd vec
2915 0 1 2 3 4 5 6 7
2916 the even elements of the result vector are obtained left-to-right from the
2917 high/low elements of the first vector. The odd elements of the result are
2918 obtained left-to-right from the high/low elements of the second vector.
2919 The output of interleave_high will be: 0 4 1 5
2920 and of interleave_low: 2 6 3 7
2921
2922
2923 The permutation is done in log LENGTH stages. In each stage interleave_high
2924 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2925 where the first argument is taken from the first half of DR_CHAIN and the
2926 second argument from its second half.
2927 In our example,
2928
2929 I1: interleave_high (1st vec, 3rd vec)
2930 I2: interleave_low (1st vec, 3rd vec)
2931 I3: interleave_high (2nd vec, 4th vec)
2932 I4: interleave_low (2nd vec, 4th vec)
2933
2934 The output for the first stage is:
2935
2936 I1: 0 16 1 17 2 18 3 19
2937 I2: 4 20 5 21 6 22 7 23
2938 I3: 8 24 9 25 10 26 11 27
2939 I4: 12 28 13 29 14 30 15 31
2940
2941 The output of the second stage, i.e. the final result is:
2942
2943 I1: 0 8 16 24 1 9 17 25
2944 I2: 2 10 18 26 3 11 19 27
2945 I3: 4 12 20 28 5 13 21 29
2946 I4: 6 14 22 30 7 15 23 31. */
2947
2948 bool
2949 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2950 unsigned int length,
2951 gimple stmt,
2952 gimple_stmt_iterator *gsi,
2953 VEC(tree,heap) **result_chain)
2954 {
2955 tree perm_dest, vect1, vect2, high, low;
2956 gimple perm_stmt;
2957 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2958 int i;
2959 unsigned int j;
2960 enum tree_code high_code, low_code;
2961
2962 /* Check that the operation is supported. */
2963 if (!vect_strided_store_supported (vectype))
2964 return false;
2965
2966 *result_chain = VEC_copy (tree, heap, dr_chain);
2967
2968 for (i = 0; i < exact_log2 (length); i++)
2969 {
2970 for (j = 0; j < length/2; j++)
2971 {
2972 vect1 = VEC_index (tree, dr_chain, j);
2973 vect2 = VEC_index (tree, dr_chain, j+length/2);
2974
2975 /* Create interleaving stmt:
2976 in the case of big endian:
2977 high = interleave_high (vect1, vect2)
2978 and in the case of little endian:
2979 high = interleave_low (vect1, vect2). */
2980 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2981 DECL_GIMPLE_REG_P (perm_dest) = 1;
2982 add_referenced_var (perm_dest);
2983 if (BYTES_BIG_ENDIAN)
2984 {
2985 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2986 low_code = VEC_INTERLEAVE_LOW_EXPR;
2987 }
2988 else
2989 {
2990 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2991 high_code = VEC_INTERLEAVE_LOW_EXPR;
2992 }
2993 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2994 vect1, vect2);
2995 high = make_ssa_name (perm_dest, perm_stmt);
2996 gimple_assign_set_lhs (perm_stmt, high);
2997 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2998 VEC_replace (tree, *result_chain, 2*j, high);
2999
3000 /* Create interleaving stmt:
3001 in the case of big endian:
3002 low = interleave_low (vect1, vect2)
3003 and in the case of little endian:
3004 low = interleave_high (vect1, vect2). */
3005 perm_dest = create_tmp_var (vectype, "vect_inter_low");
3006 DECL_GIMPLE_REG_P (perm_dest) = 1;
3007 add_referenced_var (perm_dest);
3008 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
3009 vect1, vect2);
3010 low = make_ssa_name (perm_dest, perm_stmt);
3011 gimple_assign_set_lhs (perm_stmt, low);
3012 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3013 VEC_replace (tree, *result_chain, 2*j+1, low);
3014 }
3015 dr_chain = VEC_copy (tree, heap, *result_chain);
3016 }
3017 return true;
3018 }
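
/* The staging described above can be reproduced with ordinary arrays: the
   sketch below models each vector as a row of VF ints and performs the same
   exact_log2 (LENGTH) rounds of pairwise interleaves, following the element
   numbering used in the example (illustrative only).  */

#include <string.h>

#define VF 8   /* elements per "vector" in this model */

static void
interleave_pair (const int *a, const int *b, int *high, int *low)
{
  /* high gets a[0] b[0] a[1] b[1] ..., low the remaining halves, matching
     the interleave_high/low description above.  */
  for (int k = 0; k < VF / 2; k++)
    {
      high[2 * k]     = a[k];
      high[2 * k + 1] = b[k];
      low[2 * k]      = a[k + VF / 2];
      low[2 * k + 1]  = b[k + VF / 2];
    }
}

static void
permute_store_chain_model (int chain[][VF], unsigned int length)
{
  int result[length][VF];

  /* exact_log2 (length) stages, pairing row j with row j + length/2.  */
  for (unsigned int stage = 1; stage < length; stage *= 2)
    {
      for (unsigned int j = 0; j < length / 2; j++)
        interleave_pair (chain[j], chain[j + length / 2],
                         result[2 * j], result[2 * j + 1]);
      memcpy (chain, result, sizeof (result));
    }
}
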
3019
3020 /* Function vect_setup_realignment
3021
3022 This function is called when vectorizing an unaligned load using
3023 the dr_explicit_realign[_optimized] scheme.
3024 This function generates the following code at the loop prolog:
3025
3026 p = initial_addr;
3027 x msq_init = *(floor(p)); # prolog load
3028 realignment_token = call target_builtin;
3029 loop:
3030 x msq = phi (msq_init, ---)
3031
3032 The stmts marked with x are generated only for the case of
3033 dr_explicit_realign_optimized.
3034
3035 The code above sets up a new (vector) pointer, pointing to the first
3036 location accessed by STMT, and a "floor-aligned" load using that pointer.
3037 It also generates code to compute the "realignment-token" (if the relevant
3038 target hook was defined), and creates a phi-node at the loop-header bb
3039 whose arguments are the result of the prolog-load (created by this
3040 function) and the result of a load that takes place in the loop (to be
3041 created by the caller to this function).
3042
3043 For the case of dr_explicit_realign_optimized:
3044 The caller to this function uses the phi-result (msq) to create the
3045 realignment code inside the loop, and sets up the missing phi argument,
3046 as follows:
3047 loop:
3048 msq = phi (msq_init, lsq)
3049 lsq = *(floor(p')); # load in loop
3050 result = realign_load (msq, lsq, realignment_token);
3051
3052 For the case of dr_explicit_realign:
3053 loop:
3054 msq = *(floor(p)); # load in loop
3055 p' = p + (VS-1);
3056 lsq = *(floor(p')); # load in loop
3057 result = realign_load (msq, lsq, realignment_token);
3058
3059 Input:
3060 STMT - (scalar) load stmt to be vectorized. This load accesses
3061 a memory location that may be unaligned.
3062 BSI - place where new code is to be inserted.
3063 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
3064 is used.
3065
3066 Output:
3067 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
3068 target hook, if defined.
3069 Return value - the result of the loop-header phi node. */
3070
3071 tree
3072 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
3073 tree *realignment_token,
3074 enum dr_alignment_support alignment_support_scheme,
3075 tree init_addr,
3076 struct loop **at_loop)
3077 {
3078 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3079 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3080 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3081 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3082 edge pe;
3083 tree scalar_dest = gimple_assign_lhs (stmt);
3084 tree vec_dest;
3085 gimple inc;
3086 tree ptr;
3087 tree data_ref;
3088 gimple new_stmt;
3089 basic_block new_bb;
3090 tree msq_init = NULL_TREE;
3091 tree new_temp;
3092 gimple phi_stmt;
3093 tree msq = NULL_TREE;
3094 gimple_seq stmts = NULL;
3095 bool inv_p;
3096 bool compute_in_loop = false;
3097 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
3098 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
3099 struct loop *loop_for_initial_load;
3100
3101 gcc_assert (alignment_support_scheme == dr_explicit_realign
3102 || alignment_support_scheme == dr_explicit_realign_optimized);
3103
3104 /* We need to generate three things:
3105 1. the misalignment computation
3106 2. the extra vector load (for the optimized realignment scheme).
3107 3. the phi node for the two vectors from which the realignment is
3108 done (for the optimized realignment scheme).
3109 */
3110
3111 /* 1. Determine where to generate the misalignment computation.
3112
3113 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
3114 calculation will be generated by this function, outside the loop (in the
3115 preheader). Otherwise, INIT_ADDR has already been computed for us by the
3116 caller, inside the loop.
3117
3118 Background: If the misalignment remains fixed throughout the iterations of
3119 the loop, then both realignment schemes are applicable, and also the
3120 misalignment computation can be done outside LOOP. This is because we are
3121 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
3122 are a multiple of VS (the Vector Size), and therefore the misalignment in
3123 different vectorized LOOP iterations is always the same.
3124 The problem arises only if the memory access is in an inner-loop nested
3125 inside LOOP, which is now being vectorized using outer-loop vectorization.
3126 This is the only case when the misalignment of the memory access may not
3127 remain fixed throughout the iterations of the inner-loop (as explained in
3128 detail in vect_supportable_dr_alignment). In this case, not only is the
3129 optimized realignment scheme not applicable, but also the misalignment
3130 computation (and generation of the realignment token that is passed to
3131 REALIGN_LOAD) have to be done inside the loop.
3132
3133 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
3134 or not, which in turn determines if the misalignment is computed inside
3135 the inner-loop, or outside LOOP. */
3136
3137 if (init_addr != NULL_TREE)
3138 {
3139 compute_in_loop = true;
3140 gcc_assert (alignment_support_scheme == dr_explicit_realign);
3141 }
3142
3143
3144 /* 2. Determine where to generate the extra vector load.
3145
3146 For the optimized realignment scheme, instead of generating two vector
3147 loads in each iteration, we generate a single extra vector load in the
3148 preheader of the loop, and in each iteration reuse the result of the
3149 vector load from the previous iteration. In case the memory access is in
3150 an inner-loop nested inside LOOP, which is now being vectorized using
3151 outer-loop vectorization, we need to determine whether this initial vector
3152 load should be generated at the preheader of the inner-loop, or can be
3153 generated at the preheader of LOOP. If the memory access has no evolution
3154 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
3155 to be generated inside LOOP (in the preheader of the inner-loop). */
3156
3157 if (nested_in_vect_loop)
3158 {
3159 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3160 bool invariant_in_outerloop =
3161 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3162 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
3163 }
3164 else
3165 loop_for_initial_load = loop;
3166 if (at_loop)
3167 *at_loop = loop_for_initial_load;
3168
3169 /* 3. For the case of the optimized realignment, create the first vector
3170 load at the loop preheader. */
3171
3172 if (alignment_support_scheme == dr_explicit_realign_optimized)
3173 {
3174 /* Create msq_init = *(floor(p1)) in the loop preheader */
3175
3176 gcc_assert (!compute_in_loop);
3177 pe = loop_preheader_edge (loop_for_initial_load);
3178 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3179 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
3180 &init_addr, &inc, true, &inv_p);
3181 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
3182 new_stmt = gimple_build_assign (vec_dest, data_ref);
3183 new_temp = make_ssa_name (vec_dest, new_stmt);
3184 gimple_assign_set_lhs (new_stmt, new_temp);
3185 mark_symbols_for_renaming (new_stmt);
3186 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3187 gcc_assert (!new_bb);
3188 msq_init = gimple_assign_lhs (new_stmt);
3189 }
3190
3191 /* 4. Create realignment token using a target builtin, if available.
3192 It is done either inside the containing loop, or before LOOP (as
3193 determined above). */
3194
3195 if (targetm.vectorize.builtin_mask_for_load)
3196 {
3197 tree builtin_decl;
3198
3199 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3200 if (compute_in_loop)
3201 gcc_assert (init_addr); /* already computed by the caller. */
3202 else
3203 {
3204 /* Generate the INIT_ADDR computation outside LOOP. */
3205 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3206 NULL_TREE, loop);
3207 pe = loop_preheader_edge (loop);
3208 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3209 gcc_assert (!new_bb);
3210 }
3211
3212 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3213 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3214 vec_dest =
3215 vect_create_destination_var (scalar_dest,
3216 gimple_call_return_type (new_stmt));
3217 new_temp = make_ssa_name (vec_dest, new_stmt);
3218 gimple_call_set_lhs (new_stmt, new_temp);
3219
3220 if (compute_in_loop)
3221 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3222 else
3223 {
3224 /* Generate the misalignment computation outside LOOP. */
3225 pe = loop_preheader_edge (loop);
3226 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3227 gcc_assert (!new_bb);
3228 }
3229
3230 *realignment_token = gimple_call_lhs (new_stmt);
3231
3232 /* The result of the CALL_EXPR to this builtin is determined from
3233 the value of the parameter and no global variables are touched
3234 which makes the builtin a "const" function. Requiring the
3235 builtin to have the "const" attribute makes it unnecessary
3236 to call mark_call_clobbered. */
3237 gcc_assert (TREE_READONLY (builtin_decl));
3238 }
3239
3240 if (alignment_support_scheme == dr_explicit_realign)
3241 return msq;
3242
3243 gcc_assert (!compute_in_loop);
3244 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3245
3246
3247 /* 5. Create msq = phi <msq_init, lsq> in loop */
3248
3249 pe = loop_preheader_edge (containing_loop);
3250 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3251 msq = make_ssa_name (vec_dest, NULL);
3252 phi_stmt = create_phi_node (msq, containing_loop->header);
3253 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3254 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3255
3256 return msq;
3257 }
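
/* Self-contained model of the dr_explicit_realign idea: two aligned loads
   straddling the unaligned address are combined using the misalignment
   offset, which here plays the role of the realignment token.  The byte-wise
   select is only a stand-in for the target's REALIGN_LOAD operation.  */

#include <stdint.h>
#include <string.h>

#define VS 16   /* vector size in bytes */

/* Load VS bytes from the (possibly unaligned) address P using only the two
   aligned loads msq = *(floor(p)) and lsq = *(floor(p + VS - 1)).  */
static void
realign_load_model (const uint8_t *p, uint8_t out[VS])
{
  const uint8_t *first  = (const uint8_t *) ((uintptr_t) p & -(uintptr_t) VS);
  const uint8_t *second = (const uint8_t *)
    ((uintptr_t) (p + VS - 1) & -(uintptr_t) VS);
  size_t off = (size_t) (p - first);    /* misalignment == realignment token */
  uint8_t msq[VS], lsq[VS];

  memcpy (msq, first, VS);              /* msq = *(floor(p))                 */
  memcpy (lsq, second, VS);             /* lsq = *(floor(p + VS - 1))        */

  for (size_t i = 0; i < VS; i++)       /* result = realign_load (msq, lsq)  */
    out[i] = i + off < VS ? msq[i + off] : lsq[i + off - VS];
}
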
3258
3259
3260 /* Function vect_strided_load_supported.
3261
3262 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3263 and FALSE otherwise. */
3264
3265 bool
3266 vect_strided_load_supported (tree vectype)
3267 {
3268 optab perm_even_optab, perm_odd_optab;
3269 int mode;
3270
3271 mode = (int) TYPE_MODE (vectype);
3272
3273 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3274 optab_default);
3275 if (!perm_even_optab)
3276 {
3277 if (vect_print_dump_info (REPORT_DETAILS))
3278 fprintf (vect_dump, "no optab for perm_even.");
3279 return false;
3280 }
3281
3282 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3283 {
3284 if (vect_print_dump_info (REPORT_DETAILS))
3285 fprintf (vect_dump, "perm_even op not supported by target.");
3286 return false;
3287 }
3288
3289 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3290 optab_default);
3291 if (!perm_odd_optab)
3292 {
3293 if (vect_print_dump_info (REPORT_DETAILS))
3294 fprintf (vect_dump, "no optab for perm_odd.");
3295 return false;
3296 }
3297
3298 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3299 {
3300 if (vect_print_dump_info (REPORT_DETAILS))
3301 fprintf (vect_dump, "perm_odd op not supported by target.");
3302 return false;
3303 }
3304 return true;
3305 }
3306
3307
3308 /* Function vect_permute_load_chain.
3309
3310 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3311 a power of 2, generate extract_even/odd stmts to reorder the input data
3312 correctly. Return the final references for loads in RESULT_CHAIN.
3313
3314 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3315 The input is 4 vectors each containing 8 elements. We assign a number to each
3316 element; the input sequence is:
3317
3318 1st vec: 0 1 2 3 4 5 6 7
3319 2nd vec: 8 9 10 11 12 13 14 15
3320 3rd vec: 16 17 18 19 20 21 22 23
3321 4th vec: 24 25 26 27 28 29 30 31
3322
3323 The output sequence should be:
3324
3325 1st vec: 0 4 8 12 16 20 24 28
3326 2nd vec: 1 5 9 13 17 21 25 29
3327 3rd vec: 2 6 10 14 18 22 26 30
3328 4th vec: 3 7 11 15 19 23 27 31
3329
3330 i.e., the first output vector should contain the first elements of each
3331 interleaving group, etc.
3332
3333 We use extract_even/odd instructions to create such output. The input of each
3334 extract_even/odd operation is two vectors
3335 1st vec 2nd vec
3336 0 1 2 3 4 5 6 7
3337
3338 and the output is the vector of extracted even/odd elements. The output of
3339 extract_even will be: 0 2 4 6
3340 and of extract_odd: 1 3 5 7
3341
3342
3343 The permutation is done in log LENGTH stages. In each stage extract_even and
3344 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3345 order. In our example,
3346
3347 E1: extract_even (1st vec, 2nd vec)
3348 E2: extract_odd (1st vec, 2nd vec)
3349 E3: extract_even (3rd vec, 4th vec)
3350 E4: extract_odd (3rd vec, 4th vec)
3351
3352 The output for the first stage will be:
3353
3354 E1: 0 2 4 6 8 10 12 14
3355 E2: 1 3 5 7 9 11 13 15
3356 E3: 16 18 20 22 24 26 28 30
3357 E4: 17 19 21 23 25 27 29 31
3358
3359 In order to proceed and create the correct sequence for the next stage (or
3360 for the correct output, if the second stage is the last one, as in our
3361 example), we first put the output of the extract_even operation and then the
3362 output of extract_odd into RESULT_CHAIN (which is then copied to DR_CHAIN).
3363 The input for the second stage is:
3364
3365 1st vec (E1): 0 2 4 6 8 10 12 14
3366 2nd vec (E3): 16 18 20 22 24 26 28 30
3367 3rd vec (E2): 1 3 5 7 9 11 13 15
3368 4th vec (E4): 17 19 21 23 25 27 29 31
3369
3370 The output of the second stage:
3371
3372 E1: 0 4 8 12 16 20 24 28
3373 E2: 2 6 10 14 18 22 26 30
3374 E3: 1 5 9 13 17 21 25 29
3375 E4: 3 7 11 15 19 23 27 31
3376
3377 And RESULT_CHAIN after reordering:
3378
3379 1st vec (E1): 0 4 8 12 16 20 24 28
3380 2nd vec (E3): 1 5 9 13 17 21 25 29
3381 3rd vec (E2): 2 6 10 14 18 22 26 30
3382 4th vec (E4): 3 7 11 15 19 23 27 31. */
3383
3384 bool
3385 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3386 unsigned int length,
3387 gimple stmt,
3388 gimple_stmt_iterator *gsi,
3389 VEC(tree,heap) **result_chain)
3390 {
3391 tree perm_dest, data_ref, first_vect, second_vect;
3392 gimple perm_stmt;
3393 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3394 int i;
3395 unsigned int j;
3396
3397 /* Check that the operation is supported. */
3398 if (!vect_strided_load_supported (vectype))
3399 return false;
3400
3401 *result_chain = VEC_copy (tree, heap, dr_chain);
3402 for (i = 0; i < exact_log2 (length); i++)
3403 {
3404 for (j = 0; j < length; j += 2)
3405 {
3406 first_vect = VEC_index (tree, dr_chain, j);
3407 second_vect = VEC_index (tree, dr_chain, j+1);
3408
3409 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3410 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3411 DECL_GIMPLE_REG_P (perm_dest) = 1;
3412 add_referenced_var (perm_dest);
3413
3414 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3415 perm_dest, first_vect,
3416 second_vect);
3417
3418 data_ref = make_ssa_name (perm_dest, perm_stmt);
3419 gimple_assign_set_lhs (perm_stmt, data_ref);
3420 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3421 mark_symbols_for_renaming (perm_stmt);
3422
3423 VEC_replace (tree, *result_chain, j/2, data_ref);
3424
3425 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3426 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3427 DECL_GIMPLE_REG_P (perm_dest) = 1;
3428 add_referenced_var (perm_dest);
3429
3430 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3431 perm_dest, first_vect,
3432 second_vect);
3433 data_ref = make_ssa_name (perm_dest, perm_stmt);
3434 gimple_assign_set_lhs (perm_stmt, data_ref);
3435 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3436 mark_symbols_for_renaming (perm_stmt);
3437
3438 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3439 }
3440 dr_chain = VEC_copy (tree, heap, *result_chain);
3441 }
3442 return true;
3443 }
3444
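/* Illustrative sketch only, not part of the vectorizer: the same multi-stage
   extract_even/extract_odd reordering performed by vect_permute_load_chain,
   modeled on plain scalar arrays so the worked example in the comment above
   can be traced by hand.  VECS holds LENGTH "vectors" of NELTS elements laid
   out consecutively (the analogue of DR_CHAIN); WORK is a scratch buffer of
   the same size (the analogue of RESULT_CHAIN).  All names (example_*, VECS,
   WORK, NELTS) are hypothetical.  With LENGTH == 4, NELTS == 8 and VECS
   initialized to 0..31, VECS ends up holding the four output vectors listed
   in the comment above.  */

static void
example_permute_load_chain (int *vecs, unsigned int length,
                            unsigned int nelts, int *work)
{
  unsigned int stages, s, j, k;

  /* LENGTH is assumed to be a power of two; log2 (LENGTH) stages are run.  */
  for (stages = 0; (1u << stages) < length; stages++)
    ;

  for (s = 0; s < stages; s++)
    {
      for (j = 0; j < length; j += 2)
        {
          int *first = vecs + j * nelts;                  /* 1st vec of the pair.  */
          int *second = vecs + (j + 1) * nelts;           /* 2nd vec of the pair.  */
          int *even = work + (j / 2) * nelts;             /* extract_even result.  */
          int *odd = work + (j / 2 + length / 2) * nelts; /* extract_odd result.  */

          for (k = 0; k < nelts; k++)
            {
              /* Element 2k of the concatenated pair (first, second).  */
              even[k] = 2 * k < nelts ? first[2 * k] : second[2 * k - nelts];
              /* Element 2k + 1 of the concatenated pair.  */
              odd[k] = 2 * k + 1 < nelts ? first[2 * k + 1]
                                         : second[2 * k + 1 - nelts];
            }
        }

      /* The results of this stage become the input of the next stage,
         mirroring the copy of RESULT_CHAIN back into DR_CHAIN.  */
      for (k = 0; k < length * nelts; k++)
        vecs[k] = work[k];
    }
}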
3445
3446 /* Function vect_transform_strided_load.
3447
3448 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3449 to perform their permutation and record the resulting vectorized statements
3450 in the corresponding scalar statements.
3451 */
3452
3453 bool
3454 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3455 gimple_stmt_iterator *gsi)
3456 {
3457 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3458 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3459 gimple next_stmt, new_stmt;
3460 VEC(tree,heap) *result_chain = NULL;
3461 unsigned int i, gap_count;
3462 tree tmp_data_ref;
3463
3464 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3465 RESULT_CHAIN is the output of vect_permute_load_chain; it contains permuted
3466 vectors that are ready for vector computation. */
3467 result_chain = VEC_alloc (tree, heap, size);
3468 /* Permute. */
3469 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3470 return false;
3471
3472 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3473 Since we scan the chain starting from its first node, their order
3474 corresponds to the order of data-refs in RESULT_CHAIN. */
3475 next_stmt = first_stmt;
3476 gap_count = 1;
3477 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3478 {
3479 if (!next_stmt)
3480 break;
3481
3482 /* Skip the gaps. Loads created for the gaps will be removed by the dead
3483 code elimination pass later. No need to check for the first stmt in
3484 the group, since it always exists.
3485 DR_GROUP_GAP is the number of steps in elements from the previous
3486 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3487 correspond to the gaps.
3488 */
3489 if (next_stmt != first_stmt
3490 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3491 {
3492 gap_count++;
3493 continue;
3494 }
3495
3496 while (next_stmt)
3497 {
3498 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3499 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3500 copies, and we put the new vector statement in the first available
3501 RELATED_STMT. */
3502 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3503 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3504 else
3505 {
3506 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3507 {
3508 gimple prev_stmt =
3509 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3510 gimple rel_stmt =
3511 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3512 while (rel_stmt)
3513 {
3514 prev_stmt = rel_stmt;
3515 rel_stmt =
3516 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3517 }
3518
3519 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3520 new_stmt;
3521 }
3522 }
3523
3524 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3525 gap_count = 1;
3526 /* If NEXT_STMT accesses the same DR as the previous statement,
3527 put the same TMP_DATA_REF as its vectorized statement; otherwise
3528 get the next data-ref from RESULT_CHAIN. */
3529 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3530 break;
3531 }
3532 }
3533
3534 VEC_free (tree, heap, result_chain);
3535 return true;
3536 }
3537
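/* Illustrative sketch only, not part of the vectorizer: a scalar model of
   how vect_transform_strided_load above matches permuted results with the
   members of an interleaving group in the presence of gaps.  GROUP_GAP[m]
   plays the role of DR_GROUP_GAP for member M (the distance in elements
   from the previous access; 1 means no gap), and MEMBER_OF_RESULT[i]
   records which member receives result I, or -1 for a load that exists
   only because of a gap.  The handling of DR_GROUP_SAME_DR_STMT and of
   multiple vector copies is deliberately left out; all names are
   hypothetical.  */

static void
example_assign_permuted_results (unsigned int nresults, unsigned int nmembers,
                                 const unsigned int *group_gap,
                                 int *member_of_result)
{
  unsigned int i, m = 0, gap_count = 1;

  for (i = 0; i < nresults && m < nmembers; i++)
    {
      /* The first member always exists; a later member may be preceded by
         GROUP_GAP[m] - 1 loads that correspond only to a gap.  */
      if (m != 0 && gap_count < group_gap[m])
        {
          member_of_result[i] = -1;     /* Gap load; DCE removes it later.  */
          gap_count++;
          continue;
        }

      member_of_result[i] = (int) m;
      m++;
      gap_count = 1;
    }
}
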
3538 /* Function vect_can_force_dr_alignment_p.
3539
3540 Returns whether the alignment of DECL can be forced to an ALIGNMENT-bit
3541 boundary. */
3542
3543 bool
3544 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3545 {
3546 if (TREE_CODE (decl) != VAR_DECL)
3547 return false;
3548
3549 if (DECL_EXTERNAL (decl))
3550 return false;
3551
3552 if (TREE_ASM_WRITTEN (decl))
3553 return false;
3554
3555 if (TREE_STATIC (decl))
3556 return (alignment <= MAX_OFILE_ALIGNMENT);
3557 else
3558 return (alignment <= MAX_STACK_ALIGNMENT);
3559 }
3560
3561 /* Function vect_supportable_dr_alignment.
3562
3563 Return whether the data reference DR is supported with respect to its
3564 alignment. */
3565
3566 enum dr_alignment_support
3567 vect_supportable_dr_alignment (struct data_reference *dr)
3568 {
3569 gimple stmt = DR_STMT (dr);
3570 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3571 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3572 enum machine_mode mode = TYPE_MODE (vectype);
3573 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3574 struct loop *vect_loop = NULL;
3575 bool nested_in_vect_loop = false;
3576
3577 if (aligned_access_p (dr))
3578 return dr_aligned;
3579
3580 if (!loop_vinfo)
3581 /* FORNOW: Misaligned accesses are supported only in loops. */
3582 return dr_unaligned_unsupported;
3583
3584 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3585 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3586
3587 /* Possibly unaligned access. */
3588
3589 /* We can choose between using the implicit realignment scheme (generating
3590 a misaligned_move stmt) and the explicit realignment scheme (generating
3591 aligned loads with a REALIGN_LOAD). There are two variants of the explicit
3592 realignment scheme: optimized and unoptimized.
3593 We can optimize the realignment only if the step between consecutive
3594 vector loads is equal to the vector size. Since the vector memory
3595 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3596 is guaranteed that the misalignment amount remains the same throughout the
3597 execution of the vectorized loop. Therefore, we can create the
3598 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3599 at the loop preheader.
3600
3601 However, in the case of outer-loop vectorization, when vectorizing a
3602 memory access in the inner-loop nested within the LOOP that is now being
3603 vectorized, while it is guaranteed that the misalignment of the
3604 vectorized memory access will remain the same in different outer-loop
3605 iterations, it is *not* guaranteed that it will remain the same throughout
3606 the execution of the inner-loop. This is because the inner-loop advances
3607 with the original scalar step (and not in steps of VS). If the inner-loop
3608 step happens to be a multiple of VS, then the misalignment remains fixed
3609 and we can use the optimized realignment scheme. For example:
3610
3611 for (i=0; i<N; i++)
3612 for (j=0; j<M; j++)
3613 s += a[i+j];
3614
3615 When vectorizing the i-loop in the above example, the step between
3616 consecutive vector loads is 1, and so the misalignment does not remain
3617 fixed across the execution of the inner-loop, and the realignment cannot
3618 be optimized (as illustrated in the following pseudo vectorized loop):
3619
3620 for (i=0; i<N; i+=4)
3621 for (j=0; j<M; j++){
3622 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3623 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3624 // (assuming that we start from an aligned address).
3625 }
3626
3627 We therefore have to use the unoptimized realignment scheme:
3628
3629 for (i=0; i<N; i+=4)
3630 for (j=k; j<M; j+=4)
3631 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3632 // that the misalignment of the initial address is
3633 // 0).
3634
3635 The loop can then be vectorized as follows:
3636
3637 for (k=0; k<4; k++){
3638 rt = get_realignment_token (&vp[k]);
3639 for (i=0; i<N; i+=4){
3640 v1 = vp[i+k];
3641 for (j=k; j<M; j+=4){
3642 v2 = vp[i+j+VS-1];
3643 va = REALIGN_LOAD <v1,v2,rt>;
3644 vs += va;
3645 v1 = v2;
3646 }
3647 }
3648 } */
3649
3650 if (DR_IS_READ (dr))
3651 {
3652 bool is_packed = false;
3653 tree type = (TREE_TYPE (DR_REF (dr)));
3654
3655 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3656 CODE_FOR_nothing
3657 && (!targetm.vectorize.builtin_mask_for_load
3658 || targetm.vectorize.builtin_mask_for_load ()))
3659 {
3660 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3661 if (nested_in_vect_loop
3662 && (TREE_INT_CST_LOW (DR_STEP (dr))
3663 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3664 return dr_explicit_realign;
3665 else
3666 return dr_explicit_realign_optimized;
3667 }
3668 if (!known_alignment_for_access_p (dr))
3669 {
3670 tree ba = DR_BASE_OBJECT (dr);
3671
3672 if (ba)
3673 is_packed = contains_packed_reference (ba);
3674 }
3675
3676 if (targetm.vectorize.
3677 support_vector_misalignment (mode, type,
3678 DR_MISALIGNMENT (dr), is_packed))
3679 /* Can't software pipeline the loads, but can at least do them. */
3680 return dr_unaligned_supported;
3681 }
3682 else
3683 {
3684 bool is_packed = false;
3685 tree type = (TREE_TYPE (DR_REF (dr)));
3686
3687 if (!known_alignment_for_access_p (dr))
3688 {
3689 tree ba = DR_BASE_OBJECT (dr);
3690
3691 if (ba)
3692 is_packed = contains_packed_reference (ba);
3693 }
3694
3695 if (targetm.vectorize.
3696 support_vector_misalignment (mode, type,
3697 DR_MISALIGNMENT (dr), is_packed))
3698 return dr_unaligned_supported;
3699 }
3700
3701 /* Unsupported. */
3702 return dr_unaligned_unsupported;
3703 }
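
/* Illustrative sketch only, not part of the vectorizer: a scalar model of
   the explicit realignment scheme discussed in the comment above.  A
   misaligned "vector" load of EXAMPLE_VS elements starting at offset OFF is
   emulated by two aligned loads combined under the realignment token
   RT = OFF % EXAMPLE_VS, mirroring va = REALIGN_LOAD <v1, v2, rt>.  In the
   optimized variant, the second aligned load of one iteration is reused as
   the first aligned load of the next, which is valid only when the
   misalignment stays fixed across iterations.  All names (EXAMPLE_VS,
   example_realign_load, BASE, OFF, OUT) are hypothetical.  */

#define EXAMPLE_VS 4

static void
example_realign_load (const int *base, unsigned int off, int *out)
{
  unsigned int rt = off % EXAMPLE_VS;   /* Realignment token.  */
  const int *v1 = base + (off - rt);    /* Aligned vector containing OFF.  */
  const int *v2 = v1 + EXAMPLE_VS;      /* The next aligned vector.  */
  unsigned int k;

  for (k = 0; k < EXAMPLE_VS; k++)
    /* Take the tail of V1 followed by the head of V2, selected by RT,
       so that OUT[k] == BASE[OFF + k].  */
    out[k] = rt + k < EXAMPLE_VS ? v1[rt + k] : v2[rt + k - EXAMPLE_VS];
}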