gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "toplev.h"
41
42
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different from the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop is present only as the rhs of a
50 promotion operation, we would otherwise miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
56 check the rhs.
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
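/* Illustrative example (not part of the original sources): for the
   assignment 'int_x = (int) short_y', the lhs type has size 4 and the rhs
   type size 2, so the function below returns the short type, and the
   vectorization factor is then derived from the 2-byte element.  */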
59
60 tree
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
63 {
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
66
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
68
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
73 {
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
75
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
79 }
80
81 *lhs_size_unit = lhs;
82 *rhs_size_unit = rhs;
83 return scalar_type;
84 }
85
86
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
89
90 int
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92 {
93 gimple next_stmt = first_stmt;
94 int result = 0;
95
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
98
99 while (next_stmt && next_stmt != stmt)
100 {
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103 }
104
105 if (next_stmt)
106 return result;
107 else
108 return -1;
109 }
110
111
112 /* Function vect_insert_into_interleaving_chain.
113
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
115
116 static void
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
119 {
120 gimple prev, next;
121 tree next_init;
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127 while (next)
128 {
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131 {
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
136 }
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139 }
140
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144 }
145
146
147 /* Function vect_update_interleaving_chain.
148
149 For two data-refs DRA and DRB that are part of a chain of interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
151
152 There are four possible cases:
153 1. New stmts - neither DRA nor DRB is a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. Both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
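/* Illustrative sketch of case 4 (not part of the original sources): if
   DRA's chain contains accesses with inits {0, 8} and DRB's chain contains
   {4, 12}, the chain whose head has the smaller init (DRA's, head 0)
   survives, and the members of the other chain are merged into it in
   increasing init order, giving {0, 4, 8, 12} with all members pointing to
   the same FIRST_DR.  */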
169
170 static void
171 vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
173 {
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
180
181 /* 1. New stmts - neither DRA nor DRB is a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183 {
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
188 }
189
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192 {
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
197 }
198
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201 {
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
206
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208 {
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
214
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
218 {
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221 }
222 }
223 else
224 {
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
228 }
229 return;
230 }
231
232 /* 4. Both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241 {
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning). */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
248 }
249 else
250 {
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning). */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
257 }
258
259 while (node)
260 {
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263 while (next)
264 {
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
267 {
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
273 }
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276 }
277 if (!next)
278 {
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
283 }
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
286 }
287 }
288
289
290 /* Function vect_equal_offsets.
291
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
293
294 static bool
295 vect_equal_offsets (tree offset1, tree offset2)
296 {
297 bool res;
298
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
301
302 if (offset1 == offset2)
303 return true;
304
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
307 return false;
308
309 res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
310 TREE_OPERAND (offset2, 0));
311
312 if (!res || !BINARY_CLASS_P (offset1))
313 return res;
314
315 res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
316 TREE_OPERAND (offset2, 1));
317
318 return res;
319 }
320
321
322 /* Function vect_check_interleaving.
323
324 Check if DRA and DRB are a part of interleaving. In case they are, insert
325 DRA and DRB in an interleaving chain. */
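/* A typical example (illustrative, not part of the original sources),
   assuming 4-byte elements:

     for (i = 0; i < n; i++)
       {
         p[2*i]     = x;    DRA: init 0, step 8
         p[2*i + 1] = y;    DRB: init 4, step 8
       }

   Both refs share the same base and offset, their inits differ by one
   element size, and the common step covers both accesses, so they form an
   interleaving group of size 2.  */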
326
327 static bool
328 vect_check_interleaving (struct data_reference *dra,
329 struct data_reference *drb)
330 {
331 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
332
333 /* Check that the data-refs have the same first location (except init) and
334 that they are both either stores or loads (not one load and one store). */
335 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
336 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
337 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
338 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
339 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
340 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
341 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
342 || DR_IS_READ (dra) != DR_IS_READ (drb))
343 return false;
344
345 /* Check:
346 1. data-refs are of the same type
347 2. their steps are equal
348 3. the step (if greater than zero) is greater than the difference between
349 data-refs' inits. */
350 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
351 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
352
353 if (type_size_a != type_size_b
354 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
355 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
356 TREE_TYPE (DR_REF (drb))))
357 return false;
358
359 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
360 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
361 step = TREE_INT_CST_LOW (DR_STEP (dra));
362
363 if (init_a > init_b)
364 {
365 /* If init_a == init_b + the size of the type * k, we have an interleaving,
366 and DRB is accessed before DRA. */
367 diff_mod_size = (init_a - init_b) % type_size_a;
368
369 if (step && (init_a - init_b) > step)
370 return false;
371
372 if (diff_mod_size == 0)
373 {
374 vect_update_interleaving_chain (drb, dra);
375 if (vect_print_dump_info (REPORT_DR_DETAILS))
376 {
377 fprintf (vect_dump, "Detected interleaving ");
378 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
379 fprintf (vect_dump, " and ");
380 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
381 }
382 return true;
383 }
384 }
385 else
386 {
387 /* If init_b == init_a + the size of the type * k, we have an
388 interleaving, and DRA is accessed before DRB. */
389 diff_mod_size = (init_b - init_a) % type_size_a;
390
391 if (step && (init_b - init_a) > step)
392 return false;
393
394 if (diff_mod_size == 0)
395 {
396 vect_update_interleaving_chain (dra, drb);
397 if (vect_print_dump_info (REPORT_DR_DETAILS))
398 {
399 fprintf (vect_dump, "Detected interleaving ");
400 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
401 fprintf (vect_dump, " and ");
402 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
403 }
404 return true;
405 }
406 }
407
408 return false;
409 }
410
411 /* Check if the data references pointed to by DR_I and DR_J are the same or
412 belong to the same interleaving group. Return FALSE if the drs are
413 different, otherwise return TRUE. */
414
415 static bool
416 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
417 {
418 gimple stmt_i = DR_STMT (dr_i);
419 gimple stmt_j = DR_STMT (dr_j);
420
421 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
422 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
423 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
424 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
425 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
426 return true;
427 else
428 return false;
429 }
430
431 /* If address ranges represented by DDR_I and DDR_J are equal,
432 return TRUE, otherwise return FALSE. */
433
434 static bool
435 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
436 {
437 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
438 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
439 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
440 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
441 return true;
442 else
443 return false;
444 }
445
446 /* Insert DDR into LOOP_VINFO list of ddrs that may alias and need to be
447 tested at run-time. Return TRUE if DDR was successfully inserted.
448 Return FALSE if versioning is not supported. */
449
450 static bool
451 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
452 {
453 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
454
455 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
456 return false;
457
458 if (vect_print_dump_info (REPORT_DR_DETAILS))
459 {
460 fprintf (vect_dump, "mark for run-time aliasing test between ");
461 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
462 fprintf (vect_dump, " and ");
463 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
464 }
465
466 if (optimize_loop_nest_for_size_p (loop))
467 {
468 if (vect_print_dump_info (REPORT_DR_DETAILS))
469 fprintf (vect_dump, "versioning not supported when optimizing for size.");
470 return false;
471 }
472
473 /* FORNOW: We don't support versioning with outer-loop vectorization. */
474 if (loop->inner)
475 {
476 if (vect_print_dump_info (REPORT_DR_DETAILS))
477 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
478 return false;
479 }
480
481 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
482 return true;
483 }
484
485
486 /* Function vect_analyze_data_ref_dependence.
487
488 Return TRUE if there (might) exist a dependence between a memory-reference
489 DRA and a memory-reference DRB. When versioning for alias can check the
490 dependence at run-time, return FALSE. */
491
492 static bool
493 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
494 loop_vec_info loop_vinfo)
495 {
496 unsigned int i;
497 struct loop *loop = NULL;
498 int vectorization_factor = 0;
499 struct data_reference *dra = DDR_A (ddr);
500 struct data_reference *drb = DDR_B (ddr);
501 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
502 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
503 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
504 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
505 lambda_vector dist_v;
506 unsigned int loop_depth;
507
508 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
509 {
510 /* Independent data accesses. */
511 vect_check_interleaving (dra, drb);
512 return false;
513 }
514
515 if (loop_vinfo)
516 {
517 loop = LOOP_VINFO_LOOP (loop_vinfo);
518 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
519 }
520
521 if ((DR_IS_READ (dra) && DR_IS_READ (drb) && loop_vinfo) || dra == drb)
522 return false;
523
524 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
525 {
526 if (loop_vinfo)
527 {
528 if (vect_print_dump_info (REPORT_DR_DETAILS))
529 {
530 fprintf (vect_dump, "versioning for alias required: "
531 "can't determine dependence between ");
532 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
533 fprintf (vect_dump, " and ");
534 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
535 }
536
537 /* Add to list of ddrs that need to be tested at run-time. */
538 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
539 }
540
541 /* When vectorizing a basic block an unknown dependence can still mean
542 a strided access. */
543 if (vect_check_interleaving (dra, drb))
544 return false;
545
546 if (vect_print_dump_info (REPORT_DR_DETAILS))
547 {
548 fprintf (vect_dump, "can't determine dependence between ");
549 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
550 fprintf (vect_dump, " and ");
551 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
552 }
553
554 return true;
555 }
556
557 /* Versioning for alias is not yet supported for basic block SLP, and
558 dependence distance is not applicable; hence, in case of a known data
559 dependence, basic block vectorization is impossible for now. */
560 if (!loop_vinfo)
561 {
562 if (dra != drb && vect_check_interleaving (dra, drb))
563 return false;
564
565 if (vect_print_dump_info (REPORT_DR_DETAILS))
566 {
567 fprintf (vect_dump, "determined dependence between ");
568 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
569 fprintf (vect_dump, " and ");
570 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
571 }
572
573 return true;
574 }
575
576 /* Loop-based vectorization and known data dependence. */
577 if (DDR_NUM_DIST_VECTS (ddr) == 0)
578 {
579 if (vect_print_dump_info (REPORT_DR_DETAILS))
580 {
581 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
582 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
583 fprintf (vect_dump, " and ");
584 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
585 }
586 /* Add to list of ddrs that need to be tested at run-time. */
587 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
588 }
589
590 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
591 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
592 {
593 int dist = dist_v[loop_depth];
594
595 if (vect_print_dump_info (REPORT_DR_DETAILS))
596 fprintf (vect_dump, "dependence distance = %d.", dist);
597
598 /* Same loop iteration. */
599 if (dist % vectorization_factor == 0 && dra_size == drb_size)
600 {
601 /* Two references with distance zero have the same alignment. */
602 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
603 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
604 if (vect_print_dump_info (REPORT_ALIGNMENT))
605 fprintf (vect_dump, "accesses have the same alignment.");
606 if (vect_print_dump_info (REPORT_DR_DETAILS))
607 {
608 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
609 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
610 fprintf (vect_dump, " and ");
611 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
612 }
613
614 /* For interleaving, mark that there is a read-write dependency if
615 necessary. We have already checked that one of the data-refs is a store. */
616 if (DR_IS_READ (dra))
617 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
618 else
619 {
620 if (DR_IS_READ (drb))
621 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
622 }
623
624 continue;
625 }
626
627 if (abs (dist) >= vectorization_factor
628 || (dist > 0 && DDR_REVERSED_P (ddr)))
629 {
630 /* In this case the dependence distance does not create a dependence,
631 as far as vectorization is concerned. If DDR_REVERSED_P, the
632 order of the data-refs in DDR was reversed (to make the distance
633 vector positive), and the actual distance is negative. */
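/* For example (illustrative): with a vectorization factor of 4, a
   distance whose absolute value is 4 or more (or a negative actual
   distance) cannot be violated by processing 4 iterations at once, so
   the analysis moves on; a small positive distance such as 1 falls
   through to the possible-dependence report below.  */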
634 if (vect_print_dump_info (REPORT_DR_DETAILS))
635 fprintf (vect_dump, "dependence distance >= VF or negative.");
636 continue;
637 }
638
639 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
640 {
641 fprintf (vect_dump, "not vectorized, possible dependence "
642 "between data-refs ");
643 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
644 fprintf (vect_dump, " and ");
645 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
646 }
647
648 return true;
649 }
650
651 return false;
652 }
653
654 /* Function vect_analyze_data_ref_dependences.
655
656 Examine all the data references in the loop, and make sure there do not
657 exist any data dependences between them. */
658
659 bool
660 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo,
661 bb_vec_info bb_vinfo)
662 {
663 unsigned int i;
664 VEC (ddr_p, heap) *ddrs = NULL;
665 struct data_dependence_relation *ddr;
666
667 if (vect_print_dump_info (REPORT_DETAILS))
668 fprintf (vect_dump, "=== vect_analyze_dependences ===");
669
670 if (loop_vinfo)
671 ddrs = LOOP_VINFO_DDRS (loop_vinfo);
672 else
673 ddrs = BB_VINFO_DDRS (bb_vinfo);
674
675 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
676 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
677 return false;
678
679 return true;
680 }
681
682
683 /* Function vect_compute_data_ref_alignment
684
685 Compute the misalignment of the data reference DR.
686
687 Output:
688 1. If during the misalignment computation it is found that the data reference
689 cannot be vectorized then false is returned.
690 2. DR_MISALIGNMENT (DR) is defined.
691
692 FOR NOW: No analysis is actually performed. Misalignment is calculated
693 only for trivial cases. TODO. */
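/* Illustrative example (not part of the original sources): for an access
   to a[1] in an int array whose base is 16-byte aligned, DR_INIT is 4, so
   with a 16-byte vector type the computation below yields
   DR_MISALIGNMENT = 4 % 16 = 4.  */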
694
695 static bool
696 vect_compute_data_ref_alignment (struct data_reference *dr)
697 {
698 gimple stmt = DR_STMT (dr);
699 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
701 struct loop *loop = NULL;
702 tree ref = DR_REF (dr);
703 tree vectype;
704 tree base, base_addr;
705 bool base_aligned;
706 tree misalign;
707 tree aligned_to, alignment;
708
709 if (vect_print_dump_info (REPORT_DETAILS))
710 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
711
712 if (loop_vinfo)
713 loop = LOOP_VINFO_LOOP (loop_vinfo);
714
715 /* Initialize misalignment to unknown. */
716 SET_DR_MISALIGNMENT (dr, -1);
717
718 misalign = DR_INIT (dr);
719 aligned_to = DR_ALIGNED_TO (dr);
720 base_addr = DR_BASE_ADDRESS (dr);
721 vectype = STMT_VINFO_VECTYPE (stmt_info);
722
723 /* In case the dataref is in an inner-loop of the loop that is being
724 vectorized (LOOP), we use the base and misalignment information
725 relative to the outer-loop (LOOP). This is ok only if the misalignment
726 stays the same throughout the execution of the inner-loop, which is why
727 we have to check that the stride of the dataref in the inner-loop is a
728 multiple of the vector size. */
729 if (loop && nested_in_vect_loop_p (loop, stmt))
730 {
731 tree step = DR_STEP (dr);
732 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
733
734 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
735 {
736 if (vect_print_dump_info (REPORT_ALIGNMENT))
737 fprintf (vect_dump, "inner step divides the vector-size.");
738 misalign = STMT_VINFO_DR_INIT (stmt_info);
739 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
740 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
741 }
742 else
743 {
744 if (vect_print_dump_info (REPORT_ALIGNMENT))
745 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
746 misalign = NULL_TREE;
747 }
748 }
749
750 base = build_fold_indirect_ref (base_addr);
751 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
752
753 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
754 || !misalign)
755 {
756 if (vect_print_dump_info (REPORT_ALIGNMENT))
757 {
758 fprintf (vect_dump, "Unknown alignment for access: ");
759 print_generic_expr (vect_dump, base, TDF_SLIM);
760 }
761 return true;
762 }
763
764 if ((DECL_P (base)
765 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
766 alignment) >= 0)
767 || (TREE_CODE (base_addr) == SSA_NAME
768 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
769 TREE_TYPE (base_addr)))),
770 alignment) >= 0))
771 base_aligned = true;
772 else
773 base_aligned = false;
774
775 if (!base_aligned)
776 {
777 /* Do not change the alignment of global variables if
778 flag_section_anchors is enabled. */
779 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
780 || (TREE_STATIC (base) && flag_section_anchors))
781 {
782 if (vect_print_dump_info (REPORT_DETAILS))
783 {
784 fprintf (vect_dump, "can't force alignment of ref: ");
785 print_generic_expr (vect_dump, ref, TDF_SLIM);
786 }
787 return true;
788 }
789
790 /* Force the alignment of the decl.
791 NOTE: This is the only change to the code we make during
792 the analysis phase, before deciding to vectorize the loop. */
793 if (vect_print_dump_info (REPORT_DETAILS))
794 fprintf (vect_dump, "force alignment");
795 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
796 DECL_USER_ALIGN (base) = 1;
797 }
798
799 /* At this point we assume that the base is aligned. */
800 gcc_assert (base_aligned
801 || (TREE_CODE (base) == VAR_DECL
802 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
803
804 /* Modulo alignment. */
805 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
806
807 if (!host_integerp (misalign, 1))
808 {
809 /* Negative or overflowed misalignment value. */
810 if (vect_print_dump_info (REPORT_DETAILS))
811 fprintf (vect_dump, "unexpected misalign value");
812 return false;
813 }
814
815 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
816
817 if (vect_print_dump_info (REPORT_DETAILS))
818 {
819 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
820 print_generic_expr (vect_dump, ref, TDF_SLIM);
821 }
822
823 return true;
824 }
825
826
827 /* Function vect_compute_data_refs_alignment
828
829 Compute the misalignment of data references in the loop.
830 Return FALSE if a data reference is found that cannot be vectorized. */
831
832 static bool
833 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo,
834 bb_vec_info bb_vinfo)
835 {
836 VEC (data_reference_p, heap) *datarefs;
837 struct data_reference *dr;
838 unsigned int i;
839
840 if (loop_vinfo)
841 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
842 else
843 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
844
845 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
846 if (!vect_compute_data_ref_alignment (dr))
847 return false;
848
849 return true;
850 }
851
852
853 /* Function vect_update_misalignment_for_peel
854
855 DR - the data reference whose misalignment is to be adjusted.
856 DR_PEEL - the data reference whose misalignment is being made
857 zero in the vector loop by the peel.
858 NPEEL - the number of iterations in the peel loop if the misalignment
859 of DR_PEEL is known at compile time. */
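/* Worked example (illustrative, not part of the original sources): for
   4-byte elements, a 16-byte vector type, NPEEL = 3 and
   DR_MISALIGNMENT (DR) = 4, the update below computes (4 + 3*4) % 16 = 0,
   i.e. DR becomes aligned after the peel.  If the misalignment of DR or
   DR_PEEL is not known at compile time, DR's misalignment is reset to -1
   (unknown).  */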
860
861 static void
862 vect_update_misalignment_for_peel (struct data_reference *dr,
863 struct data_reference *dr_peel, int npeel)
864 {
865 unsigned int i;
866 VEC(dr_p,heap) *same_align_drs;
867 struct data_reference *current_dr;
868 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
869 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
870 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
871 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
872
873 /* For interleaved data accesses the step in the loop must be multiplied by
874 the size of the interleaving group. */
875 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
876 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
877 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
878 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
879
880 /* It can be assumed that the data refs with the same alignment as dr_peel
881 are aligned in the vector loop. */
882 same_align_drs
883 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
884 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
885 {
886 if (current_dr != dr)
887 continue;
888 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
889 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
890 SET_DR_MISALIGNMENT (dr, 0);
891 return;
892 }
893
894 if (known_alignment_for_access_p (dr)
895 && known_alignment_for_access_p (dr_peel))
896 {
897 int misal = DR_MISALIGNMENT (dr);
898 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
899 misal += npeel * dr_size;
900 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
901 SET_DR_MISALIGNMENT (dr, misal);
902 return;
903 }
904
905 if (vect_print_dump_info (REPORT_DETAILS))
906 fprintf (vect_dump, "Setting misalignment to -1.");
907 SET_DR_MISALIGNMENT (dr, -1);
908 }
909
910
911 /* Function vect_verify_datarefs_alignment
912
913 Return TRUE if all data references in the loop can be
914 handled with respect to alignment. */
915
916 bool
917 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
918 {
919 VEC (data_reference_p, heap) *datarefs;
920 struct data_reference *dr;
921 enum dr_alignment_support supportable_dr_alignment;
922 unsigned int i;
923
924 if (loop_vinfo)
925 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
926 else
927 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
928
929 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
930 {
931 gimple stmt = DR_STMT (dr);
932 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
933
934 /* For interleaving, only the alignment of the first access matters. */
935 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
936 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
937 continue;
938
939 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
940 if (!supportable_dr_alignment)
941 {
942 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
943 {
944 if (DR_IS_READ (dr))
945 fprintf (vect_dump,
946 "not vectorized: unsupported unaligned load.");
947 else
948 fprintf (vect_dump,
949 "not vectorized: unsupported unaligned store.");
950 }
951 return false;
952 }
953 if (supportable_dr_alignment != dr_aligned
954 && vect_print_dump_info (REPORT_ALIGNMENT))
955 fprintf (vect_dump, "Vectorizing an unaligned access.");
956 }
957 return true;
958 }
959
960
961 /* Function vector_alignment_reachable_p
962
963 Return true if vector alignment for DR is reachable by peeling
964 a few loop iterations. Return false otherwise. */
965
966 static bool
967 vector_alignment_reachable_p (struct data_reference *dr)
968 {
969 gimple stmt = DR_STMT (dr);
970 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
971 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
972
973 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
974 {
975 /* For interleaved access we peel only if the number of iterations in
976 the prolog loop (VF - misalignment, counted in elements) is a multiple
977 of the number of the interleaved accesses. */
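/* For instance (illustrative): with a V4SI vector (4 elements) and a
   misalignment of one element, the prolog loop would run 4 - 1 = 3
   iterations; for an interleaving group of size 2 that is not a multiple
   of the group size, so peeling cannot make the access aligned.  */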
978 int elem_size, mis_in_elements;
979 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
980
981 /* FORNOW: handle only known alignment. */
982 if (!known_alignment_for_access_p (dr))
983 return false;
984
985 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
986 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
987
988 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
989 return false;
990 }
991
992 /* If the misalignment is known at compile time then allow peeling
993 only if natural alignment is reachable through peeling. */
994 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
995 {
996 HOST_WIDE_INT elmsize =
997 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
998 if (vect_print_dump_info (REPORT_DETAILS))
999 {
1000 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
1001 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
1002 }
1003 if (DR_MISALIGNMENT (dr) % elmsize)
1004 {
1005 if (vect_print_dump_info (REPORT_DETAILS))
1006 fprintf (vect_dump, "data size does not divide the misalignment.\n");
1007 return false;
1008 }
1009 }
1010
1011 if (!known_alignment_for_access_p (dr))
1012 {
1013 tree type = (TREE_TYPE (DR_REF (dr)));
1014 tree ba = DR_BASE_OBJECT (dr);
1015 bool is_packed = false;
1016
1017 if (ba)
1018 is_packed = contains_packed_reference (ba);
1019
1020 if (vect_print_dump_info (REPORT_DETAILS))
1021 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
1022 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
1023 return true;
1024 else
1025 return false;
1026 }
1027
1028 return true;
1029 }
1030
1031 /* Function vect_enhance_data_refs_alignment
1032
1033 This pass will use loop versioning and loop peeling in order to enhance
1034 the alignment of data references in the loop.
1035
1036 FOR NOW: we assume that whatever versioning/peeling takes place, only the
1037 original loop is to be vectorized; any other loops that are created by
1038 the transformations performed in this pass are not supposed to be
1039 vectorized. This restriction will be relaxed.
1040
1041 This pass will require a cost model to guide it on whether to apply peeling
1042 or versioning or a combination of the two. For example, the scheme that
1043 Intel uses when given a loop with several memory accesses is as follows:
1044 choose one memory access ('p') whose alignment you want to force by doing
1045 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
1046 other accesses are not necessarily aligned, or (2) use loop versioning to
1047 generate one loop in which all accesses are aligned, and another loop in
1048 which only 'p' is necessarily aligned.
1049
1050 ("Automatic Intra-Register Vectorization for the Intel Architecture",
1051 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
1052 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
1053
1054 Devising a cost model is the most critical aspect of this work. It will
1055 guide us on which access to peel for, whether to use loop versioning, how
1056 many versions to create, etc. The cost model will probably consist of
1057 generic considerations as well as target specific considerations (on
1058 powerpc for example, misaligned stores are more painful than misaligned
1059 loads).
1060
1061 Here are the general steps involved in alignment enhancements:
1062
1063 -- original loop, before alignment analysis:
1064 for (i=0; i<N; i++){
1065 x = q[i]; # DR_MISALIGNMENT(q) = unknown
1066 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1067 }
1068
1069 -- After vect_compute_data_refs_alignment:
1070 for (i=0; i<N; i++){
1071 x = q[i]; # DR_MISALIGNMENT(q) = 3
1072 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1073 }
1074
1075 -- Possibility 1: we do loop versioning:
1076 if (p is aligned) {
1077 for (i=0; i<N; i++){ # loop 1A
1078 x = q[i]; # DR_MISALIGNMENT(q) = 3
1079 p[i] = y; # DR_MISALIGNMENT(p) = 0
1080 }
1081 }
1082 else {
1083 for (i=0; i<N; i++){ # loop 1B
1084 x = q[i]; # DR_MISALIGNMENT(q) = 3
1085 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1086 }
1087 }
1088
1089 -- Possibility 2: we do loop peeling:
1090 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1091 x = q[i];
1092 p[i] = y;
1093 }
1094 for (i = 3; i < N; i++){ # loop 2A
1095 x = q[i]; # DR_MISALIGNMENT(q) = 0
1096 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1097 }
1098
1099 -- Possibility 3: combination of loop peeling and versioning:
1100 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1101 x = q[i];
1102 p[i] = y;
1103 }
1104 if (p is aligned) {
1105 for (i = 3; i<N; i++){ # loop 3A
1106 x = q[i]; # DR_MISALIGNMENT(q) = 0
1107 p[i] = y; # DR_MISALIGNMENT(p) = 0
1108 }
1109 }
1110 else {
1111 for (i = 3; i<N; i++){ # loop 3B
1112 x = q[i]; # DR_MISALIGNMENT(q) = 0
1113 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1114 }
1115 }
1116
1117 These loops are later passed to loop_transform to be vectorized. The
1118 vectorizer will use the alignment information to guide the transformation
1119 (whether to generate regular loads/stores, or with special handling for
1120 misalignment). */
1121
1122 bool
1123 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1124 {
1125 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1126 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1127 enum dr_alignment_support supportable_dr_alignment;
1128 struct data_reference *dr0 = NULL;
1129 struct data_reference *dr;
1130 unsigned int i;
1131 bool do_peeling = false;
1132 bool do_versioning = false;
1133 bool stat;
1134 gimple stmt;
1135 stmt_vec_info stmt_info;
1136 int vect_versioning_for_alias_required;
1137
1138 if (vect_print_dump_info (REPORT_DETAILS))
1139 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1140
1141 /* While cost model enhancements are expected in the future, the high level
1142 view of the code at this time is as follows:
1143
1144 A) If there is a misaligned access then see if peeling to align
1145 this access can make all data references satisfy
1146 vect_supportable_dr_alignment. If so, update data structures
1147 as needed and return true.
1148
1149 B) If peeling wasn't possible and there is a data reference with an
1150 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1151 then see if loop versioning checks can be used to make all data
1152 references satisfy vect_supportable_dr_alignment. If so, update
1153 data structures as needed and return true.
1154
1155 C) If neither peeling nor versioning were successful then return false if
1156 any data reference does not satisfy vect_supportable_dr_alignment.
1157
1158 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1159
1160 Note, Possibility 3 above (which is peeling and versioning together) is not
1161 being done at this time. */
1162
1163 /* (1) Peeling to force alignment. */
1164
1165 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1166 Considerations:
1167 + How many accesses will become aligned due to the peeling
1168 - How many accesses will become unaligned due to the peeling,
1169 and the cost of misaligned accesses.
1170 - The cost of peeling (the extra runtime checks, the increase
1171 in code size).
1172
1173 The scheme we use FORNOW: peel to force the alignment of the first
1174 unsupported misaligned access in the loop.
1175
1176 TODO: Use a cost model. */
1177
1178 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1179 {
1180 stmt = DR_STMT (dr);
1181 stmt_info = vinfo_for_stmt (stmt);
1182
1183 /* For interleaving, only the alignment of the first access
1184 matters. */
1185 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1186 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1187 continue;
1188
1189 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1190 {
1191 do_peeling = vector_alignment_reachable_p (dr);
1192 if (do_peeling)
1193 dr0 = dr;
1194 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1195 fprintf (vect_dump, "vector alignment may not be reachable");
1196 break;
1197 }
1198 }
1199
1200 vect_versioning_for_alias_required
1201 = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
1202
1203 /* Temporarily, if versioning for alias is required, we disable peeling
1204 until we support peeling and versioning. Often peeling for alignment
1205 will require peeling for loop-bound, which in turn requires that we
1206 know how to adjust the loop ivs after the loop. */
1207 if (vect_versioning_for_alias_required
1208 || !vect_can_advance_ivs_p (loop_vinfo)
1209 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1210 do_peeling = false;
1211
1212 if (do_peeling)
1213 {
1214 int mis;
1215 int npeel = 0;
1216 gimple stmt = DR_STMT (dr0);
1217 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1218 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1219 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1220
1221 if (known_alignment_for_access_p (dr0))
1222 {
1223 /* Since it's known at compile time, compute the number of iterations
1224 in the peeled loop (the peeling factor) for use in updating
1225 DR_MISALIGNMENT values. The peeling factor is the vectorization
1226 factor minus the misalignment as an element count. */
1227 mis = DR_MISALIGNMENT (dr0);
1228 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1229 npeel = nelements - mis;
1230
1231 /* For interleaved data access every iteration accesses all the
1232 members of the group, therefore we divide the number of iterations
1233 by the group size. */
1234 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1235 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1236 npeel /= DR_GROUP_SIZE (stmt_info);
1237
1238 if (vect_print_dump_info (REPORT_DETAILS))
1239 fprintf (vect_dump, "Try peeling by %d", npeel);
1240 }
1241
1242 /* Ensure that all data refs can be vectorized after the peel. */
1243 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1244 {
1245 int save_misalignment;
1246
1247 if (dr == dr0)
1248 continue;
1249
1250 stmt = DR_STMT (dr);
1251 stmt_info = vinfo_for_stmt (stmt);
1252 /* For interleaving, only the alignment of the first access
1253 matters. */
1254 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1255 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1256 continue;
1257
1258 save_misalignment = DR_MISALIGNMENT (dr);
1259 vect_update_misalignment_for_peel (dr, dr0, npeel);
1260 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1261 SET_DR_MISALIGNMENT (dr, save_misalignment);
1262
1263 if (!supportable_dr_alignment)
1264 {
1265 do_peeling = false;
1266 break;
1267 }
1268 }
1269
1270 if (do_peeling)
1271 {
1272 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1273 If the misalignment of DR_i is identical to that of dr0 then set
1274 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1275 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1276 by the peeling factor times the element size of DR_i (MOD the
1277 vectorization factor times the size). Otherwise, the
1278 misalignment of DR_i must be set to unknown. */
1279 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1280 if (dr != dr0)
1281 vect_update_misalignment_for_peel (dr, dr0, npeel);
1282
1283 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1284 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1285 SET_DR_MISALIGNMENT (dr0, 0);
1286 if (vect_print_dump_info (REPORT_ALIGNMENT))
1287 fprintf (vect_dump, "Alignment of access forced using peeling.");
1288
1289 if (vect_print_dump_info (REPORT_DETAILS))
1290 fprintf (vect_dump, "Peeling for alignment will be applied.");
1291
1292 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1293 gcc_assert (stat);
1294 return stat;
1295 }
1296 }
1297
1298
1299 /* (2) Versioning to force alignment. */
1300
1301 /* Try versioning if:
1302 1) flag_tree_vect_loop_version is TRUE
1303 2) optimize loop for speed
1304 3) there is at least one unsupported misaligned data ref with an unknown
1305 misalignment, and
1306 4) all misaligned data refs with a known misalignment are supported, and
1307 5) the number of runtime alignment checks is within reason. */
1308
1309 do_versioning =
1310 flag_tree_vect_loop_version
1311 && optimize_loop_nest_for_speed_p (loop)
1312 && (!loop->inner); /* FORNOW */
1313
1314 if (do_versioning)
1315 {
1316 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1317 {
1318 stmt = DR_STMT (dr);
1319 stmt_info = vinfo_for_stmt (stmt);
1320
1321 /* For interleaving, only the alignment of the first access
1322 matters. */
1323 if (aligned_access_p (dr)
1324 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1325 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1326 continue;
1327
1328 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1329
1330 if (!supportable_dr_alignment)
1331 {
1332 gimple stmt;
1333 int mask;
1334 tree vectype;
1335
1336 if (known_alignment_for_access_p (dr)
1337 || VEC_length (gimple,
1338 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1339 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1340 {
1341 do_versioning = false;
1342 break;
1343 }
1344
1345 stmt = DR_STMT (dr);
1346 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1347 gcc_assert (vectype);
1348
1349 /* The rightmost bits of an aligned address must be zeros.
1350 Construct the mask needed for this test. For example,
1351 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1352 mask must be 15 = 0xf. */
1353 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1354
1355 /* FORNOW: use the same mask to test all potentially unaligned
1356 references in the loop. The vectorizer currently supports
1357 a single vector size, see the reference to
1358 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1359 vectorization factor is computed. */
1360 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1361 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1362 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1363 VEC_safe_push (gimple, heap,
1364 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1365 DR_STMT (dr));
1366 }
1367 }
1368
1369 /* Versioning requires at least one misaligned data reference. */
1370 if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
1371 do_versioning = false;
1372 else if (!do_versioning)
1373 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1374 }
1375
1376 if (do_versioning)
1377 {
1378 VEC(gimple,heap) *may_misalign_stmts
1379 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1380 gimple stmt;
1381
1382 /* It can now be assumed that the data references in the statements
1383 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1384 of the loop being vectorized. */
1385 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1386 {
1387 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1388 dr = STMT_VINFO_DATA_REF (stmt_info);
1389 SET_DR_MISALIGNMENT (dr, 0);
1390 if (vect_print_dump_info (REPORT_ALIGNMENT))
1391 fprintf (vect_dump, "Alignment of access forced using versioning.");
1392 }
1393
1394 if (vect_print_dump_info (REPORT_DETAILS))
1395 fprintf (vect_dump, "Versioning for alignment will be applied.");
1396
1397 /* Peeling and versioning can't be done together at this time. */
1398 gcc_assert (! (do_peeling && do_versioning));
1399
1400 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1401 gcc_assert (stat);
1402 return stat;
1403 }
1404
1405 /* This point is reached if neither peeling nor versioning is being done. */
1406 gcc_assert (! (do_peeling || do_versioning));
1407
1408 stat = vect_verify_datarefs_alignment (loop_vinfo, NULL);
1409 return stat;
1410 }
1411
1412
1413 /* Function vect_analyze_data_refs_alignment
1414
1415 Analyze the alignment of the data-references in the loop.
1416 Return FALSE if a data reference is found that cannot be vectorized. */
1417
1418 bool
1419 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo,
1420 bb_vec_info bb_vinfo)
1421 {
1422 if (vect_print_dump_info (REPORT_DETAILS))
1423 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1424
1425 if (!vect_compute_data_refs_alignment (loop_vinfo, bb_vinfo))
1426 {
1427 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1428 fprintf (vect_dump,
1429 "not vectorized: can't calculate alignment for data ref.");
1430 return false;
1431 }
1432
1433 return true;
1434 }
1435
1436
1437 /* Analyze groups of strided accesses: check that DR belongs to a group of
1438 strided accesses of legal size, step, etc. Detect gaps, single element
1439 interleaving, and other special cases. Set strided access info.
1440 Collect groups of strided stores for further use in SLP analysis. */
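/* Illustrative example (not part of the original sources), with 4-byte
   elements:

     for (i = 0; i < n; i++)
       {
         x = a[4*i];        init 0
         y = a[4*i + 2];    init 8
       }

   The step is 16, so the group size (STRIDE, including gaps) is 4; the two
   loads leave gaps at positions 1 and 3 of the group, which is accepted
   for loads (though it disables SLP) but would be rejected for an
   interleaved store group.  */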
1441
1442 static bool
1443 vect_analyze_group_access (struct data_reference *dr)
1444 {
1445 tree step = DR_STEP (dr);
1446 tree scalar_type = TREE_TYPE (DR_REF (dr));
1447 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1448 gimple stmt = DR_STMT (dr);
1449 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1450 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1451 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
1452 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1453 HOST_WIDE_INT stride;
1454 bool slp_impossible = false;
1455
1456 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1457 interleaving group (including gaps). */
1458 stride = dr_step / type_size;
1459
1460 /* A non-consecutive access is possible only if it is a part of interleaving. */
1461 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1462 {
1463 /* Check if this DR is a part of interleaving, and is a single
1464 element of the group that is accessed in the loop. */
1465
1466 /* Gaps are supported only for loads. STEP must be a multiple of the type
1467 size. The size of the group must be a power of 2. */
1468 if (DR_IS_READ (dr)
1469 && (dr_step % type_size) == 0
1470 && stride > 0
1471 && exact_log2 (stride) != -1)
1472 {
1473 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1474 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1475 if (vect_print_dump_info (REPORT_DR_DETAILS))
1476 {
1477 fprintf (vect_dump, "Detected single element interleaving ");
1478 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1479 fprintf (vect_dump, " step ");
1480 print_generic_expr (vect_dump, step, TDF_SLIM);
1481 }
1482 return true;
1483 }
1484 if (vect_print_dump_info (REPORT_DETAILS))
1485 fprintf (vect_dump, "not consecutive access");
1486 return false;
1487 }
1488
1489 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1490 {
1491 /* First stmt in the interleaving chain. Check the chain. */
1492 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1493 struct data_reference *data_ref = dr;
1494 unsigned int count = 1;
1495 tree next_step;
1496 tree prev_init = DR_INIT (data_ref);
1497 gimple prev = stmt;
1498 HOST_WIDE_INT diff, count_in_bytes, gaps = 0;
1499
1500 while (next)
1501 {
1502 /* Skip same data-refs. In case that two or more stmts share a data-ref
1503 (supported only for loads), we vectorize only the first stmt, and
1504 the rest get their vectorized loads from the first one. */
1505 if (!tree_int_cst_compare (DR_INIT (data_ref),
1506 DR_INIT (STMT_VINFO_DATA_REF (
1507 vinfo_for_stmt (next)))))
1508 {
1509 if (!DR_IS_READ (data_ref))
1510 {
1511 if (vect_print_dump_info (REPORT_DETAILS))
1512 fprintf (vect_dump, "Two store stmts share the same dr.");
1513 return false;
1514 }
1515
1516 /* Check that there are no load-store dependencies for these loads
1517 to prevent a case of load-store-load to the same location. */
1518 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1519 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1520 {
1521 if (vect_print_dump_info (REPORT_DETAILS))
1522 fprintf (vect_dump,
1523 "READ_WRITE dependence in interleaving.");
1524 return false;
1525 }
1526
1527 /* For load use the same data-ref load. */
1528 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1529
1530 prev = next;
1531 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1532 continue;
1533 }
1534 prev = next;
1535
1536 /* Check that all the accesses have the same STEP. */
1537 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1538 if (tree_int_cst_compare (step, next_step))
1539 {
1540 if (vect_print_dump_info (REPORT_DETAILS))
1541 fprintf (vect_dump, "not consecutive access in interleaving");
1542 return false;
1543 }
1544
1545 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1546 /* Check that the distance between two accesses is equal to the type
1547 size. Otherwise, we have gaps. */
1548 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1549 - TREE_INT_CST_LOW (prev_init)) / type_size;
1550 if (diff != 1)
1551 {
1552 /* FORNOW: SLP of accesses with gaps is not supported. */
1553 slp_impossible = true;
1554 if (!DR_IS_READ (data_ref))
1555 {
1556 if (vect_print_dump_info (REPORT_DETAILS))
1557 fprintf (vect_dump, "interleaved store with gaps");
1558 return false;
1559 }
1560
1561 gaps += diff - 1;
1562 }
1563
1564 /* Store the gap from the previous member of the group. If there is no
1565 gap in the access, DR_GROUP_GAP is always 1. */
1566 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1567
1568 prev_init = DR_INIT (data_ref);
1569 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1570 /* Count the number of data-refs in the chain. */
1571 count++;
1572 }
1573
1574 /* COUNT is the number of accesses found, we multiply it by the size of
1575 the type to get COUNT_IN_BYTES. */
1576 count_in_bytes = type_size * count;
1577
1578 /* Check that the size of the interleaving (including gaps) is not
1579 greater than STEP. */
1580 if (dr_step && dr_step < count_in_bytes + gaps * type_size)
1581 {
1582 if (vect_print_dump_info (REPORT_DETAILS))
1583 {
1584 fprintf (vect_dump, "interleaving size is greater than step for ");
1585 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1586 }
1587 return false;
1588 }
1589
1590 /* Check that the size of the interleaving is equal to STEP for stores,
1591 i.e., that there are no gaps. */
1592 if (dr_step && dr_step != count_in_bytes)
1593 {
1594 if (DR_IS_READ (dr))
1595 {
1596 slp_impossible = true;
1597 /* There is a gap after the last load in the group. This gap is a
1598 difference between the stride and the number of elements. When
1599 there is no gap, this difference should be 0. */
1600 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1601 }
1602 else
1603 {
1604 if (vect_print_dump_info (REPORT_DETAILS))
1605 fprintf (vect_dump, "interleaved store with gaps");
1606 return false;
1607 }
1608 }
1609
1610 /* Check that STEP is a multiple of type size. */
1611 if (dr_step && (dr_step % type_size) != 0)
1612 {
1613 if (vect_print_dump_info (REPORT_DETAILS))
1614 {
1615 fprintf (vect_dump, "step is not a multiple of type size: step ");
1616 print_generic_expr (vect_dump, step, TDF_SLIM);
1617 fprintf (vect_dump, " size ");
1618 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1619 TDF_SLIM);
1620 }
1621 return false;
1622 }
1623
1624 /* FORNOW: we handle only interleaving that is a power of 2.
1625 We don't fail here if it may be still possible to vectorize the
1626 group using SLP. If not, the size of the group will be checked in
1627 vect_analyze_operations, and the vectorization will fail. */
1628 if (exact_log2 (stride) == -1)
1629 {
1630 if (vect_print_dump_info (REPORT_DETAILS))
1631 fprintf (vect_dump, "interleaving is not a power of 2");
1632
1633 if (slp_impossible)
1634 return false;
1635 }
1636
1637 if (stride == 0)
1638 stride = count;
1639
1640 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1641 if (vect_print_dump_info (REPORT_DETAILS))
1642 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1643
1644 /* SLP: create an SLP data structure for every interleaving group of
1645 stores for further analysis in vect_analyze_slp. */
1646 if (!DR_IS_READ (dr) && !slp_impossible)
1647 {
1648 if (loop_vinfo)
1649 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo),
1650 stmt);
1651 if (bb_vinfo)
1652 VEC_safe_push (gimple, heap, BB_VINFO_STRIDED_STORES (bb_vinfo),
1653 stmt);
1654 }
1655 }
1656
1657 return true;
1658 }
1659
1660
1661 /* Analyze the access pattern of the data-reference DR.
1662 In case of non-consecutive accesses call vect_analyze_group_access() to
1663 analyze groups of strided accesses. */
1664
1665 static bool
1666 vect_analyze_data_ref_access (struct data_reference *dr)
1667 {
1668 tree step = DR_STEP (dr);
1669 tree scalar_type = TREE_TYPE (DR_REF (dr));
1670 gimple stmt = DR_STMT (dr);
1671 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1672 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1673 struct loop *loop = NULL;
1674 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1675
1676 if (loop_vinfo)
1677 loop = LOOP_VINFO_LOOP (loop_vinfo);
1678
1679 if (loop_vinfo && !step)
1680 {
1681 if (vect_print_dump_info (REPORT_DETAILS))
1682 fprintf (vect_dump, "bad data-ref access in loop");
1683 return false;
1684 }
1685
1686 /* Don't allow invariant accesses in loops. */
1687 if (loop_vinfo && dr_step == 0)
1688 return false;
1689
1690 if (loop && nested_in_vect_loop_p (loop, stmt))
1691 {
1692 /* Interleaved accesses are not yet supported within outer-loop
1693 vectorization for references in the inner-loop. */
1694 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1695
1696 /* For the rest of the analysis we use the outer-loop step. */
1697 step = STMT_VINFO_DR_STEP (stmt_info);
1698 dr_step = TREE_INT_CST_LOW (step);
1699
1700 if (dr_step == 0)
1701 {
1702 if (vect_print_dump_info (REPORT_ALIGNMENT))
1703 fprintf (vect_dump, "zero step in outer loop.");
1704 if (DR_IS_READ (dr))
1705 return true;
1706 else
1707 return false;
1708 }
1709 }
1710
1711 /* Consecutive? */
1712 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1713 {
1714 /* Mark that it is not interleaving. */
1715 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1716 return true;
1717 }
1718
1719 if (loop && nested_in_vect_loop_p (loop, stmt))
1720 {
1721 if (vect_print_dump_info (REPORT_ALIGNMENT))
1722 fprintf (vect_dump, "strided access in outer loop.");
1723 return false;
1724 }
1725
1726 /* Not consecutive access - check if it's a part of interleaving group. */
1727 return vect_analyze_group_access (dr);
1728 }
1729
1730
1731 /* Function vect_analyze_data_ref_accesses.
1732
1733 Analyze the access pattern of all the data references in the loop.
1734
1735 FORNOW: the only access pattern that is considered vectorizable is a
1736 simple step 1 (consecutive) access.
1737
1738 FORNOW: handle only arrays and pointer accesses. */
1739
1740 bool
1741 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1742 {
1743 unsigned int i;
1744 VEC (data_reference_p, heap) *datarefs;
1745 struct data_reference *dr;
1746
1747 if (vect_print_dump_info (REPORT_DETAILS))
1748 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1749
1750 if (loop_vinfo)
1751 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1752 else
1753 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1754
1755 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1756 if (!vect_analyze_data_ref_access (dr))
1757 {
1758 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1759 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1760 return false;
1761 }
1762
1763 return true;
1764 }
1765
1766 /* Function vect_prune_runtime_alias_test_list.
1767
1768 Prune a list of ddrs to be tested at run-time by versioning for alias.
1769 Return FALSE if the resulting list of ddrs is longer than allowed by
1770 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
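/* For example (illustrative): a DDR kept in the list for the references a[i]
   and b[i] later results in a run-time check roughly of the form

     if (&a[n] <= &b[0] || &b[n] <= &a[0])
       ... vectorized loop ...
     else
       ... scalar fallback loop ...

   so the number of DDRs kept here bounds the number of generated checks.  */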
1771
1772 bool
1773 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1774 {
1775 VEC (ddr_p, heap) * ddrs =
1776 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1777 unsigned i, j;
1778
1779 if (vect_print_dump_info (REPORT_DETAILS))
1780 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1781
1782 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1783 {
1784 bool found;
1785 ddr_p ddr_i;
1786
1787 ddr_i = VEC_index (ddr_p, ddrs, i);
1788 found = false;
1789
1790 for (j = 0; j < i; j++)
1791 {
1792 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1793
1794 if (vect_vfa_range_equal (ddr_i, ddr_j))
1795 {
1796 if (vect_print_dump_info (REPORT_DR_DETAILS))
1797 {
1798 fprintf (vect_dump, "found equal ranges ");
1799 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1800 fprintf (vect_dump, ", ");
1801 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1802 fprintf (vect_dump, " and ");
1803 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1804 fprintf (vect_dump, ", ");
1805 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1806 }
1807 found = true;
1808 break;
1809 }
1810 }
1811
1812 if (found)
1813 {
1814 VEC_ordered_remove (ddr_p, ddrs, i);
1815 continue;
1816 }
1817 i++;
1818 }
1819
1820 if (VEC_length (ddr_p, ddrs) >
1821 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1822 {
1823 if (vect_print_dump_info (REPORT_DR_DETAILS))
1824 {
1825 fprintf (vect_dump,
1826 "disable versioning for alias - max number of generated "
1827 "checks exceeded.");
1828 }
1829
1830 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1831
1832 return false;
1833 }
1834
1835 return true;
1836 }
1837
1838
1839 /* Function vect_analyze_data_refs.
1840
1841 Find all the data references in the loop or basic block.
1842
1843 The general structure of the analysis of data refs in the vectorizer is as
1844 follows:
1845 1- vect_analyze_data_refs(loop/bb): call
1846 compute_data_dependences_for_loop/bb to find and analyze all data-refs
1847 in the loop/bb and their dependences.
1848 2- vect_analyze_dependences(): apply dependence testing using ddrs.
1849 3- vect_analyze_drs_alignment(): check that ref_stmt.alignment is ok.
1850 4- vect_analyze_drs_access(): check that ref_stmt.step is ok.
1851
1852 */
1853
1854 bool
1855 vect_analyze_data_refs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
1856 {
1857 struct loop *loop = NULL;
1858 basic_block bb = NULL;
1859 unsigned int i;
1860 VEC (data_reference_p, heap) *datarefs;
1861 struct data_reference *dr;
1862 tree scalar_type;
1863
1864 if (vect_print_dump_info (REPORT_DETAILS))
1865 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1866
1867 if (loop_vinfo)
1868 {
1869 loop = LOOP_VINFO_LOOP (loop_vinfo);
1870 compute_data_dependences_for_loop (loop, true,
1871 &LOOP_VINFO_DATAREFS (loop_vinfo),
1872 &LOOP_VINFO_DDRS (loop_vinfo));
1873 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1874 }
1875 else
1876 {
1877 bb = BB_VINFO_BB (bb_vinfo);
1878 compute_data_dependences_for_bb (bb, true,
1879 &BB_VINFO_DATAREFS (bb_vinfo),
1880 &BB_VINFO_DDRS (bb_vinfo));
1881 datarefs = BB_VINFO_DATAREFS (bb_vinfo);
1882 }
1883
1884 /* Go through the data-refs, check that the analysis succeeded, and update
1885 the DR and vectype pointers in the stmt_vec_info struct. */
1886
1887 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1888 {
1889 gimple stmt;
1890 stmt_vec_info stmt_info;
1891 tree base, offset, init;
1892
1893 if (!dr || !DR_REF (dr))
1894 {
1895 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1896 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1897 return false;
1898 }
1899
1900 stmt = DR_STMT (dr);
1901 stmt_info = vinfo_for_stmt (stmt);
1902
1903 /* Check that analysis of the data-ref succeeded. */
1904 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1905 || !DR_STEP (dr))
1906 {
1907 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1908 {
1909 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1910 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1911 }
1912 return false;
1913 }
1914
1915 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1916 {
1917 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
1918 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1919 "constant");
1920 return false;
1921 }
1922
1923 base = unshare_expr (DR_BASE_ADDRESS (dr));
1924 offset = unshare_expr (DR_OFFSET (dr));
1925 init = unshare_expr (DR_INIT (dr));
1926
1927 /* Update DR field in stmt_vec_info struct. */
1928
1929 /* If the dataref is in an inner-loop of the loop that is considered
1930 for vectorization, we also want to analyze the access relative to
1931 the outer-loop (DR contains information only relative to the
1932 inner-most enclosing loop). We do that by building a reference to the
1933 first location accessed by the inner-loop, and analyze it relative to
1934 the outer-loop. */
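/* For example (illustrative): in

     for (i = 0; i < N; i++)
       for (j = 0; j < M; j++)
         s += in[i+j];

   DR describes the access to IN relative to the j-loop only; the code below
   recomputes the base, offset, init and step of the access relative to the
   i-loop and stores them in the STMT_VINFO_DR_* fields.  */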
1935 if (loop && nested_in_vect_loop_p (loop, stmt))
1936 {
1937 tree outer_step, outer_base, outer_init;
1938 HOST_WIDE_INT pbitsize, pbitpos;
1939 tree poffset;
1940 enum machine_mode pmode;
1941 int punsignedp, pvolatilep;
1942 affine_iv base_iv, offset_iv;
1943 tree dinit;
1944
1945 /* Build a reference to the first location accessed by the
1946 inner-loop: *(BASE+INIT). (The first location is actually
1947 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1948 tree inner_base = build_fold_indirect_ref
1949 (fold_build2 (POINTER_PLUS_EXPR,
1950 TREE_TYPE (base), base,
1951 fold_convert (sizetype, init)));
1952
1953 if (vect_print_dump_info (REPORT_DETAILS))
1954 {
1955 fprintf (vect_dump, "analyze in outer-loop: ");
1956 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1957 }
1958
1959 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1960 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1961 gcc_assert (outer_base != NULL_TREE);
1962
1963 if (pbitpos % BITS_PER_UNIT != 0)
1964 {
1965 if (vect_print_dump_info (REPORT_DETAILS))
1966 fprintf (vect_dump, "failed: bit offset alignment.\n");
1967 return false;
1968 }
1969
1970 outer_base = build_fold_addr_expr (outer_base);
1971 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1972 &base_iv, false))
1973 {
1974 if (vect_print_dump_info (REPORT_DETAILS))
1975 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1976 return false;
1977 }
1978
1979 if (offset)
1980 {
1981 if (poffset)
1982 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
1983 poffset);
1984 else
1985 poffset = offset;
1986 }
1987
1988 if (!poffset)
1989 {
1990 offset_iv.base = ssize_int (0);
1991 offset_iv.step = ssize_int (0);
1992 }
1993 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
1994 &offset_iv, false))
1995 {
1996 if (vect_print_dump_info (REPORT_DETAILS))
1997 fprintf (vect_dump, "evolution of offset is not affine.\n");
1998 return false;
1999 }
2000
2001 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
2002 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
2003 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2004 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
2005 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
2006
2007 outer_step = size_binop (PLUS_EXPR,
2008 fold_convert (ssizetype, base_iv.step),
2009 fold_convert (ssizetype, offset_iv.step));
2010
2011 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
2012 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
2013 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
2014 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
2015 STMT_VINFO_DR_OFFSET (stmt_info) =
2016 fold_convert (ssizetype, offset_iv.base);
2017 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
2018 size_int (highest_pow2_factor (offset_iv.base));
2019
2020 if (vect_print_dump_info (REPORT_DETAILS))
2021 {
2022 fprintf (vect_dump, "\touter base_address: ");
2023 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
2024 fprintf (vect_dump, "\n\touter offset from base address: ");
2025 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
2026 fprintf (vect_dump, "\n\touter constant offset from base address: ");
2027 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
2028 fprintf (vect_dump, "\n\touter step: ");
2029 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
2030 fprintf (vect_dump, "\n\touter aligned to: ");
2031 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
2032 }
2033 }
2034
2035 if (STMT_VINFO_DATA_REF (stmt_info))
2036 {
2037 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2038 {
2039 fprintf (vect_dump,
2040 "not vectorized: more than one data ref in stmt: ");
2041 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2042 }
2043 return false;
2044 }
2045
2046 STMT_VINFO_DATA_REF (stmt_info) = dr;
2047
2048 /* Set vectype for STMT. */
2049 scalar_type = TREE_TYPE (DR_REF (dr));
2050 STMT_VINFO_VECTYPE (stmt_info) =
2051 get_vectype_for_scalar_type (scalar_type);
2052 if (!STMT_VINFO_VECTYPE (stmt_info))
2053 {
2054 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS))
2055 {
2056 fprintf (vect_dump,
2057 "not vectorized: no vectype for stmt: ");
2058 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
2059 fprintf (vect_dump, " scalar_type: ");
2060 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
2061 }
2062 return false;
2063 }
2064 }
2065
2066 return true;
2067 }
2068
2069
2070 /* Function vect_get_new_vect_var.
2071
2072 Returns a name for a new variable. The current naming scheme prepends
2073 the prefix "vect_", "stmp_" or "vect_p" (depending on the value of
2074 VAR_KIND) to NAME if provided, and uses the prefix alone when NAME is
2075 not provided. */
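/* For example (illustrative): with VAR_KIND == vect_pointer_var and NAME == "x"
   the new temporary's name is based on "vect_px"; with NAME omitted it is based
   on the prefix "vect_p" alone.  */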
2076
2077 tree
2078 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
2079 {
2080 const char *prefix;
2081 tree new_vect_var;
2082
2083 switch (var_kind)
2084 {
2085 case vect_simple_var:
2086 prefix = "vect_";
2087 break;
2088 case vect_scalar_var:
2089 prefix = "stmp_";
2090 break;
2091 case vect_pointer_var:
2092 prefix = "vect_p";
2093 break;
2094 default:
2095 gcc_unreachable ();
2096 }
2097
2098 if (name)
2099 {
2100 char* tmp = concat (prefix, name, NULL);
2101 new_vect_var = create_tmp_var (type, tmp);
2102 free (tmp);
2103 }
2104 else
2105 new_vect_var = create_tmp_var (type, prefix);
2106
2107 /* Mark vector typed variable as a gimple register variable. */
2108 if (TREE_CODE (type) == VECTOR_TYPE)
2109 DECL_GIMPLE_REG_P (new_vect_var) = true;
2110
2111 return new_vect_var;
2112 }
2113
2114
2115 /* Function vect_create_addr_base_for_vector_ref.
2116
2117 Create an expression that computes the address of the first memory location
2118 that will be accessed for a data reference.
2119
2120 Input:
2121 STMT: The statement containing the data reference.
2122 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2123 OFFSET: Optional. If supplied, it is added to the initial address.
2124 LOOP: Specify the loop-nest relative to which the address is computed.
2125 For example, when the dataref is in an inner-loop nested in an
2126 outer-loop that is now being vectorized, LOOP can be either the
2127 outer-loop, or the inner-loop. The first memory location accessed
2128 by the following dataref ('in' points to short):
2129
2130 for (i=0; i<N; i++)
2131 for (j=0; j<M; j++)
2132 s += in[i+j]
2133
2134 is as follows:
2135 if LOOP=i_loop: &in (relative to i_loop)
2136 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2137
2138 Output:
2139 1. Return an SSA_NAME whose value is the address of the memory location of
2140 the first vector of the data reference.
2141 2. If NEW_STMT_LIST is not NULL_TREE after return, then the caller must
2142 insert the statement(s) that define the returned SSA_NAME.
2143
2144 FORNOW: We are only handling array accesses with step 1. */
2145
2146 tree
2147 vect_create_addr_base_for_vector_ref (gimple stmt,
2148 gimple_seq *new_stmt_list,
2149 tree offset,
2150 struct loop *loop)
2151 {
2152 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2153 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2154 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2155 tree base_name;
2156 tree data_ref_base_var;
2157 tree vec_stmt;
2158 tree addr_base, addr_expr;
2159 tree dest;
2160 gimple_seq seq = NULL;
2161 tree base_offset = unshare_expr (DR_OFFSET (dr));
2162 tree init = unshare_expr (DR_INIT (dr));
2163 tree vect_ptr_type;
2164 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2165 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2166
2167 if (loop_vinfo && loop && loop != (gimple_bb (stmt))->loop_father)
2168 {
2169 struct loop *outer_loop = LOOP_VINFO_LOOP (loop_vinfo);
2170
2171 gcc_assert (nested_in_vect_loop_p (outer_loop, stmt));
2172
2173 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2174 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2175 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2176 }
2177
2178 if (loop_vinfo)
2179 base_name = build_fold_indirect_ref (data_ref_base);
2180 else
2181 {
2182 base_offset = ssize_int (0);
2183 init = ssize_int (0);
2184 base_name = build_fold_indirect_ref (unshare_expr (DR_REF (dr)));
2185 }
2186
2187 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2188 add_referenced_var (data_ref_base_var);
2189 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2190 data_ref_base_var);
2191 gimple_seq_add_seq (new_stmt_list, seq);
2192
2193 /* Create base_offset */
2194 base_offset = size_binop (PLUS_EXPR,
2195 fold_convert (sizetype, base_offset),
2196 fold_convert (sizetype, init));
2197 dest = create_tmp_var (sizetype, "base_off");
2198 add_referenced_var (dest);
2199 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2200 gimple_seq_add_seq (new_stmt_list, seq);
2201
2202 if (offset)
2203 {
2204 tree tmp = create_tmp_var (sizetype, "offset");
2205
2206 add_referenced_var (tmp);
2207 offset = fold_build2 (MULT_EXPR, sizetype,
2208 fold_convert (sizetype, offset), step);
2209 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2210 base_offset, offset);
2211 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2212 gimple_seq_add_seq (new_stmt_list, seq);
2213 }
2214
2215 /* base + base_offset */
2216 if (loop_vinfo)
2217 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2218 data_ref_base, base_offset);
2219 else
2220 {
2221 if (TREE_CODE (DR_REF (dr)) == INDIRECT_REF)
2222 addr_base = unshare_expr (TREE_OPERAND (DR_REF (dr), 0));
2223 else
2224 addr_base = build1 (ADDR_EXPR,
2225 build_pointer_type (TREE_TYPE (DR_REF (dr))),
2226 unshare_expr (DR_REF (dr)));
2227 }
2228
2229 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2230
2231 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2232 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2233 get_name (base_name));
2234 add_referenced_var (addr_expr);
2235 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr);
2236 gimple_seq_add_seq (new_stmt_list, seq);
2237
2238 if (vect_print_dump_info (REPORT_DETAILS))
2239 {
2240 fprintf (vect_dump, "created ");
2241 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2242 }
2243
2244 return vec_stmt;
2245 }
2246
2247
2248 /* Function vect_create_data_ref_ptr.
2249
2250 Create a new pointer to vector type (vp), that points to the first location
2251 accessed in the loop by STMT, along with the def-use update chain to
2252 appropriately advance the pointer through the loop iterations. Also set
2253 aliasing information for the pointer. This vector pointer is used by the
2254 callers to this function to create a memory reference expression for vector
2255 load/store access.
2256
2257 Input:
2258 1. STMT: a stmt that references memory. Expected to be of the form
2259 GIMPLE_ASSIGN <name, data-ref> or
2260 GIMPLE_ASSIGN <data-ref, name>.
2261 2. AT_LOOP: the loop where the vector memref is to be created.
2262 3. OFFSET (optional): an offset to be added to the initial address accessed
2263 by the data-ref in STMT.
2264 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2265 pointing to the initial address.
2266 5. TYPE: if not NULL indicates the required type of the data-ref.
2267
2268 Output:
2269 1. Declare a new ptr to vector_type, and have it point to the base of the
2270 data reference (the initial address accessed by the data reference).
2271 For example, for vector of type V8HI, the following code is generated:
2272
2273 v8hi *vp;
2274 vp = (v8hi *)initial_address;
2275
2276 if OFFSET is not supplied:
2277 initial_address = &a[init];
2278 if OFFSET is supplied:
2279 initial_address = &a[init + OFFSET];
2280
2281 Return the initial_address in INITIAL_ADDRESS.
2282
2283 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2284 update the pointer in each iteration of the loop.
2285
2286 Return the increment stmt that updates the pointer in PTR_INCR.
2287
2288 3. Set INV_P to true if the access pattern of the data reference in the
2289 vectorized loop is invariant. Set it to false otherwise.
2290
2291 4. Return the pointer. */
2292
2293 tree
2294 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2295 tree offset, tree *initial_address, gimple *ptr_incr,
2296 bool only_init, bool *inv_p)
2297 {
2298 tree base_name;
2299 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2300 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2301 struct loop *loop = NULL;
2302 bool nested_in_vect_loop = false;
2303 struct loop *containing_loop = NULL;
2304 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2305 tree vect_ptr_type;
2306 tree vect_ptr;
2307 tree new_temp;
2308 gimple vec_stmt;
2309 gimple_seq new_stmt_list = NULL;
2310 edge pe = NULL;
2311 basic_block new_bb;
2312 tree vect_ptr_init;
2313 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2314 tree vptr;
2315 gimple_stmt_iterator incr_gsi;
2316 bool insert_after;
2317 tree indx_before_incr, indx_after_incr;
2318 gimple incr;
2319 tree step;
2320 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2321 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
2322
2323 if (loop_vinfo)
2324 {
2325 loop = LOOP_VINFO_LOOP (loop_vinfo);
2326 nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2327 containing_loop = (gimple_bb (stmt))->loop_father;
2328 pe = loop_preheader_edge (loop);
2329 }
2330 else
2331 {
2332 gcc_assert (bb_vinfo);
2333 only_init = true;
2334 *ptr_incr = NULL;
2335 }
2336
2337 /* Check the step (evolution) of the load in LOOP, and record
2338 whether it's invariant. */
2339 if (nested_in_vect_loop)
2340 step = STMT_VINFO_DR_STEP (stmt_info);
2341 else
2342 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2343
2344 if (tree_int_cst_compare (step, size_zero_node) == 0)
2345 *inv_p = true;
2346 else
2347 *inv_p = false;
2348
2349 /* Create an expression for the first address accessed by this load
2350 in LOOP. */
2351 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2352
2353 if (vect_print_dump_info (REPORT_DETAILS))
2354 {
2355 tree data_ref_base = base_name;
2356 fprintf (vect_dump, "create vector-pointer variable to type: ");
2357 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2358 if (TREE_CODE (data_ref_base) == VAR_DECL
2359 || TREE_CODE (data_ref_base) == ARRAY_REF)
2360 fprintf (vect_dump, " vectorizing an array ref: ");
2361 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2362 fprintf (vect_dump, " vectorizing a record based array ref: ");
2363 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2364 fprintf (vect_dump, " vectorizing a pointer ref: ");
2365 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2366 }
2367
2368 /** (1) Create the new vector-pointer variable: **/
2369 vect_ptr_type = build_pointer_type (vectype);
2370 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2371 get_name (base_name));
2372
2373 /* Vector types inherit the alias set of their component type by default, so
2374 we need a ref-all pointer if the alias set of the data reference does not
2375 conflict with that of the created vector data reference (e.g. because it is not addressable). */
2376 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2377 get_alias_set (DR_REF (dr))))
2378 {
2379 vect_ptr_type
2380 = build_pointer_type_for_mode (vectype,
2381 TYPE_MODE (vect_ptr_type), true);
2382 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2383 get_name (base_name));
2384 }
2385
2386 /* Likewise for any of the data references in the stmt group. */
2387 else if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2388 {
2389 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2390 do
2391 {
2392 tree lhs = gimple_assign_lhs (orig_stmt);
2393 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2394 get_alias_set (lhs)))
2395 {
2396 vect_ptr_type
2397 = build_pointer_type_for_mode (vectype,
2398 TYPE_MODE (vect_ptr_type), true);
2399 vect_ptr
2400 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2401 get_name (base_name));
2402 break;
2403 }
2404
2405 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2406 }
2407 while (orig_stmt);
2408 }
2409
2410 add_referenced_var (vect_ptr);
2411
2412 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2413 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2414 def-use update cycles for the pointer: One relative to the outer-loop
2415 (LOOP), which is what steps (3) and (4) below do. The other is relative
2416 to the inner-loop (which is the inner-most loop containing the dataref),
2417 and this is done by step (5) below.
2418
2419 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2420 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2421 redundant. Steps (3),(4) create the following:
2422
2423 vp0 = &base_addr;
2424 LOOP: vp1 = phi(vp0,vp2)
2425 ...
2426 ...
2427 vp2 = vp1 + step
2428 goto LOOP
2429
2430 If there is an inner-loop nested in loop, then step (5) will also be
2431 applied, and an additional update in the inner-loop will be created:
2432
2433 vp0 = &base_addr;
2434 LOOP: vp1 = phi(vp0,vp2)
2435 ...
2436 inner: vp3 = phi(vp1,vp4)
2437 vp4 = vp3 + inner_step
2438 if () goto inner
2439 ...
2440 vp2 = vp1 + step
2441 if () goto LOOP */
2442
2443 /** (3) Calculate the initial address of the vector-pointer, and set
2444 the vector-pointer to point to it before the loop: **/
2445
2446 /* Create: &(base[init_val+offset]) in the loop preheader. */
2447
2448 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2449 offset, loop);
2450 if (new_stmt_list)
2451 {
2452 if (pe)
2453 {
2454 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2455 gcc_assert (!new_bb);
2456 }
2457 else
2458 gsi_insert_seq_before (&gsi, new_stmt_list, GSI_SAME_STMT);
2459 }
2460
2461 *initial_address = new_temp;
2462
2463 /* Create: p = (vectype *) initial_base */
2464 vec_stmt = gimple_build_assign (vect_ptr,
2465 fold_convert (vect_ptr_type, new_temp));
2466 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2467 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2468 if (pe)
2469 {
2470 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2471 gcc_assert (!new_bb);
2472 }
2473 else
2474 gsi_insert_before (&gsi, vec_stmt, GSI_SAME_STMT);
2475
2476 /** (4) Handle the updating of the vector-pointer inside the loop.
2477 This is needed when ONLY_INIT is false, and also when AT_LOOP
2478 is the inner-loop nested in LOOP (during outer-loop vectorization).
2479 **/
2480
2481 /* No update in loop is required. */
2482 if (only_init && (!loop_vinfo || at_loop == loop))
2483 {
2484 /* Copy the points-to information if it exists. */
2485 if (DR_PTR_INFO (dr))
2486 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2487 vptr = vect_ptr_init;
2488 }
2489 else
2490 {
2491 /* The step of the vector pointer is the Vector Size. */
2492 tree step = TYPE_SIZE_UNIT (vectype);
2493 /* One exception to the above is when the scalar step of the load in
2494 LOOP is zero. In this case the step here is also zero. */
2495 if (*inv_p)
2496 step = size_zero_node;
2497
2498 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2499
2500 create_iv (vect_ptr_init,
2501 fold_convert (vect_ptr_type, step),
2502 vect_ptr, loop, &incr_gsi, insert_after,
2503 &indx_before_incr, &indx_after_incr);
2504 incr = gsi_stmt (incr_gsi);
2505 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2506
2507 /* Copy the points-to information if it exists. */
2508 if (DR_PTR_INFO (dr))
2509 {
2510 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2511 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2512 }
2513 if (ptr_incr)
2514 *ptr_incr = incr;
2515
2516 vptr = indx_before_incr;
2517 }
2518
2519 if (!nested_in_vect_loop || only_init)
2520 return vptr;
2521
2522
2523 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2524 nested in LOOP, if exists: **/
2525
2526 gcc_assert (nested_in_vect_loop);
2527 if (!only_init)
2528 {
2529 standard_iv_increment_position (containing_loop, &incr_gsi,
2530 &insert_after);
2531 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2532 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2533 &indx_after_incr);
2534 incr = gsi_stmt (incr_gsi);
2535 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo, NULL));
2536
2537 /* Copy the points-to information if it exists. */
2538 if (DR_PTR_INFO (dr))
2539 {
2540 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2541 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2542 }
2543 if (ptr_incr)
2544 *ptr_incr = incr;
2545
2546 return indx_before_incr;
2547 }
2548 else
2549 gcc_unreachable ();
2550 }
2551
2552
2553 /* Function bump_vector_ptr
2554
2555 Increment a pointer (to a vector type) by vector-size. If requested,
2556 i.e. if PTR_INCR is given, then also connect the new increment stmt
2557 to the existing def-use update-chain of the pointer, by modifying
2558 the PTR_INCR as illustrated below:
2559
2560 The pointer def-use update-chain before this function:
2561 DATAREF_PTR = phi (p_0, p_2)
2562 ....
2563 PTR_INCR: p_2 = DATAREF_PTR + step
2564
2565 The pointer def-use update-chain after this function:
2566 DATAREF_PTR = phi (p_0, p_2)
2567 ....
2568 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2569 ....
2570 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2571
2572 Input:
2573 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2574 in the loop.
2575 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2576 the loop. The increment amount across iterations is expected
2577 to be vector_size.
2578 BSI - location where the new update stmt is to be placed.
2579 STMT - the original scalar memory-access stmt that is being vectorized.
2580 BUMP - optional. The offset by which to bump the pointer. If not given,
2581 the offset is assumed to be vector_size.
2582
2583 Output: Return NEW_DATAREF_PTR as illustrated above.
2584
2585 */
2586
2587 tree
2588 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2589 gimple stmt, tree bump)
2590 {
2591 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2592 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2593 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2594 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2595 tree update = TYPE_SIZE_UNIT (vectype);
2596 gimple incr_stmt;
2597 ssa_op_iter iter;
2598 use_operand_p use_p;
2599 tree new_dataref_ptr;
2600
2601 if (bump)
2602 update = bump;
2603
2604 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2605 dataref_ptr, update);
2606 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2607 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2608 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2609
2610 /* Copy the points-to information if it exists. */
2611 if (DR_PTR_INFO (dr))
2612 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2613
2614 if (!ptr_incr)
2615 return new_dataref_ptr;
2616
2617 /* Update the vector-pointer's cross-iteration increment. */
2618 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2619 {
2620 tree use = USE_FROM_PTR (use_p);
2621
2622 if (use == dataref_ptr)
2623 SET_USE (use_p, new_dataref_ptr);
2624 else
2625 gcc_assert (tree_int_cst_compare (use, update) == 0);
2626 }
2627
2628 return new_dataref_ptr;
2629 }
2630
2631
2632 /* Function vect_create_destination_var.
2633
2634 Create a new temporary of type VECTYPE. */
2635
2636 tree
2637 vect_create_destination_var (tree scalar_dest, tree vectype)
2638 {
2639 tree vec_dest;
2640 const char *new_name;
2641 tree type;
2642 enum vect_var_kind kind;
2643
2644 kind = vectype ? vect_simple_var : vect_scalar_var;
2645 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2646
2647 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2648
2649 new_name = get_name (scalar_dest);
2650 if (!new_name)
2651 new_name = "var_";
2652 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2653 add_referenced_var (vec_dest);
2654
2655 return vec_dest;
2656 }
2657
2658 /* Function vect_strided_store_supported.
2659
2660 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2661 and FALSE otherwise. */
2662
2663 bool
2664 vect_strided_store_supported (tree vectype)
2665 {
2666 optab interleave_high_optab, interleave_low_optab;
2667 int mode;
2668
2669 mode = (int) TYPE_MODE (vectype);
2670
2671 /* Check that the operation is supported. */
2672 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2673 vectype, optab_default);
2674 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2675 vectype, optab_default);
2676 if (!interleave_high_optab || !interleave_low_optab)
2677 {
2678 if (vect_print_dump_info (REPORT_DETAILS))
2679 fprintf (vect_dump, "no optab for interleave.");
2680 return false;
2681 }
2682
2683 if (optab_handler (interleave_high_optab, mode)->insn_code
2684 == CODE_FOR_nothing
2685 || optab_handler (interleave_low_optab, mode)->insn_code
2686 == CODE_FOR_nothing)
2687 {
2688 if (vect_print_dump_info (REPORT_DETAILS))
2689 fprintf (vect_dump, "interleave op not supported by target.");
2690 return false;
2691 }
2692
2693 return true;
2694 }
2695
2696
2697 /* Function vect_permute_store_chain.
2698
2699 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2700 a power of 2, generate interleave_high/low stmts to reorder the data
2701 correctly for the stores. Return the final references for stores in
2702 RESULT_CHAIN.
2703
2704 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2705 The input is 4 vectors each containing 8 elements. We assign a number to each
2706 element, the input sequence is:
2707
2708 1st vec: 0 1 2 3 4 5 6 7
2709 2nd vec: 8 9 10 11 12 13 14 15
2710 3rd vec: 16 17 18 19 20 21 22 23
2711 4th vec: 24 25 26 27 28 29 30 31
2712
2713 The output sequence should be:
2714
2715 1st vec: 0 8 16 24 1 9 17 25
2716 2nd vec: 2 10 18 26 3 11 19 27
2717 3rd vec: 4 12 20 28 5 13 21 29
2718 4th vec: 6 14 22 30 7 15 23 31
2719
2720 i.e., we interleave the contents of the four vectors in their order.
2721
2722 We use interleave_high/low instructions to create such output. The input of
2723 each interleave_high/low operation is two vectors:
2724 1st vec 2nd vec
2725 0 1 2 3 4 5 6 7
2726 the even elements of the result vector are obtained left-to-right from the
2727 high/low elements of the first vector. The odd elements of the result are
2728 obtained left-to-right from the high/low elements of the second vector.
2729 The output of interleave_high will be: 0 4 1 5
2730 and of interleave_low: 2 6 3 7
2731
2732
2733 The permutation is done in log LENGTH stages. In each stage interleave_high
2734 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2735 where the first argument is taken from the first half of DR_CHAIN and the
2736 second argument from its second half.
2737 In our example,
2738
2739 I1: interleave_high (1st vec, 3rd vec)
2740 I2: interleave_low (1st vec, 3rd vec)
2741 I3: interleave_high (2nd vec, 4th vec)
2742 I4: interleave_low (2nd vec, 4th vec)
2743
2744 The output for the first stage is:
2745
2746 I1: 0 16 1 17 2 18 3 19
2747 I2: 4 20 5 21 6 22 7 23
2748 I3: 8 24 9 25 10 26 11 27
2749 I4: 12 28 13 29 14 30 15 31
2750
2751 The output of the second stage, i.e. the final result is:
2752
2753 I1: 0 8 16 24 1 9 17 25
2754 I2: 2 10 18 26 3 11 19 27
2755 I3: 4 12 20 28 5 13 21 29
2756 I4: 6 14 22 30 7 15 23 31. */
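/* A scalar reference model of a single interleave stage (illustrative only,
   not used by the vectorizer; NELT is the number of elements per vector):

     for (k = 0; k < NELT / 2; k++)
       {
         high[2*k]     = v1[k];
         high[2*k + 1] = v2[k];
         low[2*k]      = v1[k + NELT/2];
         low[2*k + 1]  = v2[k + NELT/2];
       }

   With v1 = {0 1 2 3 4 5 6 7} and v2 = {16 17 18 19 20 21 22 23} this yields
   high = {0 16 1 17 2 18 3 19} and low = {4 20 5 21 6 22 7 23}, matching I1
   and I2 of the first stage above.  */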
2757
2758 bool
2759 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2760 unsigned int length,
2761 gimple stmt,
2762 gimple_stmt_iterator *gsi,
2763 VEC(tree,heap) **result_chain)
2764 {
2765 tree perm_dest, vect1, vect2, high, low;
2766 gimple perm_stmt;
2767 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2768 int i;
2769 unsigned int j;
2770 enum tree_code high_code, low_code;
2771
2772 /* Check that the operation is supported. */
2773 if (!vect_strided_store_supported (vectype))
2774 return false;
2775
2776 *result_chain = VEC_copy (tree, heap, dr_chain);
2777
2778 for (i = 0; i < exact_log2 (length); i++)
2779 {
2780 for (j = 0; j < length/2; j++)
2781 {
2782 vect1 = VEC_index (tree, dr_chain, j);
2783 vect2 = VEC_index (tree, dr_chain, j+length/2);
2784
2785 /* Create interleaving stmt:
2786 in the case of big endian:
2787 high = interleave_high (vect1, vect2)
2788 and in the case of little endian:
2789 high = interleave_low (vect1, vect2). */
2790 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2791 DECL_GIMPLE_REG_P (perm_dest) = 1;
2792 add_referenced_var (perm_dest);
2793 if (BYTES_BIG_ENDIAN)
2794 {
2795 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2796 low_code = VEC_INTERLEAVE_LOW_EXPR;
2797 }
2798 else
2799 {
2800 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2801 high_code = VEC_INTERLEAVE_LOW_EXPR;
2802 }
2803 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2804 vect1, vect2);
2805 high = make_ssa_name (perm_dest, perm_stmt);
2806 gimple_assign_set_lhs (perm_stmt, high);
2807 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2808 VEC_replace (tree, *result_chain, 2*j, high);
2809
2810 /* Create interleaving stmt:
2811 in the case of big endian:
2812 low = interleave_low (vect1, vect2)
2813 and in the case of little endian:
2814 low = interleave_high (vect1, vect2). */
2815 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2816 DECL_GIMPLE_REG_P (perm_dest) = 1;
2817 add_referenced_var (perm_dest);
2818 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2819 vect1, vect2);
2820 low = make_ssa_name (perm_dest, perm_stmt);
2821 gimple_assign_set_lhs (perm_stmt, low);
2822 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2823 VEC_replace (tree, *result_chain, 2*j+1, low);
2824 }
2825 dr_chain = VEC_copy (tree, heap, *result_chain);
2826 }
2827 return true;
2828 }
2829
2830 /* Function vect_setup_realignment
2831
2832 This function is called when vectorizing an unaligned load using
2833 the dr_explicit_realign[_optimized] scheme.
2834 This function generates the following code at the loop prolog:
2835
2836 p = initial_addr;
2837 x msq_init = *(floor(p)); # prolog load
2838 realignment_token = call target_builtin;
2839 loop:
2840 x msq = phi (msq_init, ---)
2841
2842 The stmts marked with x are generated only for the case of
2843 dr_explicit_realign_optimized.
2844
2845 The code above sets up a new (vector) pointer, pointing to the first
2846 location accessed by STMT, and a "floor-aligned" load using that pointer.
2847 It also generates code to compute the "realignment-token" (if the relevant
2848 target hook was defined), and creates a phi-node at the loop-header bb
2849 whose arguments are the result of the prolog-load (created by this
2850 function) and the result of a load that takes place in the loop (to be
2851 created by the caller to this function).
2852
2853 For the case of dr_explicit_realign_optimized:
2854 The caller to this function uses the phi-result (msq) to create the
2855 realignment code inside the loop, and sets up the missing phi argument,
2856 as follows:
2857 loop:
2858 msq = phi (msq_init, lsq)
2859 lsq = *(floor(p')); # load in loop
2860 result = realign_load (msq, lsq, realignment_token);
2861
2862 For the case of dr_explicit_realign:
2863 loop:
2864 msq = *(floor(p)); # load in loop
2865 p' = p + (VS-1);
2866 lsq = *(floor(p')); # load in loop
2867 result = realign_load (msq, lsq, realignment_token);
2868
2869 Input:
2870 STMT - (scalar) load stmt to be vectorized. This load accesses
2871 a memory location that may be unaligned.
2872 BSI - place where new code is to be inserted.
2873 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2874 is used.
2875
2876 Output:
2877 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2878 target hook, if defined.
2879 Return value - the result of the loop-header phi node. */
2880
2881 tree
2882 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2883 tree *realignment_token,
2884 enum dr_alignment_support alignment_support_scheme,
2885 tree init_addr,
2886 struct loop **at_loop)
2887 {
2888 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2889 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2890 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2891 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2892 edge pe;
2893 tree scalar_dest = gimple_assign_lhs (stmt);
2894 tree vec_dest;
2895 gimple inc;
2896 tree ptr;
2897 tree data_ref;
2898 gimple new_stmt;
2899 basic_block new_bb;
2900 tree msq_init = NULL_TREE;
2901 tree new_temp;
2902 gimple phi_stmt;
2903 tree msq = NULL_TREE;
2904 gimple_seq stmts = NULL;
2905 bool inv_p;
2906 bool compute_in_loop = false;
2907 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2908 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2909 struct loop *loop_for_initial_load;
2910
2911 gcc_assert (alignment_support_scheme == dr_explicit_realign
2912 || alignment_support_scheme == dr_explicit_realign_optimized);
2913
2914 /* We need to generate three things:
2915 1. the misalignment computation
2916 2. the extra vector load (for the optimized realignment scheme).
2917 3. the phi node for the two vectors from which the realignment is
2918 done (for the optimized realignment scheme).
2919 */
2920
2921 /* 1. Determine where to generate the misalignment computation.
2922
2923 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2924 calculation will be generated by this function, outside the loop (in the
2925 preheader). Otherwise, INIT_ADDR had already been computed for us by the
2926 caller, inside the loop.
2927
2928 Background: If the misalignment remains fixed throughout the iterations of
2929 the loop, then both realignment schemes are applicable, and also the
2930 misalignment computation can be done outside LOOP. This is because we are
2931 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2932 are a multiple of VS (the Vector Size), and therefore the misalignment in
2933 different vectorized LOOP iterations is always the same.
2934 The problem arises only if the memory access is in an inner-loop nested
2935 inside LOOP, which is now being vectorized using outer-loop vectorization.
2936 This is the only case when the misalignment of the memory access may not
2937 remain fixed throughout the iterations of the inner-loop (as explained in
2938 detail in vect_supportable_dr_alignment). In this case, not only is the
2939 optimized realignment scheme not applicable, but also the misalignment
2940 computation (and generation of the realignment token that is passed to
2941 REALIGN_LOAD) have to be done inside the loop.
2942
2943 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2944 or not, which in turn determines if the misalignment is computed inside
2945 the inner-loop, or outside LOOP. */
2946
2947 if (init_addr != NULL_TREE)
2948 {
2949 compute_in_loop = true;
2950 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2951 }
2952
2953
2954 /* 2. Determine where to generate the extra vector load.
2955
2956 For the optimized realignment scheme, instead of generating two vector
2957 loads in each iteration, we generate a single extra vector load in the
2958 preheader of the loop, and in each iteration reuse the result of the
2959 vector load from the previous iteration. In case the memory access is in
2960 an inner-loop nested inside LOOP, which is now being vectorized using
2961 outer-loop vectorization, we need to determine whether this initial vector
2962 load should be generated at the preheader of the inner-loop, or can be
2963 generated at the preheader of LOOP. If the memory access has no evolution
2964 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2965 to be generated inside LOOP (in the preheader of the inner-loop). */
2966
2967 if (nested_in_vect_loop)
2968 {
2969 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2970 bool invariant_in_outerloop =
2971 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2972 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2973 }
2974 else
2975 loop_for_initial_load = loop;
2976 if (at_loop)
2977 *at_loop = loop_for_initial_load;
2978
2979 /* 3. For the case of the optimized realignment, create the first vector
2980 load at the loop preheader. */
2981
2982 if (alignment_support_scheme == dr_explicit_realign_optimized)
2983 {
2984 /* Create msq_init = *(floor(p1)) in the loop preheader */
2985
2986 gcc_assert (!compute_in_loop);
2987 pe = loop_preheader_edge (loop_for_initial_load);
2988 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2989 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
2990 &init_addr, &inc, true, &inv_p);
2991 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2992 new_stmt = gimple_build_assign (vec_dest, data_ref);
2993 new_temp = make_ssa_name (vec_dest, new_stmt);
2994 gimple_assign_set_lhs (new_stmt, new_temp);
2995 mark_symbols_for_renaming (new_stmt);
2996 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2997 gcc_assert (!new_bb);
2998 msq_init = gimple_assign_lhs (new_stmt);
2999 }
3000
3001 /* 4. Create realignment token using a target builtin, if available.
3002 It is done either inside the containing loop, or before LOOP (as
3003 determined above). */
3004
3005 if (targetm.vectorize.builtin_mask_for_load)
3006 {
3007 tree builtin_decl;
3008
3009 /* Compute INIT_ADDR - the initial address accessed by this memref. */
3010 if (compute_in_loop)
3011 gcc_assert (init_addr); /* already computed by the caller. */
3012 else
3013 {
3014 /* Generate the INIT_ADDR computation outside LOOP. */
3015 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
3016 NULL_TREE, loop);
3017 pe = loop_preheader_edge (loop);
3018 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
3019 gcc_assert (!new_bb);
3020 }
3021
3022 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
3023 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
3024 vec_dest =
3025 vect_create_destination_var (scalar_dest,
3026 gimple_call_return_type (new_stmt));
3027 new_temp = make_ssa_name (vec_dest, new_stmt);
3028 gimple_call_set_lhs (new_stmt, new_temp);
3029
3030 if (compute_in_loop)
3031 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
3032 else
3033 {
3034 /* Generate the misalignment computation outside LOOP. */
3035 pe = loop_preheader_edge (loop);
3036 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
3037 gcc_assert (!new_bb);
3038 }
3039
3040 *realignment_token = gimple_call_lhs (new_stmt);
3041
3042 /* The result of the CALL_EXPR to this builtin is determined from
3043 the value of the parameter and no global variables are touched
3044 which makes the builtin a "const" function. Requiring the
3045 builtin to have the "const" attribute makes it unnecessary
3046 to call mark_call_clobbered. */
3047 gcc_assert (TREE_READONLY (builtin_decl));
3048 }
3049
3050 if (alignment_support_scheme == dr_explicit_realign)
3051 return msq;
3052
3053 gcc_assert (!compute_in_loop);
3054 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
3055
3056
3057 /* 5. Create msq = phi <msq_init, lsq> in loop */
3058
3059 pe = loop_preheader_edge (containing_loop);
3060 vec_dest = vect_create_destination_var (scalar_dest, vectype);
3061 msq = make_ssa_name (vec_dest, NULL);
3062 phi_stmt = create_phi_node (msq, containing_loop->header);
3063 SSA_NAME_DEF_STMT (msq) = phi_stmt;
3064 add_phi_arg (phi_stmt, msq_init, pe, UNKNOWN_LOCATION);
3065
3066 return msq;
3067 }
3068
3069
3070 /* Function vect_strided_load_supported.
3071
3072 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
3073 and FALSE otherwise. */
3074
3075 bool
3076 vect_strided_load_supported (tree vectype)
3077 {
3078 optab perm_even_optab, perm_odd_optab;
3079 int mode;
3080
3081 mode = (int) TYPE_MODE (vectype);
3082
3083 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
3084 optab_default);
3085 if (!perm_even_optab)
3086 {
3087 if (vect_print_dump_info (REPORT_DETAILS))
3088 fprintf (vect_dump, "no optab for perm_even.");
3089 return false;
3090 }
3091
3092 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
3093 {
3094 if (vect_print_dump_info (REPORT_DETAILS))
3095 fprintf (vect_dump, "perm_even op not supported by target.");
3096 return false;
3097 }
3098
3099 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
3100 optab_default);
3101 if (!perm_odd_optab)
3102 {
3103 if (vect_print_dump_info (REPORT_DETAILS))
3104 fprintf (vect_dump, "no optab for perm_odd.");
3105 return false;
3106 }
3107
3108 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
3109 {
3110 if (vect_print_dump_info (REPORT_DETAILS))
3111 fprintf (vect_dump, "perm_odd op not supported by target.");
3112 return false;
3113 }
3114 return true;
3115 }
3116
3117
3118 /* Function vect_permute_load_chain.
3119
3120 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
3121 a power of 2, generate extract_even/odd stmts to reorder the input data
3122 correctly. Return the final references for loads in RESULT_CHAIN.
3123
3124 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
3125 The input is 4 vectors each containing 8 elements. We assign a number to each
3126 element, the input sequence is:
3127
3128 1st vec: 0 1 2 3 4 5 6 7
3129 2nd vec: 8 9 10 11 12 13 14 15
3130 3rd vec: 16 17 18 19 20 21 22 23
3131 4th vec: 24 25 26 27 28 29 30 31
3132
3133 The output sequence should be:
3134
3135 1st vec: 0 4 8 12 16 20 24 28
3136 2nd vec: 1 5 9 13 17 21 25 29
3137 3rd vec: 2 6 10 14 18 22 26 30
3138 4th vec: 3 7 11 15 19 23 27 31
3139
3140 i.e., the first output vector should contain the first elements of each
3141 interleaving group, etc.
3142
3143 We use extract_even/odd instructions to create such output. The input of each
3144 extract_even/odd operation is two vectors
3145 1st vec 2nd vec
3146 0 1 2 3 4 5 6 7
3147
3148 and the output is the vector of extracted even/odd elements. The output of
3149 extract_even will be: 0 2 4 6
3150 and of extract_odd: 1 3 5 7
3151
3152
3153 The permutation is done in log LENGTH stages. In each stage extract_even and
3154 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3155 order. In our example,
3156
3157 E1: extract_even (1st vec, 2nd vec)
3158 E2: extract_odd (1st vec, 2nd vec)
3159 E3: extract_even (3rd vec, 4th vec)
3160 E4: extract_odd (3rd vec, 4th vec)
3161
3162 The output for the first stage will be:
3163
3164 E1: 0 2 4 6 8 10 12 14
3165 E2: 1 3 5 7 9 11 13 15
3166 E3: 16 18 20 22 24 26 28 30
3167 E4: 17 19 21 23 25 27 29 31
3168
3169 In order to proceed and create the correct sequence for the next stage (or
3170 for the correct output, if the second stage is the last one, as in our
3171 example), we first put the output of extract_even operation and then the
3172 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3173 The input for the second stage is:
3174
3175 1st vec (E1): 0 2 4 6 8 10 12 14
3176 2nd vec (E3): 16 18 20 22 24 26 28 30
3177 3rd vec (E2): 1 3 5 7 9 11 13 15
3178 4th vec (E4): 17 19 21 23 25 27 29 31
3179
3180 The output of the second stage:
3181
3182 E1: 0 4 8 12 16 20 24 28
3183 E2: 2 6 10 14 18 22 26 30
3184 E3: 1 5 9 13 17 21 25 29
3185 E4: 3 7 11 15 19 23 27 31
3186
3187 And RESULT_CHAIN after reordering:
3188
3189 1st vec (E1): 0 4 8 12 16 20 24 28
3190 2nd vec (E3): 1 5 9 13 17 21 25 29
3191 3rd vec (E2): 2 6 10 14 18 22 26 30
3192 4th vec (E4): 3 7 11 15 19 23 27 31. */
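/* A scalar reference model of a single extract stage (illustrative only, not
   used by the vectorizer; NELT is the number of elements per vector and
   CONCAT the 2*NELT-element concatenation of the two input vectors):

     for (k = 0; k < NELT; k++)
       {
         even[k] = concat[2*k];
         odd[k]  = concat[2*k + 1];
       }

   With the 1st and 2nd input vectors above, concat = {0 1 ... 15}, so
   even = {0 2 4 6 8 10 12 14} and odd = {1 3 5 7 9 11 13 15}, i.e. E1 and E2
   of the first stage.  */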
3193
3194 bool
3195 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3196 unsigned int length,
3197 gimple stmt,
3198 gimple_stmt_iterator *gsi,
3199 VEC(tree,heap) **result_chain)
3200 {
3201 tree perm_dest, data_ref, first_vect, second_vect;
3202 gimple perm_stmt;
3203 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3204 int i;
3205 unsigned int j;
3206
3207 /* Check that the operation is supported. */
3208 if (!vect_strided_load_supported (vectype))
3209 return false;
3210
3211 *result_chain = VEC_copy (tree, heap, dr_chain);
3212 for (i = 0; i < exact_log2 (length); i++)
3213 {
3214 for (j = 0; j < length; j +=2)
3215 {
3216 first_vect = VEC_index (tree, dr_chain, j);
3217 second_vect = VEC_index (tree, dr_chain, j+1);
3218
3219 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3220 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3221 DECL_GIMPLE_REG_P (perm_dest) = 1;
3222 add_referenced_var (perm_dest);
3223
3224 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3225 perm_dest, first_vect,
3226 second_vect);
3227
3228 data_ref = make_ssa_name (perm_dest, perm_stmt);
3229 gimple_assign_set_lhs (perm_stmt, data_ref);
3230 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3231 mark_symbols_for_renaming (perm_stmt);
3232
3233 VEC_replace (tree, *result_chain, j/2, data_ref);
3234
3235 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3236 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3237 DECL_GIMPLE_REG_P (perm_dest) = 1;
3238 add_referenced_var (perm_dest);
3239
3240 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3241 perm_dest, first_vect,
3242 second_vect);
3243 data_ref = make_ssa_name (perm_dest, perm_stmt);
3244 gimple_assign_set_lhs (perm_stmt, data_ref);
3245 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3246 mark_symbols_for_renaming (perm_stmt);
3247
3248 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3249 }
3250 dr_chain = VEC_copy (tree, heap, *result_chain);
3251 }
3252 return true;
3253 }
3254
3255
3256 /* Function vect_transform_strided_load.
3257
3258 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3259 to perform their permutation and ascribe the result vectorized statements to
3260 the scalar statements.
3261 */
3262
3263 bool
3264 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3265 gimple_stmt_iterator *gsi)
3266 {
3267 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3268 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3269 gimple next_stmt, new_stmt;
3270 VEC(tree,heap) *result_chain = NULL;
3271 unsigned int i, gap_count;
3272 tree tmp_data_ref;
3273
3274 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3275 RESULT_CHAIN is the output of vect_permute_load_chain, it contains permuted
3276 vectors, that are ready for vector computation. */
3277 result_chain = VEC_alloc (tree, heap, size);
3278 /* Permute. */
3279 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3280 return false;
3281
3282 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3283 Since we scan the chain starting from its first node, their order
3284 corresponds to the order of data-refs in RESULT_CHAIN. */
3285 next_stmt = first_stmt;
3286 gap_count = 1;
3287 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3288 {
3289 if (!next_stmt)
3290 break;
3291
3292 /* Skip the gaps. Loads created for the gaps will be removed later by
3293 dead code elimination. No need to check for the first stmt in
3294 the group, since it always exists.
3295 DR_GROUP_GAP is the number of steps in elements from the previous
3296 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3297 correspond to the gaps.
3298 */
3299 if (next_stmt != first_stmt
3300 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3301 {
3302 gap_count++;
3303 continue;
3304 }
3305
3306 while (next_stmt)
3307 {
3308 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3309 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3310 copies, and we put the new vector statement in the first available
3311 RELATED_STMT. */
3312 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3313 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3314 else
3315 {
3316 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3317 {
3318 gimple prev_stmt =
3319 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3320 gimple rel_stmt =
3321 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3322 while (rel_stmt)
3323 {
3324 prev_stmt = rel_stmt;
3325 rel_stmt =
3326 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3327 }
3328
3329 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3330 new_stmt;
3331 }
3332 }
3333
3334 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3335 gap_count = 1;
3336 /* If NEXT_STMT accesses the same DR as the previous statement,
3337 put the same TMP_DATA_REF as its vectorized statement; otherwise
3338 get the next data-ref from RESULT_CHAIN. */
3339 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3340 break;
3341 }
3342 }
3343
3344 VEC_free (tree, heap, result_chain);
3345 return true;
3346 }
3347
3348 /* Function vect_can_force_dr_alignment_p.
3349
3350 Return whether the alignment of DECL can be forced to an ALIGNMENT-bit
3351 boundary. */
3352
3353 bool
3354 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3355 {
3356 if (TREE_CODE (decl) != VAR_DECL)
3357 return false;
3358
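/* The definition (and therefore the alignment) of an external decl is
   determined elsewhere; we cannot change it here.  */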
3359 if (DECL_EXTERNAL (decl))
3360 return false;
3361
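/* The decl has already been written out to the assembler file; it is too
   late to change its alignment.  */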
3362 if (TREE_ASM_WRITTEN (decl))
3363 return false;
3364
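/* Variables with static storage are limited by what the object file format
   can express (MAX_OFILE_ALIGNMENT); stack variables by the supported stack
   alignment (MAX_STACK_ALIGNMENT).  */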
3365 if (TREE_STATIC (decl))
3366 return (alignment <= MAX_OFILE_ALIGNMENT);
3367 else
3368 return (alignment <= MAX_STACK_ALIGNMENT);
3369 }
3370
3371 /* Function vect_supportable_dr_alignment
3372
3373 Return whether the data reference DR is supported with respect to its
3374 alignment. */
3375
3376 enum dr_alignment_support
3377 vect_supportable_dr_alignment (struct data_reference *dr)
3378 {
3379 gimple stmt = DR_STMT (dr);
3380 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3381 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3382 enum machine_mode mode = TYPE_MODE (vectype);
3383 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3384 struct loop *vect_loop = NULL;
3385 bool nested_in_vect_loop = false;
3386
3387 if (aligned_access_p (dr))
3388 return dr_aligned;
3389
3390 if (!loop_vinfo)
3391 /* FORNOW: Misaligned accesses are supported only in loops. */
3392 return dr_unaligned_unsupported;
3393
3394 vect_loop = LOOP_VINFO_LOOP (loop_vinfo);
3395 nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3396
3397 /* Possibly unaligned access. */
3398
3399 /* We can choose between using the implicit realignment scheme (generating
3400 a misaligned_move stmt) and the explicit realignment scheme (generating
3401 aligned loads with a REALIGN_LOAD). There are two variants of the explicit
3402 realignment scheme: optimized and unoptimized.
3403 We can optimize the realignment only if the step between consecutive
3404 vector loads is equal to the vector size. Since the vector memory
3405 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3406 is guaranteed that the misalignment amount remains the same throughout the
3407 execution of the vectorized loop. Therefore, we can create the
3408 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3409 at the loop preheader.
3410
3411 However, in the case of outer-loop vectorization, when vectorizing a
3412 memory access in the inner-loop nested within the LOOP that is now being
3413 vectorized, while it is guaranteed that the misalignment of the
3414 vectorized memory access will remain the same in different outer-loop
3415 iterations, it is *not* guaranteed that it will remain the same throughout
3416 the execution of the inner-loop. This is because the inner-loop advances
3417 with the original scalar step (and not in steps of VS). If the inner-loop
3418 step happens to be a multiple of VS, then the misalignment remains fixed
3419 and we can use the optimized realignment scheme. For example:
3420
3421 for (i=0; i<N; i++)
3422 for (j=0; j<M; j++)
3423 s += a[i+j];
3424
3425 When vectorizing the i-loop in the above example, the step between
3426 consecutive vector loads is 1, and so the misalignment does not remain
3427 fixed across the execution of the inner-loop, and the realignment cannot
3428 be optimized (as illustrated in the following pseudo vectorized loop):
3429
3430 for (i=0; i<N; i+=4)
3431 for (j=0; j<M; j++){
3432 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3433 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3434 // (assuming that we start from an aligned address).
3435 }
3436
3437 We therefore have to use the unoptimized realignment scheme:
3438
3439 for (i=0; i<N; i+=4)
3440 for (j=k; j<M; j+=4)
3441 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3442 // that the misalignment of the initial address is
3443 // 0).
3444
3445 The loop can then be vectorized as follows:
3446
3447 for (k=0; k<4; k++){
3448 rt = get_realignment_token (&vp[k]);
3449 for (i=0; i<N; i+=4){
3450 v1 = vp[i+k];
3451 for (j=k; j<M; j+=4){
3452 v2 = vp[i+j+VS-1];
3453 va = REALIGN_LOAD <v1,v2,rt>;
3454 vs += va;
3455 v1 = v2;
3456 }
3457 }
3458 } */
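/* Informally, and subject to the checks below: dr_explicit_realign_optimized
   corresponds to the optimized explicit scheme (the realignment token is
   computed once, in the loop preheader), dr_explicit_realign to the
   unoptimized explicit scheme (the realignment data is recomputed per
   access), and dr_unaligned_supported to the implicit scheme that relies on
   the target's misaligned vector access support.  */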
3459
3460 if (DR_IS_READ (dr))
3461 {
3462 bool is_packed = false;
3463 tree type = (TREE_TYPE (DR_REF (dr)));
3464
3465 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3466 CODE_FOR_nothing
3467 && (!targetm.vectorize.builtin_mask_for_load
3468 || targetm.vectorize.builtin_mask_for_load ()))
3469 {
3470 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
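/* If the step of the inner-loop access is not exactly one vector, the
   misalignment may change between inner-loop iterations, so only the
   unoptimized explicit realignment scheme can be used (see the discussion
   above).  */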
3471 if (nested_in_vect_loop
3472 && (TREE_INT_CST_LOW (DR_STEP (dr))
3473 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3474 return dr_explicit_realign;
3475 else
3476 return dr_explicit_realign_optimized;
3477 }
3478 if (!known_alignment_for_access_p (dr))
3479 {
3480 tree ba = DR_BASE_OBJECT (dr);
3481
3482 if (ba)
3483 is_packed = contains_packed_reference (ba);
3484 }
3485
3486 if (targetm.vectorize.
3487 builtin_support_vector_misalignment (mode, type,
3488 DR_MISALIGNMENT (dr), is_packed))
3489 /* Can't software pipeline the loads, but can at least do them. */
3490 return dr_unaligned_supported;
3491 }
3492 else
3493 {
3494 bool is_packed = false;
3495 tree type = (TREE_TYPE (DR_REF (dr)));
3496
3497 if (!known_alignment_for_access_p (dr))
3498 {
3499 tree ba = DR_BASE_OBJECT (dr);
3500
3501 if (ba)
3502 is_packed = contains_packed_reference (ba);
3503 }
3504
3505 if (targetm.vectorize.
3506 builtin_support_vector_misalignment (mode, type,
3507 DR_MISALIGNMENT (dr), is_packed))
3508 return dr_unaligned_supported;
3509 }
3510
3511 /* Unsupported. */
3512 return dr_unaligned_unsupported;
3513 }