gcc/tree-vect-data-refs.c
1 /* Data References Analysis and Manipulation Utilities for Vectorization.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software
3 Foundation, Inc.
4 Contributed by Dorit Naishlos <dorit@il.ibm.com>
5 and Ira Rosen <irar@il.ibm.com>
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "tm.h"
27 #include "ggc.h"
28 #include "tree.h"
29 #include "target.h"
30 #include "basic-block.h"
31 #include "diagnostic.h"
32 #include "tree-flow.h"
33 #include "tree-dump.h"
34 #include "cfgloop.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "tree-chrec.h"
38 #include "tree-scalar-evolution.h"
39 #include "tree-vectorizer.h"
40 #include "toplev.h"
41
42
43 /* Return the smallest scalar part of STMT.
44 This is used to determine the vectype of the stmt. We generally set the
45 vectype according to the type of the result (lhs). For stmts whose
46 result-type is different from the type of the arguments (e.g., demotion,
47 promotion), vectype will be reset appropriately (later). Note that we have
48 to visit the smallest datatype in this function, because that determines the
49 VF. If the smallest datatype in the loop appeared only as the rhs of a
50 promotion operation, looking at the lhs alone would miss it.
51 Such a case, where a variable of this datatype does not appear in the lhs
52 anywhere in the loop, can only occur if it's an invariant: e.g.:
53 'int_x = (int) short_inv', which we'd expect to have been optimized away by
54 invariant motion. However, we cannot rely on invariant motion to always take
55 invariants out of the loop, and so in the case of promotion we also have to
56 check the rhs.
57 LHS_SIZE_UNIT and RHS_SIZE_UNIT contain the sizes of the corresponding
58 types. */
59
60 tree
61 vect_get_smallest_scalar_type (gimple stmt, HOST_WIDE_INT *lhs_size_unit,
62 HOST_WIDE_INT *rhs_size_unit)
63 {
64 tree scalar_type = gimple_expr_type (stmt);
65 HOST_WIDE_INT lhs, rhs;
66
67 lhs = rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
68
69 if (is_gimple_assign (stmt)
70 && (gimple_assign_cast_p (stmt)
71 || gimple_assign_rhs_code (stmt) == WIDEN_MULT_EXPR
72 || gimple_assign_rhs_code (stmt) == FLOAT_EXPR))
73 {
74 tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
75
76 rhs = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (rhs_type));
77 if (rhs < lhs)
78 scalar_type = rhs_type;
79 }
80
81 *lhs_size_unit = lhs;
82 *rhs_size_unit = rhs;
83 return scalar_type;
84 }
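
/* Illustrative stand-alone sketch (not used by the vectorizer; the names below
   are examples only): for a widening statement such as 'int_x = (int) short_y'
   the result type is 4 bytes but the rhs type is 2 bytes, and the
   vectorization factor must be derived from the smaller one, exactly as the
   comparison above does.  */

static unsigned
example_smallest_scalar_size (unsigned lhs_size_unit, unsigned rhs_size_unit)
{
  /* Mirror the decision above: prefer the rhs size when it is smaller.
     E.g. lhs = sizeof (int) = 4, rhs = sizeof (short) = 2 gives 2, so a
     16-byte vector yields a vectorization factor of 16 / 2 = 8.  */
  return rhs_size_unit < lhs_size_unit ? rhs_size_unit : lhs_size_unit;
}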
85
86
87 /* Find the place of the data-ref in STMT in the interleaving chain that starts
88 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
89
90 int
91 vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
92 {
93 gimple next_stmt = first_stmt;
94 int result = 0;
95
96 if (first_stmt != DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
97 return -1;
98
99 while (next_stmt && next_stmt != stmt)
100 {
101 result++;
102 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
103 }
104
105 if (next_stmt)
106 return result;
107 else
108 return -1;
109 }
110
111
112 /* Function vect_insert_into_interleaving_chain.
113
114 Insert DRA into the interleaving chain of DRB according to DRA's INIT. */
115
116 static void
117 vect_insert_into_interleaving_chain (struct data_reference *dra,
118 struct data_reference *drb)
119 {
120 gimple prev, next;
121 tree next_init;
122 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
123 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
124
125 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
126 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
127 while (next)
128 {
129 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
130 if (tree_int_cst_compare (next_init, DR_INIT (dra)) > 0)
131 {
132 /* Insert here. */
133 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
134 DR_GROUP_NEXT_DR (stmtinfo_a) = next;
135 return;
136 }
137 prev = next;
138 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
139 }
140
141 /* We got to the end of the list. Insert here. */
142 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = DR_STMT (dra);
143 DR_GROUP_NEXT_DR (stmtinfo_a) = NULL;
144 }
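
/* A minimal stand-alone model (illustrative names, not GCC API) of the ordered
   insertion performed above: chain members are kept sorted by their constant
   offset within the group (DR_INIT), and a new member is linked in before the
   first member with a larger offset, or appended at the tail.  */

struct example_chain_node
{
  long init;                        /* Constant offset within the group.  */
  struct example_chain_node *next;  /* Next member of the chain.  */
};

static void
example_chain_insert (struct example_chain_node *first,
                      struct example_chain_node *node)
{
  struct example_chain_node *prev = first;
  struct example_chain_node *next = first->next;

  while (next)
    {
      if (next->init > node->init)
        {
          /* Insert here, keeping the chain sorted by INIT.  */
          prev->next = node;
          node->next = next;
          return;
        }
      prev = next;
      next = prev->next;
    }

  /* Reached the end of the chain; append.  */
  prev->next = node;
  node->next = NULL;
}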
145
146
147 /* Function vect_update_interleaving_chain.
148
149 For two data-refs DRA and DRB that are part of a chain of interleaved data
150 accesses, update the interleaving chain. DRB's INIT is smaller than DRA's.
151
152 There are four possible cases:
153 1. New stmts - both DRA and DRB are not a part of any chain:
154 FIRST_DR = DRB
155 NEXT_DR (DRB) = DRA
156 2. DRB is a part of a chain and DRA is not:
157 no need to update FIRST_DR
158 no need to insert DRB
159 insert DRA according to init
160 3. DRA is a part of a chain and DRB is not:
161 if (init of FIRST_DR > init of DRB)
162 FIRST_DR = DRB
163 NEXT(FIRST_DR) = previous FIRST_DR
164 else
165 insert DRB according to its init
166 4. both DRA and DRB are in some interleaving chains:
167 choose the chain with the smallest init of FIRST_DR
168 insert the nodes of the second chain into the first one. */
169
170 static void
171 vect_update_interleaving_chain (struct data_reference *drb,
172 struct data_reference *dra)
173 {
174 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
175 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
176 tree next_init, init_dra_chain, init_drb_chain;
177 gimple first_a, first_b;
178 tree node_init;
179 gimple node, prev, next, first_stmt;
180
181 /* 1. New stmts - both DRA and DRB are not a part of any chain. */
182 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
183 {
184 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_STMT (drb);
185 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
186 DR_GROUP_NEXT_DR (stmtinfo_b) = DR_STMT (dra);
187 return;
188 }
189
190 /* 2. DRB is a part of a chain and DRA is not. */
191 if (!DR_GROUP_FIRST_DR (stmtinfo_a) && DR_GROUP_FIRST_DR (stmtinfo_b))
192 {
193 DR_GROUP_FIRST_DR (stmtinfo_a) = DR_GROUP_FIRST_DR (stmtinfo_b);
194 /* Insert DRA into the chain of DRB. */
195 vect_insert_into_interleaving_chain (dra, drb);
196 return;
197 }
198
199 /* 3. DRA is a part of a chain and DRB is not. */
200 if (DR_GROUP_FIRST_DR (stmtinfo_a) && !DR_GROUP_FIRST_DR (stmtinfo_b))
201 {
202 gimple old_first_stmt = DR_GROUP_FIRST_DR (stmtinfo_a);
203 tree init_old = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (
204 old_first_stmt)));
205 gimple tmp;
206
207 if (tree_int_cst_compare (init_old, DR_INIT (drb)) > 0)
208 {
209 /* DRB's init is smaller than the init of the stmt previously marked
210 as the first stmt of the interleaving chain of DRA. Therefore, we
211 update FIRST_STMT and put DRB in the head of the list. */
212 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_STMT (drb);
213 DR_GROUP_NEXT_DR (stmtinfo_b) = old_first_stmt;
214
215 /* Update all the stmts in the list to point to the new FIRST_STMT. */
216 tmp = old_first_stmt;
217 while (tmp)
218 {
219 DR_GROUP_FIRST_DR (vinfo_for_stmt (tmp)) = DR_STMT (drb);
220 tmp = DR_GROUP_NEXT_DR (vinfo_for_stmt (tmp));
221 }
222 }
223 else
224 {
225 /* Insert DRB in the list of DRA. */
226 vect_insert_into_interleaving_chain (drb, dra);
227 DR_GROUP_FIRST_DR (stmtinfo_b) = DR_GROUP_FIRST_DR (stmtinfo_a);
228 }
229 return;
230 }
231
232 /* 4. both DRA and DRB are in some interleaving chains. */
233 first_a = DR_GROUP_FIRST_DR (stmtinfo_a);
234 first_b = DR_GROUP_FIRST_DR (stmtinfo_b);
235 if (first_a == first_b)
236 return;
237 init_dra_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_a)));
238 init_drb_chain = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (first_b)));
239
240 if (tree_int_cst_compare (init_dra_chain, init_drb_chain) > 0)
241 {
242 /* Insert the nodes of DRA chain into the DRB chain.
243 After inserting a node, continue from this node of the DRB chain (don't
244 start from the beginning). */
245 node = DR_GROUP_FIRST_DR (stmtinfo_a);
246 prev = DR_GROUP_FIRST_DR (stmtinfo_b);
247 first_stmt = first_b;
248 }
249 else
250 {
251 /* Insert the nodes of DRB chain into the DRA chain.
252 After inserting a node, continue from this node of the DRA chain (don't
253 start from the beginning). */
254 node = DR_GROUP_FIRST_DR (stmtinfo_b);
255 prev = DR_GROUP_FIRST_DR (stmtinfo_a);
256 first_stmt = first_a;
257 }
258
259 while (node)
260 {
261 node_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (node)));
262 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
263 while (next)
264 {
265 next_init = DR_INIT (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
266 if (tree_int_cst_compare (next_init, node_init) > 0)
267 {
268 /* Insert here. */
269 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
270 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = next;
271 prev = node;
272 break;
273 }
274 prev = next;
275 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (prev));
276 }
277 if (!next)
278 {
279 /* We got to the end of the list. Insert here. */
280 DR_GROUP_NEXT_DR (vinfo_for_stmt (prev)) = node;
281 DR_GROUP_NEXT_DR (vinfo_for_stmt (node)) = NULL;
282 prev = node;
283 }
284 DR_GROUP_FIRST_DR (vinfo_for_stmt (node)) = first_stmt;
285 node = DR_GROUP_NEXT_DR (vinfo_for_stmt (node));
286 }
287 }
288
289
290 /* Function vect_equal_offsets.
291
292 Check if OFFSET1 and OFFSET2 are identical expressions. */
293
294 static bool
295 vect_equal_offsets (tree offset1, tree offset2)
296 {
297 bool res0, res1;
298
299 STRIP_NOPS (offset1);
300 STRIP_NOPS (offset2);
301
302 if (offset1 == offset2)
303 return true;
304
305 if (TREE_CODE (offset1) != TREE_CODE (offset2)
306 || !BINARY_CLASS_P (offset1)
307 || !BINARY_CLASS_P (offset2))
308 return false;
309
310 res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
311 TREE_OPERAND (offset2, 0));
312 res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
313 TREE_OPERAND (offset2, 1));
314
315 return (res0 && res1);
316 }
317
318
319 /* Function vect_check_interleaving.
320
321 Check if DRA and DRB are a part of interleaving. In case they are, insert
322 DRA and DRB in an interleaving chain. */
323
324 static void
325 vect_check_interleaving (struct data_reference *dra,
326 struct data_reference *drb)
327 {
328 HOST_WIDE_INT type_size_a, type_size_b, diff_mod_size, step, init_a, init_b;
329
330 /* Check that the data-refs have the same first location (except init) and
331 that they are both either stores or loads (not one load and one store). */
332 if ((DR_BASE_ADDRESS (dra) != DR_BASE_ADDRESS (drb)
333 && (TREE_CODE (DR_BASE_ADDRESS (dra)) != ADDR_EXPR
334 || TREE_CODE (DR_BASE_ADDRESS (drb)) != ADDR_EXPR
335 || TREE_OPERAND (DR_BASE_ADDRESS (dra), 0)
336 != TREE_OPERAND (DR_BASE_ADDRESS (drb),0)))
337 || !vect_equal_offsets (DR_OFFSET (dra), DR_OFFSET (drb))
338 || !tree_int_cst_compare (DR_INIT (dra), DR_INIT (drb))
339 || DR_IS_READ (dra) != DR_IS_READ (drb))
340 return;
341
342 /* Check:
343 1. data-refs are of the same type
344 2. their steps are equal
345 3. the step is no smaller than the difference between the data-refs' inits */
346 type_size_a = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dra))));
347 type_size_b = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (drb))));
348
349 if (type_size_a != type_size_b
350 || tree_int_cst_compare (DR_STEP (dra), DR_STEP (drb))
351 || !types_compatible_p (TREE_TYPE (DR_REF (dra)),
352 TREE_TYPE (DR_REF (drb))))
353 return;
354
355 init_a = TREE_INT_CST_LOW (DR_INIT (dra));
356 init_b = TREE_INT_CST_LOW (DR_INIT (drb));
357 step = TREE_INT_CST_LOW (DR_STEP (dra));
358
359 if (init_a > init_b)
360 {
361 /* If init_a == init_b + the size of the type * k, we have an interleaving,
362 and DRB is accessed before DRA. */
363 diff_mod_size = (init_a - init_b) % type_size_a;
364
365 if ((init_a - init_b) > step)
366 return;
367
368 if (diff_mod_size == 0)
369 {
370 vect_update_interleaving_chain (drb, dra);
371 if (vect_print_dump_info (REPORT_DR_DETAILS))
372 {
373 fprintf (vect_dump, "Detected interleaving ");
374 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
375 fprintf (vect_dump, " and ");
376 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
377 }
378 return;
379 }
380 }
381 else
382 {
383 /* If init_b == init_a + the size of the type * k, we have an
384 interleaving, and DRA is accessed before DRB. */
385 diff_mod_size = (init_b - init_a) % type_size_a;
386
387 if ((init_b - init_a) > step)
388 return;
389
390 if (diff_mod_size == 0)
391 {
392 vect_update_interleaving_chain (dra, drb);
393 if (vect_print_dump_info (REPORT_DR_DETAILS))
394 {
395 fprintf (vect_dump, "Detected interleaving ");
396 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
397 fprintf (vect_dump, " and ");
398 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
399 }
400 return;
401 }
402 }
403 }
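
/* A concrete example of the test above (illustrative only; plain C types, not
   GCC API): for 'double' accesses a[2*i] and a[2*i+1] in the same loop,
   type_size is 8, both steps are 16, init_a = 0 and init_b = 8.  The
   difference 8 is a whole number of elements and does not exceed the step, so
   the two refs form an interleaving group.  */

static int
example_part_of_interleaving_p (long init_a, long init_b,
                                long step, long type_size)
{
  long lo = init_a < init_b ? init_a : init_b;
  long hi = init_a < init_b ? init_b : init_a;
  long diff = hi - lo;

  /* Same conditions as above: the distance between the two inits must not
     exceed the common step and must be a multiple of the element size.  */
  return diff <= step && diff % type_size == 0;
}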
404
405 /* Check if the data references pointed to by DR_I and DR_J are the same or
406 belong to the same interleaving group. Return FALSE if they are
407 different, otherwise return TRUE. */
408
409 static bool
410 vect_same_range_drs (data_reference_p dr_i, data_reference_p dr_j)
411 {
412 gimple stmt_i = DR_STMT (dr_i);
413 gimple stmt_j = DR_STMT (dr_j);
414
415 if (operand_equal_p (DR_REF (dr_i), DR_REF (dr_j), 0)
416 || (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
417 && DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j))
418 && (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_i))
419 == DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_j)))))
420 return true;
421 else
422 return false;
423 }
424
425 /* If address ranges represented by DDR_I and DDR_J are equal,
426 return TRUE, otherwise return FALSE. */
427
428 static bool
429 vect_vfa_range_equal (ddr_p ddr_i, ddr_p ddr_j)
430 {
431 if ((vect_same_range_drs (DDR_A (ddr_i), DDR_A (ddr_j))
432 && vect_same_range_drs (DDR_B (ddr_i), DDR_B (ddr_j)))
433 || (vect_same_range_drs (DDR_A (ddr_i), DDR_B (ddr_j))
434 && vect_same_range_drs (DDR_B (ddr_i), DDR_A (ddr_j))))
435 return true;
436 else
437 return false;
438 }
439
440 /* Insert DDR into LOOP_VINFO's list of ddrs that may alias and need to be
441 tested at run-time. Return TRUE if DDR was successfully inserted.
442 Return FALSE if versioning is not supported. */
443
444 static bool
445 vect_mark_for_runtime_alias_test (ddr_p ddr, loop_vec_info loop_vinfo)
446 {
447 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
448
449 if ((unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS) == 0)
450 return false;
451
452 if (vect_print_dump_info (REPORT_DR_DETAILS))
453 {
454 fprintf (vect_dump, "mark for run-time aliasing test between ");
455 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr)), TDF_SLIM);
456 fprintf (vect_dump, " and ");
457 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr)), TDF_SLIM);
458 }
459
460 if (optimize_loop_nest_for_size_p (loop))
461 {
462 if (vect_print_dump_info (REPORT_DR_DETAILS))
463 fprintf (vect_dump, "versioning not supported when optimizing for size.");
464 return false;
465 }
466
467 /* FORNOW: We don't support versioning with outer-loop vectorization. */
468 if (loop->inner)
469 {
470 if (vect_print_dump_info (REPORT_DR_DETAILS))
471 fprintf (vect_dump, "versioning not yet supported for outer-loops.");
472 return false;
473 }
474
475 VEC_safe_push (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), ddr);
476 return true;
477 }
478
479 /* Function vect_analyze_data_ref_dependence.
480
481 Return TRUE if there (might) exist a dependence between a memory-reference
482 DRA and a memory-reference DRB. If the dependence can instead be tested
483 at run-time by versioning for alias, return FALSE. */
484
485 static bool
486 vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
487 loop_vec_info loop_vinfo)
488 {
489 unsigned int i;
490 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
491 int vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
492 struct data_reference *dra = DDR_A (ddr);
493 struct data_reference *drb = DDR_B (ddr);
494 stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
495 stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
496 int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
497 int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
498 lambda_vector dist_v;
499 unsigned int loop_depth;
500
501 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
502 {
503 /* Independent data accesses. */
504 vect_check_interleaving (dra, drb);
505 return false;
506 }
507
508 if ((DR_IS_READ (dra) && DR_IS_READ (drb)) || dra == drb)
509 return false;
510
511 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
512 {
513 if (vect_print_dump_info (REPORT_DR_DETAILS))
514 {
515 fprintf (vect_dump,
516 "versioning for alias required: can't determine dependence between ");
517 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
518 fprintf (vect_dump, " and ");
519 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
520 }
521 /* Add to list of ddrs that need to be tested at run-time. */
522 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
523 }
524
525 if (DDR_NUM_DIST_VECTS (ddr) == 0)
526 {
527 if (vect_print_dump_info (REPORT_DR_DETAILS))
528 {
529 fprintf (vect_dump, "versioning for alias required: bad dist vector for ");
530 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
531 fprintf (vect_dump, " and ");
532 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
533 }
534 /* Add to list of ddrs that need to be tested at run-time. */
535 return !vect_mark_for_runtime_alias_test (ddr, loop_vinfo);
536 }
537
538 loop_depth = index_in_loop_nest (loop->num, DDR_LOOP_NEST (ddr));
539 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
540 {
541 int dist = dist_v[loop_depth];
542
543 if (vect_print_dump_info (REPORT_DR_DETAILS))
544 fprintf (vect_dump, "dependence distance = %d.", dist);
545
546 /* Same loop iteration. */
547 if (dist % vectorization_factor == 0 && dra_size == drb_size)
548 {
549 /* Two references with distance zero have the same alignment. */
550 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
551 VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_b), dra);
552 if (vect_print_dump_info (REPORT_ALIGNMENT))
553 fprintf (vect_dump, "accesses have the same alignment.");
554 if (vect_print_dump_info (REPORT_DR_DETAILS))
555 {
556 fprintf (vect_dump, "dependence distance modulo vf == 0 between ");
557 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
558 fprintf (vect_dump, " and ");
559 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
560 }
561
562 /* For interleaving, mark that there is a read-write dependency if
563 necessary. We checked above that one of the data-refs is a store. */
564 if (DR_IS_READ (dra))
565 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_a) = true;
566 else
567 {
568 if (DR_IS_READ (drb))
569 DR_GROUP_READ_WRITE_DEPENDENCE (stmtinfo_b) = true;
570 }
571
572 continue;
573 }
574
575 if (abs (dist) >= vectorization_factor
576 || (dist > 0 && DDR_REVERSED_P (ddr)))
577 {
578 /* Dependence distance does not create dependence, as far as
579 vectorization is concerned, in this case. If DDR_REVERSED_P the
580 order of the data-refs in DDR was reversed (to make distance
581 vector positive), and the actual distance is negative. */
582 if (vect_print_dump_info (REPORT_DR_DETAILS))
583 fprintf (vect_dump, "dependence distance >= VF or negative.");
584 continue;
585 }
586
587 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
588 {
589 fprintf (vect_dump,
590 "not vectorized, possible dependence "
591 "between data-refs ");
592 print_generic_expr (vect_dump, DR_REF (dra), TDF_SLIM);
593 fprintf (vect_dump, " and ");
594 print_generic_expr (vect_dump, DR_REF (drb), TDF_SLIM);
595 }
596
597 return true;
598 }
599
600 return false;
601 }
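
/* The distance test above can be summarized by the following stand-alone
   classification (illustrative names, not GCC API; the additional requirement
   that both refs have the same size is omitted for brevity): a distance that
   is a multiple of the vectorization factor is harmless and even implies
   equal alignment; a distance whose absolute value is at least VF (or a
   reversed positive distance) cannot be violated by vectorizing; anything
   else forbids vectorization.  */

enum example_dep_kind
{
  EXAMPLE_DEP_SAME_ALIGN,     /* dist % vf == 0: refs keep the same alignment.  */
  EXAMPLE_DEP_HARMLESS,       /* |dist| >= vf, or reversed positive distance.  */
  EXAMPLE_DEP_FORBIDS_VECT    /* Possible dependence inside one vector.  */
};

static enum example_dep_kind
example_classify_distance (int dist, int vf, int reversed_p)
{
  int abs_dist = dist < 0 ? -dist : dist;

  if (dist % vf == 0)
    return EXAMPLE_DEP_SAME_ALIGN;
  if (abs_dist >= vf || (dist > 0 && reversed_p))
    return EXAMPLE_DEP_HARMLESS;
  return EXAMPLE_DEP_FORBIDS_VECT;
}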
602
603 /* Function vect_analyze_data_ref_dependences.
604
605 Examine all the data references in the loop, and make sure there are no
606 data dependences between them that would prevent vectorization. */
607
608 bool
609 vect_analyze_data_ref_dependences (loop_vec_info loop_vinfo)
610 {
611 unsigned int i;
612 VEC (ddr_p, heap) * ddrs = LOOP_VINFO_DDRS (loop_vinfo);
613 struct data_dependence_relation *ddr;
614
615 if (vect_print_dump_info (REPORT_DETAILS))
616 fprintf (vect_dump, "=== vect_analyze_data_ref_dependences ===");
617
618 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
619 if (vect_analyze_data_ref_dependence (ddr, loop_vinfo))
620 return false;
621
622 return true;
623 }
624
625
626 /* Function vect_compute_data_ref_alignment
627
628 Compute the misalignment of the data reference DR.
629
630 Output:
631 1. If during the misalignment computation it is found that the data reference
632 cannot be vectorized then false is returned.
633 2. DR_MISALIGNMENT (DR) is defined.
634
635 FOR NOW: No analysis is actually performed. Misalignment is calculated
636 only for trivial cases. TODO. */
637
638 static bool
639 vect_compute_data_ref_alignment (struct data_reference *dr)
640 {
641 gimple stmt = DR_STMT (dr);
642 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
643 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
644 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
645 tree ref = DR_REF (dr);
646 tree vectype;
647 tree base, base_addr;
648 bool base_aligned;
649 tree misalign;
650 tree aligned_to, alignment;
651
652 if (vect_print_dump_info (REPORT_DETAILS))
653 fprintf (vect_dump, "vect_compute_data_ref_alignment:");
654
655 /* Initialize misalignment to unknown. */
656 SET_DR_MISALIGNMENT (dr, -1);
657
658 misalign = DR_INIT (dr);
659 aligned_to = DR_ALIGNED_TO (dr);
660 base_addr = DR_BASE_ADDRESS (dr);
661 vectype = STMT_VINFO_VECTYPE (stmt_info);
662
663 /* In case the dataref is in an inner-loop of the loop that is being
664 vectorized (LOOP), we use the base and misalignment information
665 relative to the outer-loop (LOOP). This is ok only if the misalignment
666 stays the same throughout the execution of the inner-loop, which is why
667 we have to check that the stride of the dataref in the inner-loop is a
668 multiple of the vector size. */
669 if (nested_in_vect_loop_p (loop, stmt))
670 {
671 tree step = DR_STEP (dr);
672 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
673
674 if (dr_step % GET_MODE_SIZE (TYPE_MODE (vectype)) == 0)
675 {
676 if (vect_print_dump_info (REPORT_ALIGNMENT))
677 fprintf (vect_dump, "inner step divides the vector-size.");
678 misalign = STMT_VINFO_DR_INIT (stmt_info);
679 aligned_to = STMT_VINFO_DR_ALIGNED_TO (stmt_info);
680 base_addr = STMT_VINFO_DR_BASE_ADDRESS (stmt_info);
681 }
682 else
683 {
684 if (vect_print_dump_info (REPORT_ALIGNMENT))
685 fprintf (vect_dump, "inner step doesn't divide the vector-size.");
686 misalign = NULL_TREE;
687 }
688 }
689
690 base = build_fold_indirect_ref (base_addr);
691 alignment = ssize_int (TYPE_ALIGN (vectype)/BITS_PER_UNIT);
692
693 if ((aligned_to && tree_int_cst_compare (aligned_to, alignment) < 0)
694 || !misalign)
695 {
696 if (vect_print_dump_info (REPORT_ALIGNMENT))
697 {
698 fprintf (vect_dump, "Unknown alignment for access: ");
699 print_generic_expr (vect_dump, base, TDF_SLIM);
700 }
701 return true;
702 }
703
704 if ((DECL_P (base)
705 && tree_int_cst_compare (ssize_int (DECL_ALIGN_UNIT (base)),
706 alignment) >= 0)
707 || (TREE_CODE (base_addr) == SSA_NAME
708 && tree_int_cst_compare (ssize_int (TYPE_ALIGN_UNIT (TREE_TYPE (
709 TREE_TYPE (base_addr)))),
710 alignment) >= 0))
711 base_aligned = true;
712 else
713 base_aligned = false;
714
715 if (!base_aligned)
716 {
717 /* Do not change the alignment of global variables if
718 flag_section_anchors is enabled. */
719 if (!vect_can_force_dr_alignment_p (base, TYPE_ALIGN (vectype))
720 || (TREE_STATIC (base) && flag_section_anchors))
721 {
722 if (vect_print_dump_info (REPORT_DETAILS))
723 {
724 fprintf (vect_dump, "can't force alignment of ref: ");
725 print_generic_expr (vect_dump, ref, TDF_SLIM);
726 }
727 return true;
728 }
729
730 /* Force the alignment of the decl.
731 NOTE: This is the only change to the code we make during
732 the analysis phase, before deciding to vectorize the loop. */
733 if (vect_print_dump_info (REPORT_DETAILS))
734 fprintf (vect_dump, "force alignment");
735 DECL_ALIGN (base) = TYPE_ALIGN (vectype);
736 DECL_USER_ALIGN (base) = 1;
737 }
738
739 /* At this point we assume that the base is aligned. */
740 gcc_assert (base_aligned
741 || (TREE_CODE (base) == VAR_DECL
742 && DECL_ALIGN (base) >= TYPE_ALIGN (vectype)));
743
744 /* Modulo alignment. */
745 misalign = size_binop (FLOOR_MOD_EXPR, misalign, alignment);
746
747 if (!host_integerp (misalign, 1))
748 {
749 /* Negative or overflowed misalignment value. */
750 if (vect_print_dump_info (REPORT_DETAILS))
751 fprintf (vect_dump, "unexpected misalign value");
752 return false;
753 }
754
755 SET_DR_MISALIGNMENT (dr, TREE_INT_CST_LOW (misalign));
756
757 if (vect_print_dump_info (REPORT_DETAILS))
758 {
759 fprintf (vect_dump, "misalign = %d bytes of ref ", DR_MISALIGNMENT (dr));
760 print_generic_expr (vect_dump, ref, TDF_SLIM);
761 }
762
763 return true;
764 }
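
/* A worked example of the computation above as a simplified stand-alone model
   (illustrative names only): if the vector type needs 16-byte alignment, the
   base is known to be sufficiently aligned, and DR_INIT is 20 bytes, then the
   recorded misalignment is 20 % 16 = 4 bytes.  */

static int
example_compute_misalignment (long init_bytes, long vector_align_bytes,
                              int base_alignment_known_p)
{
  /* In this simplified model an unknown base alignment means an unknown
     misalignment, mirroring SET_DR_MISALIGNMENT (dr, -1) above.  */
  if (!base_alignment_known_p)
    return -1;

  /* Otherwise the misalignment is the constant offset modulo the required
     vector alignment, e.g. 20 % 16 == 4.  */
  return (int) (init_bytes % vector_align_bytes);
}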
765
766
767 /* Function vect_compute_data_refs_alignment
768
769 Compute the misalignment of data references in the loop.
770 Return FALSE if a data reference is found that cannot be vectorized. */
771
772 static bool
773 vect_compute_data_refs_alignment (loop_vec_info loop_vinfo)
774 {
775 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
776 struct data_reference *dr;
777 unsigned int i;
778
779 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
780 if (!vect_compute_data_ref_alignment (dr))
781 return false;
782
783 return true;
784 }
785
786
787 /* Function vect_update_misalignment_for_peel
788
789 DR - the data reference whose misalignment is to be adjusted.
790 DR_PEEL - the data reference whose misalignment is being made
791 zero in the vector loop by the peel.
792 NPEEL - the number of iterations in the peel loop if the misalignment
793 of DR_PEEL is known at compile time. */
794
795 static void
796 vect_update_misalignment_for_peel (struct data_reference *dr,
797 struct data_reference *dr_peel, int npeel)
798 {
799 unsigned int i;
800 VEC(dr_p,heap) *same_align_drs;
801 struct data_reference *current_dr;
802 int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
803 int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
804 stmt_vec_info stmt_info = vinfo_for_stmt (DR_STMT (dr));
805 stmt_vec_info peel_stmt_info = vinfo_for_stmt (DR_STMT (dr_peel));
806
807 /* For interleaved data accesses the step in the loop must be multiplied by
808 the size of the interleaving group. */
809 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
810 dr_size *= DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));
811 if (STMT_VINFO_STRIDED_ACCESS (peel_stmt_info))
812 dr_peel_size *= DR_GROUP_SIZE (peel_stmt_info);
813
814 /* It can be assumed that the data refs with the same alignment as dr_peel
815 are aligned in the vector loop. */
816 same_align_drs
817 = STMT_VINFO_SAME_ALIGN_REFS (vinfo_for_stmt (DR_STMT (dr_peel)));
818 for (i = 0; VEC_iterate (dr_p, same_align_drs, i, current_dr); i++)
819 {
820 if (current_dr != dr)
821 continue;
822 gcc_assert (DR_MISALIGNMENT (dr) / dr_size ==
823 DR_MISALIGNMENT (dr_peel) / dr_peel_size);
824 SET_DR_MISALIGNMENT (dr, 0);
825 return;
826 }
827
828 if (known_alignment_for_access_p (dr)
829 && known_alignment_for_access_p (dr_peel))
830 {
831 int misal = DR_MISALIGNMENT (dr);
832 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
833 misal += npeel * dr_size;
834 misal %= GET_MODE_SIZE (TYPE_MODE (vectype));
835 SET_DR_MISALIGNMENT (dr, misal);
836 return;
837 }
838
839 if (vect_print_dump_info (REPORT_DETAILS))
840 fprintf (vect_dump, "Setting misalignment to -1.");
841 SET_DR_MISALIGNMENT (dr, -1);
842 }
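
/* A stand-alone sketch (illustrative names only) of the known-misalignment
   update above: peeling NPEEL scalar iterations advances DR by
   NPEEL * dr_size bytes, so its misalignment becomes
   (old + NPEEL * dr_size) modulo the vector size.  For example old = 4,
   npeel = 3, dr_size = 4 and a 16-byte vector give (4 + 12) % 16 = 0.  */

static int
example_misalignment_after_peel (int old_misal, int npeel, int dr_size,
                                 int vector_size)
{
  /* Each peeled scalar iteration advances the access by dr_size bytes;
     the misalignment only matters modulo the vector size.  */
  return (old_misal + npeel * dr_size) % vector_size;
}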
843
844
845 /* Function vect_verify_datarefs_alignment
846
847 Return TRUE if all data references in the loop can be
848 handled with respect to alignment. */
849
850 static bool
851 vect_verify_datarefs_alignment (loop_vec_info loop_vinfo)
852 {
853 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
854 struct data_reference *dr;
855 enum dr_alignment_support supportable_dr_alignment;
856 unsigned int i;
857
858 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
859 {
860 gimple stmt = DR_STMT (dr);
861 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
862
863 /* For interleaving, only the alignment of the first access matters. */
864 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
865 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
866 continue;
867
868 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
869 if (!supportable_dr_alignment)
870 {
871 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
872 {
873 if (DR_IS_READ (dr))
874 fprintf (vect_dump,
875 "not vectorized: unsupported unaligned load.");
876 else
877 fprintf (vect_dump,
878 "not vectorized: unsupported unaligned store.");
879 }
880 return false;
881 }
882 if (supportable_dr_alignment != dr_aligned
883 && vect_print_dump_info (REPORT_ALIGNMENT))
884 fprintf (vect_dump, "Vectorizing an unaligned access.");
885 }
886 return true;
887 }
888
889
890 /* Function vector_alignment_reachable_p
891
892 Return true if vector alignment for DR is reachable by peeling
893 a few loop iterations. Return false otherwise. */
894
895 static bool
896 vector_alignment_reachable_p (struct data_reference *dr)
897 {
898 gimple stmt = DR_STMT (dr);
899 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
900 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
901
902 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
903 {
904 /* For interleaved accesses we peel only if the number of iterations in
905 the prolog loop (VF - misalignment, counted in elements) is a multiple
906 of the number of interleaved accesses (the group size). */
907 int elem_size, mis_in_elements;
908 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
909
910 /* FORNOW: handle only known alignment. */
911 if (!known_alignment_for_access_p (dr))
912 return false;
913
914 elem_size = GET_MODE_SIZE (TYPE_MODE (vectype)) / nelements;
915 mis_in_elements = DR_MISALIGNMENT (dr) / elem_size;
916
917 if ((nelements - mis_in_elements) % DR_GROUP_SIZE (stmt_info))
918 return false;
919 }
920
921 /* If the misalignment is known at compile time then allow peeling
922 only if natural alignment is reachable through peeling. */
923 if (known_alignment_for_access_p (dr) && !aligned_access_p (dr))
924 {
925 HOST_WIDE_INT elmsize =
926 int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
927 if (vect_print_dump_info (REPORT_DETAILS))
928 {
929 fprintf (vect_dump, "data size =" HOST_WIDE_INT_PRINT_DEC, elmsize);
930 fprintf (vect_dump, ". misalignment = %d. ", DR_MISALIGNMENT (dr));
931 }
932 if (DR_MISALIGNMENT (dr) % elmsize)
933 {
934 if (vect_print_dump_info (REPORT_DETAILS))
935 fprintf (vect_dump, "data size does not divide the misalignment.\n");
936 return false;
937 }
938 }
939
940 if (!known_alignment_for_access_p (dr))
941 {
942 tree type = (TREE_TYPE (DR_REF (dr)));
943 tree ba = DR_BASE_OBJECT (dr);
944 bool is_packed = false;
945
946 if (ba)
947 is_packed = contains_packed_reference (ba);
948
949 if (vect_print_dump_info (REPORT_DETAILS))
950 fprintf (vect_dump, "Unknown misalignment, is_packed = %d",is_packed);
951 if (targetm.vectorize.vector_alignment_reachable (type, is_packed))
952 return true;
953 else
954 return false;
955 }
956
957 return true;
958 }
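
/* The group-size restriction checked above can be modelled by the following
   stand-alone sketch (illustrative names only): peeling
   (nelements - misalignment-in-elements) scalar iterations would align the
   access, and for an interleaved access that count must be a whole number of
   group iterations, since every scalar iteration touches GROUP_SIZE elements.
   E.g. nelements = 4, misalignment 8 bytes, element size 4, group size 2:
   (4 - 2) % 2 == 0, so alignment is reachable.  */

static int
example_alignment_reachable_p (int nelements, int misalign_bytes,
                               int elem_size, int group_size)
{
  int mis_in_elements = misalign_bytes / elem_size;

  /* GROUP_SIZE is 1 for a non-interleaved access, in which case any known
     misalignment that is a whole number of elements can be peeled away.  */
  return ((nelements - mis_in_elements) % group_size) == 0;
}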
959
960 /* Function vect_enhance_data_refs_alignment
961
962 This pass will use loop versioning and loop peeling in order to enhance
963 the alignment of data references in the loop.
964
965 FOR NOW: we assume that whatever versioning/peeling takes place, only the
966 original loop is to be vectorized; any other loops that are created by
967 the transformations performed in this pass are not supposed to be
968 vectorized. This restriction will be relaxed.
969
970 This pass will require a cost model to guide it whether to apply peeling
971 or versioning or a combination of the two. For example, the scheme that
972 Intel uses when given a loop with several memory accesses, is as follows:
973 choose one memory access ('p') whose alignment you want to force by doing
974 peeling. Then, either (1) generate a loop in which 'p' is aligned and all
975 other accesses are not necessarily aligned, or (2) use loop versioning to
976 generate one loop in which all accesses are aligned, and another loop in
977 which only 'p' is necessarily aligned.
978
979 ("Automatic Intra-Register Vectorization for the Intel Architecture",
980 Aart J.C. Bik, Milind Girkar, Paul M. Grey and Xinmin Tian, International
981 Journal of Parallel Programming, Vol. 30, No. 2, April 2002.)
982
983 Devising a cost model is the most critical aspect of this work. It will
984 guide us on which access to peel for, whether to use loop versioning, how
985 many versions to create, etc. The cost model will probably consist of
986 generic considerations as well as target specific considerations (on
987 powerpc for example, misaligned stores are more painful than misaligned
988 loads).
989
990 Here are the general steps involved in alignment enhancements:
991
992 -- original loop, before alignment analysis:
993 for (i=0; i<N; i++){
994 x = q[i]; # DR_MISALIGNMENT(q) = unknown
995 p[i] = y; # DR_MISALIGNMENT(p) = unknown
996 }
997
998 -- After vect_compute_data_refs_alignment:
999 for (i=0; i<N; i++){
1000 x = q[i]; # DR_MISALIGNMENT(q) = 3
1001 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1002 }
1003
1004 -- Possibility 1: we do loop versioning:
1005 if (p is aligned) {
1006 for (i=0; i<N; i++){ # loop 1A
1007 x = q[i]; # DR_MISALIGNMENT(q) = 3
1008 p[i] = y; # DR_MISALIGNMENT(p) = 0
1009 }
1010 }
1011 else {
1012 for (i=0; i<N; i++){ # loop 1B
1013 x = q[i]; # DR_MISALIGNMENT(q) = 3
1014 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1015 }
1016 }
1017
1018 -- Possibility 2: we do loop peeling:
1019 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1020 x = q[i];
1021 p[i] = y;
1022 }
1023 for (i = 3; i < N; i++){ # loop 2A
1024 x = q[i]; # DR_MISALIGNMENT(q) = 0
1025 p[i] = y; # DR_MISALIGNMENT(p) = unknown
1026 }
1027
1028 -- Possibility 3: combination of loop peeling and versioning:
1029 for (i = 0; i < 3; i++){ # (scalar loop, not to be vectorized).
1030 x = q[i];
1031 p[i] = y;
1032 }
1033 if (p is aligned) {
1034 for (i = 3; i<N; i++){ # loop 3A
1035 x = q[i]; # DR_MISALIGNMENT(q) = 0
1036 p[i] = y; # DR_MISALIGNMENT(p) = 0
1037 }
1038 }
1039 else {
1040 for (i = 3; i<N; i++){ # loop 3B
1041 x = q[i]; # DR_MISALIGNMENT(q) = 0
1042 p[i] = y; # DR_MISALIGNMENT(p) = unaligned
1043 }
1044 }
1045
1046 These loops are later passed to loop_transform to be vectorized. The
1047 vectorizer will use the alignment information to guide the transformation
1048 (whether to generate regular loads/stores, or with special handling for
1049 misalignment). */
1050
1051 bool
1052 vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
1053 {
1054 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1055 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1056 enum dr_alignment_support supportable_dr_alignment;
1057 struct data_reference *dr0 = NULL;
1058 struct data_reference *dr;
1059 unsigned int i;
1060 bool do_peeling = false;
1061 bool do_versioning = false;
1062 bool stat;
1063 gimple stmt;
1064 stmt_vec_info stmt_info;
1065 int vect_versioning_for_alias_required;
1066
1067 if (vect_print_dump_info (REPORT_DETAILS))
1068 fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
1069
1070 /* While cost model enhancements are expected in the future, the high level
1071 view of the code at this time is as follows:
1072
1073 A) If there is a misaligned write then see if peeling to align this write
1074 can make all data references satisfy vect_supportable_dr_alignment.
1075 If so, update data structures as needed and return true. Note that
1076 at this time vect_supportable_dr_alignment is known to return false
1077 for a misaligned write.
1078
1079 B) If peeling wasn't possible and there is a data reference with an
1080 unknown misalignment that does not satisfy vect_supportable_dr_alignment
1081 then see if loop versioning checks can be used to make all data
1082 references satisfy vect_supportable_dr_alignment. If so, update
1083 data structures as needed and return true.
1084
1085 C) If neither peeling nor versioning were successful then return false if
1086 any data reference does not satisfy vect_supportable_dr_alignment.
1087
1088 D) Return true (all data references satisfy vect_supportable_dr_alignment).
1089
1090 Note, Possibility 3 above (which is peeling and versioning together) is not
1091 being done at this time. */
1092
1093 /* (1) Peeling to force alignment. */
1094
1095 /* (1.1) Decide whether to perform peeling, and how many iterations to peel:
1096 Considerations:
1097 + How many accesses will become aligned due to the peeling
1098 - How many accesses will become unaligned due to the peeling,
1099 and the cost of misaligned accesses.
1100 - The cost of peeling (the extra runtime checks, the increase
1101 in code size).
1102
1103 The scheme we use FORNOW: peel to force the alignment of the first
1104 misaligned store in the loop.
1105 Rationale: misaligned stores are not yet supported.
1106
1107 TODO: Use a cost model. */
1108
1109 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1110 {
1111 stmt = DR_STMT (dr);
1112 stmt_info = vinfo_for_stmt (stmt);
1113
1114 /* For interleaving, only the alignment of the first access
1115 matters. */
1116 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1117 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1118 continue;
1119
1120 if (!DR_IS_READ (dr) && !aligned_access_p (dr))
1121 {
1122 do_peeling = vector_alignment_reachable_p (dr);
1123 if (do_peeling)
1124 dr0 = dr;
1125 if (!do_peeling && vect_print_dump_info (REPORT_DETAILS))
1126 fprintf (vect_dump, "vector alignment may not be reachable");
1127 break;
1128 }
1129 }
1130
1131 vect_versioning_for_alias_required =
1132 (VEC_length (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo)) > 0);
1133
1134 /* Temporarily, if versioning for alias is required, we disable peeling
1135 until we support peeling and versioning. Often peeling for alignment
1136 will require peeling for loop-bound, which in turn requires that we
1137 know how to adjust the loop ivs after the loop. */
1138 if (vect_versioning_for_alias_required
1139 || !vect_can_advance_ivs_p (loop_vinfo)
1140 || !slpeel_can_duplicate_loop_p (loop, single_exit (loop)))
1141 do_peeling = false;
1142
1143 if (do_peeling)
1144 {
1145 int mis;
1146 int npeel = 0;
1147 gimple stmt = DR_STMT (dr0);
1148 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1149 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
1150 int nelements = TYPE_VECTOR_SUBPARTS (vectype);
1151
1152 if (known_alignment_for_access_p (dr0))
1153 {
1154 /* Since it's known at compile time, compute the number of iterations
1155 in the peeled loop (the peeling factor) for use in updating
1156 DR_MISALIGNMENT values. The peeling factor is the vectorization
1157 factor minus the misalignment as an element count. */
1158 mis = DR_MISALIGNMENT (dr0);
1159 mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
1160 npeel = nelements - mis;
1161
1162 /* For interleaved data access every iteration accesses all the
1163 members of the group, therefore we divide the number of iterations
1164 by the group size. */
1165 stmt_info = vinfo_for_stmt (DR_STMT (dr0));
1166 if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
1167 npeel /= DR_GROUP_SIZE (stmt_info);
1168
1169 if (vect_print_dump_info (REPORT_DETAILS))
1170 fprintf (vect_dump, "Try peeling by %d", npeel);
1171 }
1172
1173 /* Ensure that all data refs can be vectorized after the peel. */
1174 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1175 {
1176 int save_misalignment;
1177
1178 if (dr == dr0)
1179 continue;
1180
1181 stmt = DR_STMT (dr);
1182 stmt_info = vinfo_for_stmt (stmt);
1183 /* For interleaving, only the alignment of the first access
1184 matters. */
1185 if (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1186 && DR_GROUP_FIRST_DR (stmt_info) != stmt)
1187 continue;
1188
1189 save_misalignment = DR_MISALIGNMENT (dr);
1190 vect_update_misalignment_for_peel (dr, dr0, npeel);
1191 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1192 SET_DR_MISALIGNMENT (dr, save_misalignment);
1193
1194 if (!supportable_dr_alignment)
1195 {
1196 do_peeling = false;
1197 break;
1198 }
1199 }
1200
1201 if (do_peeling)
1202 {
1203 /* (1.2) Update the DR_MISALIGNMENT of each data reference DR_i.
1204 If the misalignment of DR_i is identical to that of dr0 then set
1205 DR_MISALIGNMENT (DR_i) to zero. If the misalignment of DR_i and
1206 dr0 are known at compile time then increment DR_MISALIGNMENT (DR_i)
1207 by the peeling factor times the element size of DR_i (MOD the
1208 vectorization factor times the size). Otherwise, the
1209 misalignment of DR_i must be set to unknown. */
1210 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1211 if (dr != dr0)
1212 vect_update_misalignment_for_peel (dr, dr0, npeel);
1213
1214 LOOP_VINFO_UNALIGNED_DR (loop_vinfo) = dr0;
1215 LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) = DR_MISALIGNMENT (dr0);
1216 SET_DR_MISALIGNMENT (dr0, 0);
1217 if (vect_print_dump_info (REPORT_ALIGNMENT))
1218 fprintf (vect_dump, "Alignment of access forced using peeling.");
1219
1220 if (vect_print_dump_info (REPORT_DETAILS))
1221 fprintf (vect_dump, "Peeling for alignment will be applied.");
1222
1223 stat = vect_verify_datarefs_alignment (loop_vinfo);
1224 gcc_assert (stat);
1225 return stat;
1226 }
1227 }
1228
1229
1230 /* (2) Versioning to force alignment. */
1231
1232 /* Try versioning if:
1233 1) flag_tree_vect_loop_version is TRUE
1234 2) optimize loop for speed
1235 3) there is at least one unsupported misaligned data ref with an unknown
1236 misalignment, and
1237 4) all misaligned data refs with a known misalignment are supported, and
1238 5) the number of runtime alignment checks is within reason. */
1239
1240 do_versioning =
1241 flag_tree_vect_loop_version
1242 && optimize_loop_nest_for_speed_p (loop)
1243 && (!loop->inner); /* FORNOW */
1244
1245 if (do_versioning)
1246 {
1247 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1248 {
1249 stmt = DR_STMT (dr);
1250 stmt_info = vinfo_for_stmt (stmt);
1251
1252 /* For interleaving, only the alignment of the first access
1253 matters. */
1254 if (aligned_access_p (dr)
1255 || (STMT_VINFO_STRIDED_ACCESS (stmt_info)
1256 && DR_GROUP_FIRST_DR (stmt_info) != stmt))
1257 continue;
1258
1259 supportable_dr_alignment = vect_supportable_dr_alignment (dr);
1260
1261 if (!supportable_dr_alignment)
1262 {
1263 gimple stmt;
1264 int mask;
1265 tree vectype;
1266
1267 if (known_alignment_for_access_p (dr)
1268 || VEC_length (gimple,
1269 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo))
1270 >= (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS))
1271 {
1272 do_versioning = false;
1273 break;
1274 }
1275
1276 stmt = DR_STMT (dr);
1277 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1278 gcc_assert (vectype);
1279
1280 /* The rightmost bits of an aligned address must be zeros.
1281 Construct the mask needed for this test. For example,
1282 GET_MODE_SIZE for the vector mode V4SI is 16 bytes so the
1283 mask must be 15 = 0xf. */
1284 mask = GET_MODE_SIZE (TYPE_MODE (vectype)) - 1;
1285
1286 /* FORNOW: use the same mask to test all potentially unaligned
1287 references in the loop. The vectorizer currently supports
1288 a single vector size, see the reference to
1289 GET_MODE_NUNITS (TYPE_MODE (vectype)) where the
1290 vectorization factor is computed. */
1291 gcc_assert (!LOOP_VINFO_PTR_MASK (loop_vinfo)
1292 || LOOP_VINFO_PTR_MASK (loop_vinfo) == mask);
1293 LOOP_VINFO_PTR_MASK (loop_vinfo) = mask;
1294 VEC_safe_push (gimple, heap,
1295 LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo),
1296 DR_STMT (dr));
1297 }
1298 }
1299
1300 /* Versioning requires at least one misaligned data reference. */
1301 if (VEC_length (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo)) == 0)
1302 do_versioning = false;
1303 else if (!do_versioning)
1304 VEC_truncate (gimple, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo), 0);
1305 }
1306
1307 if (do_versioning)
1308 {
1309 VEC(gimple,heap) *may_misalign_stmts
1310 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
1311 gimple stmt;
1312
1313 /* It can now be assumed that the data references in the statements
1314 in LOOP_VINFO_MAY_MISALIGN_STMTS will be aligned in the version
1315 of the loop being vectorized. */
1316 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, stmt); i++)
1317 {
1318 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1319 dr = STMT_VINFO_DATA_REF (stmt_info);
1320 SET_DR_MISALIGNMENT (dr, 0);
1321 if (vect_print_dump_info (REPORT_ALIGNMENT))
1322 fprintf (vect_dump, "Alignment of access forced using versioning.");
1323 }
1324
1325 if (vect_print_dump_info (REPORT_DETAILS))
1326 fprintf (vect_dump, "Versioning for alignment will be applied.");
1327
1328 /* Peeling and versioning can't be done together at this time. */
1329 gcc_assert (! (do_peeling && do_versioning));
1330
1331 stat = vect_verify_datarefs_alignment (loop_vinfo);
1332 gcc_assert (stat);
1333 return stat;
1334 }
1335
1336 /* This point is reached if neither peeling nor versioning is being done. */
1337 gcc_assert (! (do_peeling || do_versioning));
1338
1339 stat = vect_verify_datarefs_alignment (loop_vinfo);
1340 return stat;
1341 }
1342
1343
1344 /* Function vect_analyze_data_refs_alignment
1345
1346 Analyze the alignment of the data-references in the loop.
1347 Return FALSE if a data reference is found that cannot be vectorized. */
1348
1349 bool
1350 vect_analyze_data_refs_alignment (loop_vec_info loop_vinfo)
1351 {
1352 if (vect_print_dump_info (REPORT_DETAILS))
1353 fprintf (vect_dump, "=== vect_analyze_data_refs_alignment ===");
1354
1355 if (!vect_compute_data_refs_alignment (loop_vinfo))
1356 {
1357 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1358 fprintf (vect_dump,
1359 "not vectorized: can't calculate alignment for data ref.");
1360 return false;
1361 }
1362
1363 return true;
1364 }
1365
1366
1367 /* Analyze groups of strided accesses: check that DR belongs to a group of
1368 strided accesses of legal size, step, etc. Detect gaps, single element
1369 interleaving, and other special cases. Set strided access info.
1370 Collect groups of strided stores for further use in SLP analysis. */
1371
1372 static bool
1373 vect_analyze_group_access (struct data_reference *dr)
1374 {
1375 tree step = DR_STEP (dr);
1376 tree scalar_type = TREE_TYPE (DR_REF (dr));
1377 HOST_WIDE_INT type_size = TREE_INT_CST_LOW (TYPE_SIZE_UNIT (scalar_type));
1378 gimple stmt = DR_STMT (dr);
1379 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1380 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1381 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1382 HOST_WIDE_INT stride;
1383 bool slp_impossible = false;
1384
1385 /* For interleaving, STRIDE is STEP counted in elements, i.e., the size of the
1386 interleaving group (including gaps). */
1387 stride = dr_step / type_size;
1388
1389 /* A non-consecutive access is possible only if it is part of an interleaving group. */
1390 if (!DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)))
1391 {
1392 /* Check if this DR is part of an interleaving group, and is the single
1393 element of the group that is accessed in the loop. */
1394
1395 /* Gaps are supported only for loads. STEP must be a multiple of the type
1396 size. The size of the group must be a power of 2. */
1397 if (DR_IS_READ (dr)
1398 && (dr_step % type_size) == 0
1399 && stride > 0
1400 && exact_log2 (stride) != -1)
1401 {
1402 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = stmt;
1403 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1404 if (vect_print_dump_info (REPORT_DR_DETAILS))
1405 {
1406 fprintf (vect_dump, "Detected single element interleaving %d ",
1407 DR_GROUP_SIZE (vinfo_for_stmt (stmt)));
1408 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1409 fprintf (vect_dump, " step ");
1410 print_generic_expr (vect_dump, step, TDF_SLIM);
1411 }
1412 return true;
1413 }
1414 if (vect_print_dump_info (REPORT_DETAILS))
1415 fprintf (vect_dump, "not consecutive access");
1416 return false;
1417 }
1418
1419 if (DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) == stmt)
1420 {
1421 /* First stmt in the interleaving chain. Check the chain. */
1422 gimple next = DR_GROUP_NEXT_DR (vinfo_for_stmt (stmt));
1423 struct data_reference *data_ref = dr;
1424 unsigned int count = 1;
1425 tree next_step;
1426 tree prev_init = DR_INIT (data_ref);
1427 gimple prev = stmt;
1428 HOST_WIDE_INT diff, count_in_bytes;
1429
1430 while (next)
1431 {
1432 /* Skip same data-refs. If two or more stmts share a data-ref
1433 (supported only for loads), we vectorize only the first stmt, and
1434 the rest get their vectorized loads from the first one. */
1435 if (!tree_int_cst_compare (DR_INIT (data_ref),
1436 DR_INIT (STMT_VINFO_DATA_REF (
1437 vinfo_for_stmt (next)))))
1438 {
1439 if (!DR_IS_READ (data_ref))
1440 {
1441 if (vect_print_dump_info (REPORT_DETAILS))
1442 fprintf (vect_dump, "Two store stmts share the same dr.");
1443 return false;
1444 }
1445
1446 /* Check that there are no load-store dependencies for these loads,
1447 to prevent a load-store-load sequence to the same location. */
1448 if (DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (next))
1449 || DR_GROUP_READ_WRITE_DEPENDENCE (vinfo_for_stmt (prev)))
1450 {
1451 if (vect_print_dump_info (REPORT_DETAILS))
1452 fprintf (vect_dump,
1453 "READ_WRITE dependence in interleaving.");
1454 return false;
1455 }
1456
1457 /* For loads, record that NEXT reuses the vectorized load of PREV. */
1458 DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next)) = prev;
1459
1460 prev = next;
1461 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1462 continue;
1463 }
1464 prev = next;
1465
1466 /* Check that all the accesses have the same STEP. */
1467 next_step = DR_STEP (STMT_VINFO_DATA_REF (vinfo_for_stmt (next)));
1468 if (tree_int_cst_compare (step, next_step))
1469 {
1470 if (vect_print_dump_info (REPORT_DETAILS))
1471 fprintf (vect_dump, "not consecutive access in interleaving");
1472 return false;
1473 }
1474
1475 data_ref = STMT_VINFO_DATA_REF (vinfo_for_stmt (next));
1476 /* Check that the distance between two accesses is equal to the type
1477 size. Otherwise, we have gaps. */
1478 diff = (TREE_INT_CST_LOW (DR_INIT (data_ref))
1479 - TREE_INT_CST_LOW (prev_init)) / type_size;
1480 if (diff != 1)
1481 {
1482 /* FORNOW: SLP of accesses with gaps is not supported. */
1483 slp_impossible = true;
1484 if (!DR_IS_READ (data_ref))
1485 {
1486 if (vect_print_dump_info (REPORT_DETAILS))
1487 fprintf (vect_dump, "interleaved store with gaps");
1488 return false;
1489 }
1490 }
1491
1492 /* Store the gap from the previous member of the group. If there is no
1493 gap in the access, DR_GROUP_GAP is always 1. */
1494 DR_GROUP_GAP (vinfo_for_stmt (next)) = diff;
1495
1496 prev_init = DR_INIT (data_ref);
1497 next = DR_GROUP_NEXT_DR (vinfo_for_stmt (next));
1498 /* Count the number of data-refs in the chain. */
1499 count++;
1500 }
1501
1502 /* COUNT is the number of accesses found; we multiply it by the size of
1503 the type to get COUNT_IN_BYTES. */
1504 count_in_bytes = type_size * count;
1505
1506 /* Check that the size of the interleaving is not greater than STEP. */
1507 if (dr_step < count_in_bytes)
1508 {
1509 if (vect_print_dump_info (REPORT_DETAILS))
1510 {
1511 fprintf (vect_dump, "interleaving size is greater than step for ");
1512 print_generic_expr (vect_dump, DR_REF (dr), TDF_SLIM);
1513 }
1514 return false;
1515 }
1516
1517 /* Check that the size of the interleaving is equal to STEP for stores,
1518 i.e., that there are no gaps. */
1519 if (dr_step != count_in_bytes)
1520 {
1521 if (DR_IS_READ (dr))
1522 {
1523 slp_impossible = true;
1524 /* There is a gap after the last load in the group. This gap is the
1525 difference between the stride and the number of elements. When
1526 there is no gap, this difference is 0. */
1527 DR_GROUP_GAP (vinfo_for_stmt (stmt)) = stride - count;
1528 }
1529 else
1530 {
1531 if (vect_print_dump_info (REPORT_DETAILS))
1532 fprintf (vect_dump, "interleaved store with gaps");
1533 return false;
1534 }
1535 }
1536
1537 /* Check that STEP is a multiple of type size. */
1538 if ((dr_step % type_size) != 0)
1539 {
1540 if (vect_print_dump_info (REPORT_DETAILS))
1541 {
1542 fprintf (vect_dump, "step is not a multiple of type size: step ");
1543 print_generic_expr (vect_dump, step, TDF_SLIM);
1544 fprintf (vect_dump, " size ");
1545 print_generic_expr (vect_dump, TYPE_SIZE_UNIT (scalar_type),
1546 TDF_SLIM);
1547 }
1548 return false;
1549 }
1550
1551 /* FORNOW: we handle only interleaving that is a power of 2.
1552 We don't fail here if it may be still possible to vectorize the
1553 group using SLP. If not, the size of the group will be checked in
1554 vect_analyze_operations, and the vectorization will fail. */
1555 if (exact_log2 (stride) == -1)
1556 {
1557 if (vect_print_dump_info (REPORT_DETAILS))
1558 fprintf (vect_dump, "interleaving is not a power of 2");
1559
1560 if (slp_impossible)
1561 return false;
1562 }
1563 DR_GROUP_SIZE (vinfo_for_stmt (stmt)) = stride;
1564 if (vect_print_dump_info (REPORT_DETAILS))
1565 fprintf (vect_dump, "Detected interleaving of size %d", (int)stride);
1566
1567 /* SLP: create an SLP data structure for every interleaving group of
1568 stores for further analysis in vect_analyze_slp. */
1569 if (!DR_IS_READ (dr) && !slp_impossible)
1570 VEC_safe_push (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo), stmt);
1571 }
1572
1573 return true;
1574 }
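
/* A concrete example of the checks above (illustrative only): for loads
   b[4*i], b[4*i+1] and b[4*i+3] of a 4-byte type, the step is 16 bytes, so the
   stride (group size including gaps) is 16 / 4 = 4.  The chain has COUNT = 3
   members and COUNT_IN_BYTES = 12 < 16, so there is a gap (the missing
   b[4*i+2]); that is accepted for loads but would reject an interleaved
   store.  The stand-alone sketch below (names are examples, not GCC API)
   mirrors the store-group conditions.  */

static int
example_valid_store_group_p (long step_bytes, long type_size, unsigned count)
{
  long count_in_bytes = (long) count * type_size;

  /* Mirrors the checks above: a store group may have no gaps, so the group
     must exactly cover one step, and the step must be a whole number of
     elements.  */
  return count_in_bytes == step_bytes && step_bytes % type_size == 0;
}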
1575
1576
1577 /* Analyze the access pattern of the data-reference DR.
1578 In case of non-consecutive accesses call vect_analyze_group_access() to
1579 analyze groups of strided accesses. */
1580
1581 static bool
1582 vect_analyze_data_ref_access (struct data_reference *dr)
1583 {
1584 tree step = DR_STEP (dr);
1585 tree scalar_type = TREE_TYPE (DR_REF (dr));
1586 gimple stmt = DR_STMT (dr);
1587 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1588 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1589 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1590 HOST_WIDE_INT dr_step = TREE_INT_CST_LOW (step);
1591
1592 if (!step)
1593 {
1594 if (vect_print_dump_info (REPORT_DETAILS))
1595 fprintf (vect_dump, "bad data-ref access");
1596 return false;
1597 }
1598
1599 /* Don't allow invariant accesses. */
1600 if (dr_step == 0)
1601 return false;
1602
1603 if (nested_in_vect_loop_p (loop, stmt))
1604 {
1605 /* Interleaved accesses are not yet supported within outer-loop
1606 vectorization for references in the inner-loop. */
1607 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1608
1609 /* For the rest of the analysis we use the outer-loop step. */
1610 step = STMT_VINFO_DR_STEP (stmt_info);
1611 dr_step = TREE_INT_CST_LOW (step);
1612
1613 if (dr_step == 0)
1614 {
1615 if (vect_print_dump_info (REPORT_ALIGNMENT))
1616 fprintf (vect_dump, "zero step in outer loop.");
1617 if (DR_IS_READ (dr))
1618 return true;
1619 else
1620 return false;
1621 }
1622 }
1623
1624 /* Consecutive? */
1625 if (!tree_int_cst_compare (step, TYPE_SIZE_UNIT (scalar_type)))
1626 {
1627 /* Mark that it is not interleaving. */
1628 DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt)) = NULL;
1629 return true;
1630 }
1631
1632 if (nested_in_vect_loop_p (loop, stmt))
1633 {
1634 if (vect_print_dump_info (REPORT_ALIGNMENT))
1635 fprintf (vect_dump, "strided access in outer loop.");
1636 return false;
1637 }
1638
1639 /* Not consecutive access - check if it's a part of interleaving group. */
1640 return vect_analyze_group_access (dr);
1641 }
1642
1643
1644 /* Function vect_analyze_data_ref_accesses.
1645
1646 Analyze the access pattern of all the data references in the loop.
1647
1648 FORNOW: the only access pattern that is considered vectorizable is a
1649 simple step 1 (consecutive) access.
1650
1651 FORNOW: handle only arrays and pointer accesses. */
1652
1653 bool
1654 vect_analyze_data_ref_accesses (loop_vec_info loop_vinfo)
1655 {
1656 unsigned int i;
1657 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1658 struct data_reference *dr;
1659
1660 if (vect_print_dump_info (REPORT_DETAILS))
1661 fprintf (vect_dump, "=== vect_analyze_data_ref_accesses ===");
1662
1663 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1664 if (!vect_analyze_data_ref_access (dr))
1665 {
1666 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1667 fprintf (vect_dump, "not vectorized: complicated access pattern.");
1668 return false;
1669 }
1670
1671 return true;
1672 }
1673
1674 /* Function vect_prune_runtime_alias_test_list.
1675
1676 Prune a list of ddrs to be tested at run-time by versioning for alias.
1677 Return FALSE if the resulting list of ddrs is longer than allowed by
1678 PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, otherwise return TRUE. */
1679
1680 bool
1681 vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
1682 {
1683 VEC (ddr_p, heap) * ddrs =
1684 LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
1685 unsigned i, j;
1686
1687 if (vect_print_dump_info (REPORT_DETAILS))
1688 fprintf (vect_dump, "=== vect_prune_runtime_alias_test_list ===");
1689
1690 for (i = 0; i < VEC_length (ddr_p, ddrs); )
1691 {
1692 bool found;
1693 ddr_p ddr_i;
1694
1695 ddr_i = VEC_index (ddr_p, ddrs, i);
1696 found = false;
1697
1698 for (j = 0; j < i; j++)
1699 {
1700 ddr_p ddr_j = VEC_index (ddr_p, ddrs, j);
1701
1702 if (vect_vfa_range_equal (ddr_i, ddr_j))
1703 {
1704 if (vect_print_dump_info (REPORT_DR_DETAILS))
1705 {
1706 fprintf (vect_dump, "found equal ranges ");
1707 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_i)), TDF_SLIM);
1708 fprintf (vect_dump, ", ");
1709 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_i)), TDF_SLIM);
1710 fprintf (vect_dump, " and ");
1711 print_generic_expr (vect_dump, DR_REF (DDR_A (ddr_j)), TDF_SLIM);
1712 fprintf (vect_dump, ", ");
1713 print_generic_expr (vect_dump, DR_REF (DDR_B (ddr_j)), TDF_SLIM);
1714 }
1715 found = true;
1716 break;
1717 }
1718 }
1719
1720 if (found)
1721 {
1722 VEC_ordered_remove (ddr_p, ddrs, i);
1723 continue;
1724 }
1725 i++;
1726 }
1727
1728 if (VEC_length (ddr_p, ddrs) >
1729 (unsigned) PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS))
1730 {
1731 if (vect_print_dump_info (REPORT_DR_DETAILS))
1732 {
1733 fprintf (vect_dump,
1734 "disable versioning for alias - max number of generated "
1735 "checks exceeded.");
1736 }
1737
1738 VEC_truncate (ddr_p, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo), 0);
1739
1740 return false;
1741 }
1742
1743 return true;
1744 }
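
/* Illustrative sketch (a hypothetical situation, not taken from the code
   above): if the loop contains two statements that read A[i] and write B[i],
   and both give rise to a may-alias ddr covering the same address ranges of
   A and B, vect_vfa_range_equal detects the duplicate and only one run-time
   overlap check between A and B is kept for loop versioning.  If, after this
   pruning, the number of remaining checks still exceeds
   PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS, versioning for alias is given up
   altogether.  */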
1745
1746
1747 /* Function vect_analyze_data_refs.
1748
1749 Find all the data references in the loop.
1750
1751 The general structure of the analysis of data refs in the vectorizer is as
1752 follows:
1753 1- vect_analyze_data_refs(loop): call compute_data_dependences_for_loop to
1754 find and analyze all data-refs in the loop and their dependences.
1755 2- vect_analyze_data_ref_dependences(): apply dependence testing using the ddrs.
1756 3- vect_analyze_data_refs_alignment(): check that the alignment of each data-ref is ok.
1757 4- vect_analyze_data_ref_accesses(): check that the access pattern (step) of each data-ref is ok.
1758
1759 */
1760
1761 bool
1762 vect_analyze_data_refs (loop_vec_info loop_vinfo)
1763 {
1764 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1765 unsigned int i;
1766 VEC (data_reference_p, heap) *datarefs;
1767 struct data_reference *dr;
1768 tree scalar_type;
1769
1770 if (vect_print_dump_info (REPORT_DETAILS))
1771 fprintf (vect_dump, "=== vect_analyze_data_refs ===\n");
1772
1773 compute_data_dependences_for_loop (loop, true,
1774 &LOOP_VINFO_DATAREFS (loop_vinfo),
1775 &LOOP_VINFO_DDRS (loop_vinfo));
1776
1777 /* Go through the data-refs, check that the analysis succeeded, and record
1778 the data reference and its vectype in the stmt_vec_info struct of each stmt. */
1779 datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
1780
1781 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
1782 {
1783 gimple stmt;
1784 stmt_vec_info stmt_info;
1785 basic_block bb;
1786 tree base, offset, init;
1787
1788 if (!dr || !DR_REF (dr))
1789 {
1790 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1791 fprintf (vect_dump, "not vectorized: unhandled data-ref ");
1792 return false;
1793 }
1794
1795 stmt = DR_STMT (dr);
1796 stmt_info = vinfo_for_stmt (stmt);
1797
1798 /* Check that analysis of the data-ref succeeded. */
1799 if (!DR_BASE_ADDRESS (dr) || !DR_OFFSET (dr) || !DR_INIT (dr)
1800 || !DR_STEP (dr))
1801 {
1802 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1803 {
1804 fprintf (vect_dump, "not vectorized: data ref analysis failed ");
1805 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1806 }
1807 return false;
1808 }
1809
1810 if (TREE_CODE (DR_BASE_ADDRESS (dr)) == INTEGER_CST)
1811 {
1812 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1813 fprintf (vect_dump, "not vectorized: base addr of dr is a "
1814 "constant");
1815 return false;
1816 }
1817
1818 base = unshare_expr (DR_BASE_ADDRESS (dr));
1819 offset = unshare_expr (DR_OFFSET (dr));
1820 init = unshare_expr (DR_INIT (dr));
1821
1822 /* Update DR field in stmt_vec_info struct. */
1823 bb = gimple_bb (stmt);
1824
1825 /* If the dataref is in an inner-loop of the loop that is considered
1826 for vectorization, we also want to analyze the access relative to
1827 the outer-loop (DR contains information only relative to the
1828 inner-most enclosing loop). We do that by building a reference to the
1829 first location accessed by the inner-loop, and analyze it relative to
1830 the outer-loop. */
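/* Rough illustration (hedged; it reuses the hypothetical 'in[i+j]' access,
   with 'in' pointing to short, described before
   vect_create_addr_base_for_vector_ref below): DR describes the access
   relative to the inner j-loop (base &in, step of one short), whereas the
   STMT_VINFO_DR_* fields filled in here describe the first location accessed
   by the inner-loop, &in[i], relative to the outer i-loop.  */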
1831 if (nested_in_vect_loop_p (loop, stmt))
1832 {
1833 tree outer_step, outer_base, outer_init;
1834 HOST_WIDE_INT pbitsize, pbitpos;
1835 tree poffset;
1836 enum machine_mode pmode;
1837 int punsignedp, pvolatilep;
1838 affine_iv base_iv, offset_iv;
1839 tree dinit;
1840
1841 /* Build a reference to the first location accessed by the
1842 inner-loop: *(BASE+INIT). (The first location is actually
1843 BASE+INIT+OFFSET, but we add OFFSET separately later). */
1844 tree inner_base = build_fold_indirect_ref
1845 (fold_build2 (POINTER_PLUS_EXPR,
1846 TREE_TYPE (base), base,
1847 fold_convert (sizetype, init)));
1848
1849 if (vect_print_dump_info (REPORT_DETAILS))
1850 {
1851 fprintf (vect_dump, "analyze in outer-loop: ");
1852 print_generic_expr (vect_dump, inner_base, TDF_SLIM);
1853 }
1854
1855 outer_base = get_inner_reference (inner_base, &pbitsize, &pbitpos,
1856 &poffset, &pmode, &punsignedp, &pvolatilep, false);
1857 gcc_assert (outer_base != NULL_TREE);
1858
1859 if (pbitpos % BITS_PER_UNIT != 0)
1860 {
1861 if (vect_print_dump_info (REPORT_DETAILS))
1862 fprintf (vect_dump, "failed: bit offset alignment.\n");
1863 return false;
1864 }
1865
1866 outer_base = build_fold_addr_expr (outer_base);
1867 if (!simple_iv (loop, loop_containing_stmt (stmt), outer_base,
1868 &base_iv, false))
1869 {
1870 if (vect_print_dump_info (REPORT_DETAILS))
1871 fprintf (vect_dump, "failed: evolution of base is not affine.\n");
1872 return false;
1873 }
1874
1875 if (offset)
1876 {
1877 if (poffset)
1878 poffset = fold_build2 (PLUS_EXPR, TREE_TYPE (offset), offset,
1879 poffset);
1880 else
1881 poffset = offset;
1882 }
1883
1884 if (!poffset)
1885 {
1886 offset_iv.base = ssize_int (0);
1887 offset_iv.step = ssize_int (0);
1888 }
1889 else if (!simple_iv (loop, loop_containing_stmt (stmt), poffset,
1890 &offset_iv, false))
1891 {
1892 if (vect_print_dump_info (REPORT_DETAILS))
1893 fprintf (vect_dump, "evolution of offset is not affine.\n");
1894 return false;
1895 }
1896
1897 outer_init = ssize_int (pbitpos / BITS_PER_UNIT);
1898 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
1899 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
1900 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
1901 outer_init = size_binop (PLUS_EXPR, outer_init, dinit);
1902
1903 outer_step = size_binop (PLUS_EXPR,
1904 fold_convert (ssizetype, base_iv.step),
1905 fold_convert (ssizetype, offset_iv.step));
1906
1907 STMT_VINFO_DR_STEP (stmt_info) = outer_step;
1908 /* FIXME: Use canonicalize_base_object_address (base_iv.base); */
1909 STMT_VINFO_DR_BASE_ADDRESS (stmt_info) = base_iv.base;
1910 STMT_VINFO_DR_INIT (stmt_info) = outer_init;
1911 STMT_VINFO_DR_OFFSET (stmt_info) =
1912 fold_convert (ssizetype, offset_iv.base);
1913 STMT_VINFO_DR_ALIGNED_TO (stmt_info) =
1914 size_int (highest_pow2_factor (offset_iv.base));
1915
1916 if (vect_print_dump_info (REPORT_DETAILS))
1917 {
1918 fprintf (vect_dump, "\touter base_address: ");
1919 print_generic_expr (vect_dump, STMT_VINFO_DR_BASE_ADDRESS (stmt_info), TDF_SLIM);
1920 fprintf (vect_dump, "\n\touter offset from base address: ");
1921 print_generic_expr (vect_dump, STMT_VINFO_DR_OFFSET (stmt_info), TDF_SLIM);
1922 fprintf (vect_dump, "\n\touter constant offset from base address: ");
1923 print_generic_expr (vect_dump, STMT_VINFO_DR_INIT (stmt_info), TDF_SLIM);
1924 fprintf (vect_dump, "\n\touter step: ");
1925 print_generic_expr (vect_dump, STMT_VINFO_DR_STEP (stmt_info), TDF_SLIM);
1926 fprintf (vect_dump, "\n\touter aligned to: ");
1927 print_generic_expr (vect_dump, STMT_VINFO_DR_ALIGNED_TO (stmt_info), TDF_SLIM);
1928 }
1929 }
1930
1931 if (STMT_VINFO_DATA_REF (stmt_info))
1932 {
1933 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1934 {
1935 fprintf (vect_dump,
1936 "not vectorized: more than one data ref in stmt: ");
1937 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1938 }
1939 return false;
1940 }
1941 STMT_VINFO_DATA_REF (stmt_info) = dr;
1942
1943 /* Set vectype for STMT. */
1944 scalar_type = TREE_TYPE (DR_REF (dr));
1945 STMT_VINFO_VECTYPE (stmt_info) =
1946 get_vectype_for_scalar_type (scalar_type);
1947 if (!STMT_VINFO_VECTYPE (stmt_info))
1948 {
1949 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
1950 {
1951 fprintf (vect_dump,
1952 "not vectorized: no vectype for stmt: ");
1953 print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
1954 fprintf (vect_dump, " scalar_type: ");
1955 print_generic_expr (vect_dump, scalar_type, TDF_DETAILS);
1956 }
1957 return false;
1958 }
1959 }
1960
1961 return true;
1962 }
1963
1964
1965 /* Function vect_get_new_vect_var.
1966
1967 Returns a name for a new variable. The current naming scheme appends the
1968 prefix "vect_" or "vect_p" (depending on the value of VAR_KIND) to
1969 the name of vectorizer generated variables, and appends that to NAME if
1970 provided. */
1971
1972 tree
1973 vect_get_new_vect_var (tree type, enum vect_var_kind var_kind, const char *name)
1974 {
1975 const char *prefix;
1976 tree new_vect_var;
1977
1978 switch (var_kind)
1979 {
1980 case vect_simple_var:
1981 prefix = "vect_";
1982 break;
1983 case vect_scalar_var:
1984 prefix = "stmp_";
1985 break;
1986 case vect_pointer_var:
1987 prefix = "vect_p";
1988 break;
1989 default:
1990 gcc_unreachable ();
1991 }
1992
1993 if (name)
1994 {
1995 char* tmp = concat (prefix, name, NULL);
1996 new_vect_var = create_tmp_var (type, tmp);
1997 free (tmp);
1998 }
1999 else
2000 new_vect_var = create_tmp_var (type, prefix);
2001
2002 /* Mark vector typed variable as a gimple register variable. */
2003 if (TREE_CODE (type) == VECTOR_TYPE)
2004 DECL_GIMPLE_REG_P (new_vect_var) = true;
2005
2006 return new_vect_var;
2007 }
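
/* Usage sketch (the name "in" is hypothetical, not from the sources above):
   a call like vect_get_new_vect_var (vect_ptr_type, vect_pointer_var, "in")
   concatenates the prefix and the name, so the new temporary is named
   "vect_pin"; with a NULL name only the prefix "vect_p" is used.  */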
2008
2009
2010 /* Function vect_create_addr_base_for_vector_ref.
2011
2012 Create an expression that computes the address of the first memory location
2013 that will be accessed for a data reference.
2014
2015 Input:
2016 STMT: The statement containing the data reference.
2017 NEW_STMT_LIST: Must be initialized to NULL_TREE or a statement list.
2018 OFFSET: Optional. If supplied, it is added to the initial address.
2019 LOOP: Specifies relative to which loop-nest the address should be computed.
2020 For example, when the dataref is in an inner-loop nested in an
2021 outer-loop that is now being vectorized, LOOP can be either the
2022 outer-loop, or the inner-loop. The first memory location accessed
2023 by the following dataref ('in' points to short):
2024
2025 for (i=0; i<N; i++)
2026 for (j=0; j<M; j++)
2027 s += in[i+j]
2028
2029 is as follows:
2030 if LOOP=i_loop: &in (relative to i_loop)
2031 if LOOP=j_loop: &in+i*2B (relative to j_loop)
2032
2033 Output:
2034 1. Return an SSA_NAME whose value is the address of the memory location of
2035 the first vector of the data reference.
2036 2. If NEW_STMT_LIST is not NULL_TREE after return, then the caller must insert
2037 the statement(s) it contains, which define the returned SSA_NAME.
2038
2039 FORNOW: We are only handling array accesses with step 1. */
2040
2041 tree
2042 vect_create_addr_base_for_vector_ref (gimple stmt,
2043 gimple_seq *new_stmt_list,
2044 tree offset,
2045 struct loop *loop)
2046 {
2047 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2048 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2049 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2050 tree data_ref_base = unshare_expr (DR_BASE_ADDRESS (dr));
2051 tree base_name;
2052 tree data_ref_base_var;
2053 tree vec_stmt;
2054 tree addr_base, addr_expr;
2055 tree dest;
2056 gimple_seq seq = NULL;
2057 tree base_offset = unshare_expr (DR_OFFSET (dr));
2058 tree init = unshare_expr (DR_INIT (dr));
2059 tree vect_ptr_type, addr_expr2;
2060 tree step = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
2061
2062 gcc_assert (loop);
2063 if (loop != containing_loop)
2064 {
2065 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2066 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2067
2068 gcc_assert (nested_in_vect_loop_p (loop, stmt));
2069
2070 data_ref_base = unshare_expr (STMT_VINFO_DR_BASE_ADDRESS (stmt_info));
2071 base_offset = unshare_expr (STMT_VINFO_DR_OFFSET (stmt_info));
2072 init = unshare_expr (STMT_VINFO_DR_INIT (stmt_info));
2073 }
2074
2075 /* Create data_ref_base */
2076 base_name = build_fold_indirect_ref (data_ref_base);
2077 data_ref_base_var = create_tmp_var (TREE_TYPE (data_ref_base), "batmp");
2078 add_referenced_var (data_ref_base_var);
2079 data_ref_base = force_gimple_operand (data_ref_base, &seq, true,
2080 data_ref_base_var);
2081 gimple_seq_add_seq (new_stmt_list, seq);
2082
2083 /* Create base_offset */
2084 base_offset = size_binop (PLUS_EXPR,
2085 fold_convert (sizetype, base_offset),
2086 fold_convert (sizetype, init));
2087 dest = create_tmp_var (sizetype, "base_off");
2088 add_referenced_var (dest);
2089 base_offset = force_gimple_operand (base_offset, &seq, true, dest);
2090 gimple_seq_add_seq (new_stmt_list, seq);
2091
2092 if (offset)
2093 {
2094 tree tmp = create_tmp_var (sizetype, "offset");
2095
2096 add_referenced_var (tmp);
2097 offset = fold_build2 (MULT_EXPR, sizetype,
2098 fold_convert (sizetype, offset), step);
2099 base_offset = fold_build2 (PLUS_EXPR, sizetype,
2100 base_offset, offset);
2101 base_offset = force_gimple_operand (base_offset, &seq, false, tmp);
2102 gimple_seq_add_seq (new_stmt_list, seq);
2103 }
2104
2105 /* base + base_offset */
2106 addr_base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (data_ref_base),
2107 data_ref_base, base_offset);
2108
2109 vect_ptr_type = build_pointer_type (STMT_VINFO_VECTYPE (stmt_info));
2110
2111 /* addr_expr = addr_base */
2112 addr_expr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2113 get_name (base_name));
2114 add_referenced_var (addr_expr);
2115 vec_stmt = fold_convert (vect_ptr_type, addr_base);
2116 addr_expr2 = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2117 get_name (base_name));
2118 add_referenced_var (addr_expr2);
2119 vec_stmt = force_gimple_operand (vec_stmt, &seq, false, addr_expr2);
2120 gimple_seq_add_seq (new_stmt_list, seq);
2121
2122 if (vect_print_dump_info (REPORT_DETAILS))
2123 {
2124 fprintf (vect_dump, "created ");
2125 print_generic_expr (vect_dump, vec_stmt, TDF_SLIM);
2126 }
2127 return vec_stmt;
2128 }
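
/* Sketch of the kind of preheader code this creates (hedged; the names of
   the generated temporaries are only indicative, and the example reuses the
   hypothetical 'in[i+j]' access from the comment above, computed with
   LOOP = j_loop and no extra OFFSET):

       batmp.1 = &in + i*2;            <-- DR base, forced to a gimple operand
       base_off.2 = 0;                 <-- DR_OFFSET + DR_INIT
       addr = batmp.1 + base_off.2;
       vect_pin.3 = (v8hi *) addr;     <-- the returned SSA_NAME

   The caller inserts these statements, typically on the loop preheader
   edge.  */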
2129
2130
2131 /* Function vect_create_data_ref_ptr.
2132
2133 Create a new pointer to vector type (vp), that points to the first location
2134 accessed in the loop by STMT, along with the def-use update chain to
2135 appropriately advance the pointer through the loop iterations. Also set
2136 aliasing information for the pointer. This vector pointer is used by the
2137 callers to this function to create a memory reference expression for vector
2138 load/store access.
2139
2140 Input:
2141 1. STMT: a stmt that references memory. Expected to be of the form
2142 GIMPLE_ASSIGN <name, data-ref> or
2143 GIMPLE_ASSIGN <data-ref, name>.
2144 2. AT_LOOP: the loop where the vector memref is to be created.
2145 3. OFFSET (optional): an offset to be added to the initial address accessed
2146 by the data-ref in STMT.
2147 4. ONLY_INIT: indicate if vp is to be updated in the loop, or remain
2148 pointing to the initial address.
2149 5. TYPE: if not NULL indicates the required type of the data-ref.
2150
2151 Output:
2152 1. Declare a new ptr to vector_type, and have it point to the base of the
2153 data reference (the initial address accessed by the data reference).
2154 For example, for vector of type V8HI, the following code is generated:
2155
2156 v8hi *vp;
2157 vp = (v8hi *)initial_address;
2158
2159 if OFFSET is not supplied:
2160 initial_address = &a[init];
2161 if OFFSET is supplied:
2162 initial_address = &a[init + OFFSET];
2163
2164 Return the initial_address in INITIAL_ADDRESS.
2165
2166 2. If ONLY_INIT is true, just return the initial pointer. Otherwise, also
2167 update the pointer in each iteration of the loop.
2168
2169 Return the increment stmt that updates the pointer in PTR_INCR.
2170
2171 3. Set INV_P to true if the access pattern of the data reference in the
2172 vectorized loop is invariant. Set it to false otherwise.
2173
2174 4. Return the pointer. */
2175
2176 tree
2177 vect_create_data_ref_ptr (gimple stmt, struct loop *at_loop,
2178 tree offset, tree *initial_address, gimple *ptr_incr,
2179 bool only_init, bool *inv_p)
2180 {
2181 tree base_name;
2182 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2183 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2184 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2185 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2186 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2187 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2188 tree vect_ptr_type;
2189 tree vect_ptr;
2190 tree new_temp;
2191 gimple vec_stmt;
2192 gimple_seq new_stmt_list = NULL;
2193 edge pe;
2194 basic_block new_bb;
2195 tree vect_ptr_init;
2196 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2197 tree vptr;
2198 gimple_stmt_iterator incr_gsi;
2199 bool insert_after;
2200 tree indx_before_incr, indx_after_incr;
2201 gimple incr;
2202 tree step;
2203
2204 /* Check the step (evolution) of the load in LOOP, and record
2205 whether it's invariant. */
2206 if (nested_in_vect_loop)
2207 step = STMT_VINFO_DR_STEP (stmt_info);
2208 else
2209 step = DR_STEP (STMT_VINFO_DATA_REF (stmt_info));
2210
2211 if (tree_int_cst_compare (step, size_zero_node) == 0)
2212 *inv_p = true;
2213 else
2214 *inv_p = false;
2215
2216 /* Create an expression for the first address accessed by this load
2217 in LOOP. */
2218 base_name = build_fold_indirect_ref (unshare_expr (DR_BASE_ADDRESS (dr)));
2219
2220 if (vect_print_dump_info (REPORT_DETAILS))
2221 {
2222 tree data_ref_base = base_name;
2223 fprintf (vect_dump, "create vector-pointer variable to type: ");
2224 print_generic_expr (vect_dump, vectype, TDF_SLIM);
2225 if (TREE_CODE (data_ref_base) == VAR_DECL)
2226 fprintf (vect_dump, " vectorizing a one dimensional array ref: ");
2227 else if (TREE_CODE (data_ref_base) == ARRAY_REF)
2228 fprintf (vect_dump, " vectorizing a multidimensional array ref: ");
2229 else if (TREE_CODE (data_ref_base) == COMPONENT_REF)
2230 fprintf (vect_dump, " vectorizing a record based array ref: ");
2231 else if (TREE_CODE (data_ref_base) == SSA_NAME)
2232 fprintf (vect_dump, " vectorizing a pointer ref: ");
2233 print_generic_expr (vect_dump, base_name, TDF_SLIM);
2234 }
2235
2236 /** (1) Create the new vector-pointer variable: **/
2237 vect_ptr_type = build_pointer_type (vectype);
2238 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2239 get_name (base_name));
2240 /* If any of the data-references in the stmt group does not conflict
2241 with the created vector data-reference use a ref-all pointer instead. */
2242 if (STMT_VINFO_DR_GROUP_SIZE (stmt_info) > 1)
2243 {
2244 gimple orig_stmt = STMT_VINFO_DR_GROUP_FIRST_DR (stmt_info);
2245 do
2246 {
2247 tree lhs = gimple_assign_lhs (orig_stmt);
2248 if (!alias_sets_conflict_p (get_deref_alias_set (vect_ptr),
2249 get_alias_set (lhs)))
2250 {
2251 vect_ptr_type = build_pointer_type_for_mode (vectype,
2252 ptr_mode, true);
2253 vect_ptr = vect_get_new_vect_var (vect_ptr_type, vect_pointer_var,
2254 get_name (base_name));
2255 break;
2256 }
2257
2258 orig_stmt = STMT_VINFO_DR_GROUP_NEXT_DR (vinfo_for_stmt (orig_stmt));
2259 }
2260 while (orig_stmt);
2261 }
2262
2263 add_referenced_var (vect_ptr);
2264
2265 /** Note: If the dataref is in an inner-loop nested in LOOP, and we are
2266 vectorizing LOOP (i.e. outer-loop vectorization), we need to create two
2267 def-use update cycles for the pointer: One relative to the outer-loop
2268 (LOOP), which is what steps (3) and (4) below do. The other is relative
2269 to the inner-loop (which is the inner-most loop containing the dataref),
2270 and this is done by step (5) below.
2271
2272 When vectorizing inner-most loops, the vectorized loop (LOOP) is also the
2273 inner-most loop, and so steps (3),(4) work the same, and step (5) is
2274 redundant. Steps (3),(4) create the following:
2275
2276 vp0 = &base_addr;
2277 LOOP: vp1 = phi(vp0,vp2)
2278 ...
2279 ...
2280 vp2 = vp1 + step
2281 goto LOOP
2282
2283 If there is an inner-loop nested in loop, then step (5) will also be
2284 applied, and an additional update in the inner-loop will be created:
2285
2286 vp0 = &base_addr;
2287 LOOP: vp1 = phi(vp0,vp2)
2288 ...
2289 inner: vp3 = phi(vp1,vp4)
2290 vp4 = vp3 + inner_step
2291 if () goto inner
2292 ...
2293 vp2 = vp1 + step
2294 if () goto LOOP */
2295
2296 /** (3) Calculate the initial address of the vector-pointer, and set
2297 the vector-pointer to point to it before the loop: **/
2298
2299 /* Create: &(base[init_val+offset]) in the loop preheader. */
2300
2301 new_temp = vect_create_addr_base_for_vector_ref (stmt, &new_stmt_list,
2302 offset, loop);
2303 pe = loop_preheader_edge (loop);
2304 if (new_stmt_list)
2305 {
2306 new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmt_list);
2307 gcc_assert (!new_bb);
2308 }
2309
2310 *initial_address = new_temp;
2311
2312 /* Create: p = (vectype *) initial_base */
2313 vec_stmt = gimple_build_assign (vect_ptr,
2314 fold_convert (vect_ptr_type, new_temp));
2315 vect_ptr_init = make_ssa_name (vect_ptr, vec_stmt);
2316 gimple_assign_set_lhs (vec_stmt, vect_ptr_init);
2317 new_bb = gsi_insert_on_edge_immediate (pe, vec_stmt);
2318 gcc_assert (!new_bb);
2319
2320
2321 /** (4) Handle the updating of the vector-pointer inside the loop.
2322 This is needed when ONLY_INIT is false, and also when AT_LOOP
2323 is the inner-loop nested in LOOP (during outer-loop vectorization).
2324 **/
2325
2326 if (only_init && at_loop == loop) /* No update in loop is required. */
2327 {
2328 /* Copy the points-to information if it exists. */
2329 if (DR_PTR_INFO (dr))
2330 duplicate_ssa_name_ptr_info (vect_ptr_init, DR_PTR_INFO (dr));
2331 vptr = vect_ptr_init;
2332 }
2333 else
2334 {
2335 /* The step of the vector pointer is the Vector Size. */
2336 tree step = TYPE_SIZE_UNIT (vectype);
2337 /* One exception to the above is when the scalar step of the load in
2338 LOOP is zero. In this case the step here is also zero. */
2339 if (*inv_p)
2340 step = size_zero_node;
2341
2342 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
2343
2344 create_iv (vect_ptr_init,
2345 fold_convert (vect_ptr_type, step),
2346 vect_ptr, loop, &incr_gsi, insert_after,
2347 &indx_before_incr, &indx_after_incr);
2348 incr = gsi_stmt (incr_gsi);
2349 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
2350
2351 /* Copy the points-to information if it exists. */
2352 if (DR_PTR_INFO (dr))
2353 {
2354 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2355 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2356 }
2357 merge_alias_info (vect_ptr_init, indx_before_incr);
2358 merge_alias_info (vect_ptr_init, indx_after_incr);
2359 if (ptr_incr)
2360 *ptr_incr = incr;
2361
2362 vptr = indx_before_incr;
2363 }
2364
2365 if (!nested_in_vect_loop || only_init)
2366 return vptr;
2367
2368
2369 /** (5) Handle the updating of the vector-pointer inside the inner-loop
2370 nested in LOOP, if it exists: **/
2371
2372 gcc_assert (nested_in_vect_loop);
2373 if (!only_init)
2374 {
2375 standard_iv_increment_position (containing_loop, &incr_gsi,
2376 &insert_after);
2377 create_iv (vptr, fold_convert (vect_ptr_type, DR_STEP (dr)), vect_ptr,
2378 containing_loop, &incr_gsi, insert_after, &indx_before_incr,
2379 &indx_after_incr);
2380 incr = gsi_stmt (incr_gsi);
2381 set_vinfo_for_stmt (incr, new_stmt_vec_info (incr, loop_vinfo));
2382
2383 /* Copy the points-to information if it exists. */
2384 if (DR_PTR_INFO (dr))
2385 {
2386 duplicate_ssa_name_ptr_info (indx_before_incr, DR_PTR_INFO (dr));
2387 duplicate_ssa_name_ptr_info (indx_after_incr, DR_PTR_INFO (dr));
2388 }
2389 merge_alias_info (vect_ptr_init, indx_before_incr);
2390 merge_alias_info (vect_ptr_init, indx_after_incr);
2391 if (ptr_incr)
2392 *ptr_incr = incr;
2393
2394 return indx_before_incr;
2395 }
2396 else
2397 gcc_unreachable ();
2398 }
2399
2400
2401 /* Function bump_vector_ptr
2402
2403 Increment a pointer (to a vector type) by vector-size. If requested,
2404 i.e. if PTR_INCR is given, then also connect the new increment stmt
2405 to the existing def-use update-chain of the pointer, by modifying
2406 the PTR_INCR as illustrated below:
2407
2408 The pointer def-use update-chain before this function:
2409 DATAREF_PTR = phi (p_0, p_2)
2410 ....
2411 PTR_INCR: p_2 = DATAREF_PTR + step
2412
2413 The pointer def-use update-chain after this function:
2414 DATAREF_PTR = phi (p_0, p_2)
2415 ....
2416 NEW_DATAREF_PTR = DATAREF_PTR + BUMP
2417 ....
2418 PTR_INCR: p_2 = NEW_DATAREF_PTR + step
2419
2420 Input:
2421 DATAREF_PTR - ssa_name of a pointer (to vector type) that is being updated
2422 in the loop.
2423 PTR_INCR - optional. The stmt that updates the pointer in each iteration of
2424 the loop. The increment amount across iterations is expected
2425 to be vector_size.
2426 BSI - location where the new update stmt is to be placed.
2427 STMT - the original scalar memory-access stmt that is being vectorized.
2428 BUMP - optional. The offset by which to bump the pointer. If not given,
2429 the offset is assumed to be vector_size.
2430
2431 Output: Return NEW_DATAREF_PTR as illustrated above.
2432
2433 */
2434
2435 tree
2436 bump_vector_ptr (tree dataref_ptr, gimple ptr_incr, gimple_stmt_iterator *gsi,
2437 gimple stmt, tree bump)
2438 {
2439 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2440 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2441 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2442 tree ptr_var = SSA_NAME_VAR (dataref_ptr);
2443 tree update = TYPE_SIZE_UNIT (vectype);
2444 gimple incr_stmt;
2445 ssa_op_iter iter;
2446 use_operand_p use_p;
2447 tree new_dataref_ptr;
2448
2449 if (bump)
2450 update = bump;
2451
2452 incr_stmt = gimple_build_assign_with_ops (POINTER_PLUS_EXPR, ptr_var,
2453 dataref_ptr, update);
2454 new_dataref_ptr = make_ssa_name (ptr_var, incr_stmt);
2455 gimple_assign_set_lhs (incr_stmt, new_dataref_ptr);
2456 vect_finish_stmt_generation (stmt, incr_stmt, gsi);
2457
2458 /* Copy the points-to information if it exists. */
2459 if (DR_PTR_INFO (dr))
2460 duplicate_ssa_name_ptr_info (new_dataref_ptr, DR_PTR_INFO (dr));
2461 merge_alias_info (new_dataref_ptr, dataref_ptr);
2462
2463 if (!ptr_incr)
2464 return new_dataref_ptr;
2465
2466 /* Update the vector-pointer's cross-iteration increment. */
2467 FOR_EACH_SSA_USE_OPERAND (use_p, ptr_incr, iter, SSA_OP_USE)
2468 {
2469 tree use = USE_FROM_PTR (use_p);
2470
2471 if (use == dataref_ptr)
2472 SET_USE (use_p, new_dataref_ptr);
2473 else
2474 gcc_assert (tree_int_cst_compare (use, update) == 0);
2475 }
2476
2477 return new_dataref_ptr;
2478 }
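
/* Usage note (a hedged sketch, not a statement about all callers): a typical
   use is creating the second, offset access needed by the explicit
   realignment scheme (the "p' = p + (VS-1)" load described in
   vect_setup_realignment below); chaining the new increment into PTR_INCR
   keeps the cross-iteration step of the pointer correct.  */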
2479
2480
2481 /* Function vect_create_destination_var.
2482
2483 Create a new temporary of type VECTYPE. */
2484
2485 tree
2486 vect_create_destination_var (tree scalar_dest, tree vectype)
2487 {
2488 tree vec_dest;
2489 const char *new_name;
2490 tree type;
2491 enum vect_var_kind kind;
2492
2493 kind = vectype ? vect_simple_var : vect_scalar_var;
2494 type = vectype ? vectype : TREE_TYPE (scalar_dest);
2495
2496 gcc_assert (TREE_CODE (scalar_dest) == SSA_NAME);
2497
2498 new_name = get_name (scalar_dest);
2499 if (!new_name)
2500 new_name = "var_";
2501 vec_dest = vect_get_new_vect_var (type, kind, new_name);
2502 add_referenced_var (vec_dest);
2503
2504 return vec_dest;
2505 }
2506
2507 /* Function vect_strided_store_supported.
2508
2509 Returns TRUE if INTERLEAVE_HIGH and INTERLEAVE_LOW operations are supported,
2510 and FALSE otherwise. */
2511
2512 bool
2513 vect_strided_store_supported (tree vectype)
2514 {
2515 optab interleave_high_optab, interleave_low_optab;
2516 int mode;
2517
2518 mode = (int) TYPE_MODE (vectype);
2519
2520 /* Check that the operation is supported. */
2521 interleave_high_optab = optab_for_tree_code (VEC_INTERLEAVE_HIGH_EXPR,
2522 vectype, optab_default);
2523 interleave_low_optab = optab_for_tree_code (VEC_INTERLEAVE_LOW_EXPR,
2524 vectype, optab_default);
2525 if (!interleave_high_optab || !interleave_low_optab)
2526 {
2527 if (vect_print_dump_info (REPORT_DETAILS))
2528 fprintf (vect_dump, "no optab for interleave.");
2529 return false;
2530 }
2531
2532 if (optab_handler (interleave_high_optab, mode)->insn_code
2533 == CODE_FOR_nothing
2534 || optab_handler (interleave_low_optab, mode)->insn_code
2535 == CODE_FOR_nothing)
2536 {
2537 if (vect_print_dump_info (REPORT_DETAILS))
2538 fprintf (vect_dump, "interleave op not supported by target.");
2539 return false;
2540 }
2541
2542 return true;
2543 }
2544
2545
2546 /* Function vect_permute_store_chain.
2547
2548 Given a chain of interleaved stores in DR_CHAIN of LENGTH that must be
2549 a power of 2, generate interleave_high/low stmts to reorder the data
2550 correctly for the stores. Return the final references for stores in
2551 RESULT_CHAIN.
2552
2553 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2554 The input is 4 vectors each containing 8 elements. We assign a number to each
2555 element, the input sequence is:
2556
2557 1st vec: 0 1 2 3 4 5 6 7
2558 2nd vec: 8 9 10 11 12 13 14 15
2559 3rd vec: 16 17 18 19 20 21 22 23
2560 4th vec: 24 25 26 27 28 29 30 31
2561
2562 The output sequence should be:
2563
2564 1st vec: 0 8 16 24 1 9 17 25
2565 2nd vec: 2 10 18 26 3 11 19 27
2566 3rd vec: 4 12 20 28 5 13 21 29
2567 4th vec: 6 14 22 30 7 15 23 31
2568
2569 i.e., we interleave the contents of the four vectors in their order.
2570
2571 We use interleave_high/low instructions to create such output. The input of
2572 each interleave_high/low operation is two vectors:
2573 1st vec 2nd vec
2574 0 1 2 3 4 5 6 7
2575 the even elements of the result vector are obtained left-to-right from the
2576 high/low elements of the first vector. The odd elements of the result are
2577 obtained left-to-right from the high/low elements of the second vector.
2578 The output of interleave_high will be: 0 4 1 5
2579 and of interleave_low: 2 6 3 7
2580
2581
2582 The permutation is done in log LENGTH stages. In each stage interleave_high
2583 and interleave_low stmts are created for each pair of vectors in DR_CHAIN,
2584 where the first argument is taken from the first half of DR_CHAIN and the
2585 second argument from its second half.
2586 In our example,
2587
2588 I1: interleave_high (1st vec, 3rd vec)
2589 I2: interleave_low (1st vec, 3rd vec)
2590 I3: interleave_high (2nd vec, 4th vec)
2591 I4: interleave_low (2nd vec, 4th vec)
2592
2593 The output for the first stage is:
2594
2595 I1: 0 16 1 17 2 18 3 19
2596 I2: 4 20 5 21 6 22 7 23
2597 I3: 8 24 9 25 10 26 11 27
2598 I4: 12 28 13 29 14 30 15 31
2599
2600 The output of the second stage, i.e. the final result is:
2601
2602 I1: 0 8 16 24 1 9 17 25
2603 I2: 2 10 18 26 3 11 19 27
2604 I3: 4 12 20 28 5 13 21 29
2605 I4: 6 14 22 30 7 15 23 31. */
2606
2607 bool
2608 vect_permute_store_chain (VEC(tree,heap) *dr_chain,
2609 unsigned int length,
2610 gimple stmt,
2611 gimple_stmt_iterator *gsi,
2612 VEC(tree,heap) **result_chain)
2613 {
2614 tree perm_dest, vect1, vect2, high, low;
2615 gimple perm_stmt;
2616 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
2617 tree scalar_dest;
2618 int i;
2619 unsigned int j;
2620 enum tree_code high_code, low_code;
2621
2622 scalar_dest = gimple_assign_lhs (stmt);
2623
2624 /* Check that the operation is supported. */
2625 if (!vect_strided_store_supported (vectype))
2626 return false;
2627
2628 *result_chain = VEC_copy (tree, heap, dr_chain);
2629
2630 for (i = 0; i < exact_log2 (length); i++)
2631 {
2632 for (j = 0; j < length/2; j++)
2633 {
2634 vect1 = VEC_index (tree, dr_chain, j);
2635 vect2 = VEC_index (tree, dr_chain, j+length/2);
2636
2637 /* Create interleaving stmt:
2638 in the case of big endian:
2639 high = interleave_high (vect1, vect2)
2640 and in the case of little endian:
2641 high = interleave_low (vect1, vect2). */
2642 perm_dest = create_tmp_var (vectype, "vect_inter_high");
2643 DECL_GIMPLE_REG_P (perm_dest) = 1;
2644 add_referenced_var (perm_dest);
2645 if (BYTES_BIG_ENDIAN)
2646 {
2647 high_code = VEC_INTERLEAVE_HIGH_EXPR;
2648 low_code = VEC_INTERLEAVE_LOW_EXPR;
2649 }
2650 else
2651 {
2652 low_code = VEC_INTERLEAVE_HIGH_EXPR;
2653 high_code = VEC_INTERLEAVE_LOW_EXPR;
2654 }
2655 perm_stmt = gimple_build_assign_with_ops (high_code, perm_dest,
2656 vect1, vect2);
2657 high = make_ssa_name (perm_dest, perm_stmt);
2658 gimple_assign_set_lhs (perm_stmt, high);
2659 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2660 VEC_replace (tree, *result_chain, 2*j, high);
2661
2662 /* Create interleaving stmt:
2663 in the case of big endian:
2664 low = interleave_low (vect1, vect2)
2665 and in the case of little endian:
2666 low = interleave_high (vect1, vect2). */
2667 perm_dest = create_tmp_var (vectype, "vect_inter_low");
2668 DECL_GIMPLE_REG_P (perm_dest) = 1;
2669 add_referenced_var (perm_dest);
2670 perm_stmt = gimple_build_assign_with_ops (low_code, perm_dest,
2671 vect1, vect2);
2672 low = make_ssa_name (perm_dest, perm_stmt);
2673 gimple_assign_set_lhs (perm_stmt, low);
2674 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
2675 VEC_replace (tree, *result_chain, 2*j+1, low);
2676 }
2677 dr_chain = VEC_copy (tree, heap, *result_chain);
2678 }
2679 return true;
2680 }
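
/* Illustrative sketch (hypothetical scalar source, not taken from the
   comment above): a store group of LENGTH 4 arises from a loop such as

       for (i = 0; i < n; i++)
         {
           out[4*i]     = a0[i];
           out[4*i + 1] = a1[i];
           out[4*i + 2] = a2[i];
           out[4*i + 3] = a3[i];
         }

   The four vectors of computed values (one per statement) are what enters
   DR_CHAIN; the two interleave stages above reorder them into the element
   order required by consecutive vector stores to OUT.  */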
2681
2682 /* Function vect_setup_realignment
2683
2684 This function is called when vectorizing an unaligned load using
2685 the dr_explicit_realign[_optimized] scheme.
2686 This function generates the following code at the loop prolog:
2687
2688 p = initial_addr;
2689 x msq_init = *(floor(p)); # prolog load
2690 realignment_token = call target_builtin;
2691 loop:
2692 x msq = phi (msq_init, ---)
2693
2694 The stmts marked with x are generated only for the case of
2695 dr_explicit_realign_optimized.
2696
2697 The code above sets up a new (vector) pointer, pointing to the first
2698 location accessed by STMT, and a "floor-aligned" load using that pointer.
2699 It also generates code to compute the "realignment-token" (if the relevant
2700 target hook was defined), and creates a phi-node at the loop-header bb
2701 whose arguments are the result of the prolog-load (created by this
2702 function) and the result of a load that takes place in the loop (to be
2703 created by the caller to this function).
2704
2705 For the case of dr_explicit_realign_optimized:
2706 The caller to this function uses the phi-result (msq) to create the
2707 realignment code inside the loop, and sets up the missing phi argument,
2708 as follows:
2709 loop:
2710 msq = phi (msq_init, lsq)
2711 lsq = *(floor(p')); # load in loop
2712 result = realign_load (msq, lsq, realignment_token);
2713
2714 For the case of dr_explicit_realign:
2715 loop:
2716 msq = *(floor(p)); # load in loop
2717 p' = p + (VS-1);
2718 lsq = *(floor(p')); # load in loop
2719 result = realign_load (msq, lsq, realignment_token);
2720
2721 Input:
2722 STMT - (scalar) load stmt to be vectorized. This load accesses
2723 a memory location that may be unaligned.
2724 BSI - place where new code is to be inserted.
2725 ALIGNMENT_SUPPORT_SCHEME - which of the two misalignment handling schemes
2726 is used.
2727
2728 Output:
2729 REALIGNMENT_TOKEN - the result of a call to the builtin_mask_for_load
2730 target hook, if defined.
2731 Return value - the result of the loop-header phi node. */
2732
2733 tree
2734 vect_setup_realignment (gimple stmt, gimple_stmt_iterator *gsi,
2735 tree *realignment_token,
2736 enum dr_alignment_support alignment_support_scheme,
2737 tree init_addr,
2738 struct loop **at_loop)
2739 {
2740 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2741 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2742 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2743 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2744 edge pe;
2745 tree scalar_dest = gimple_assign_lhs (stmt);
2746 tree vec_dest;
2747 gimple inc;
2748 tree ptr;
2749 tree data_ref;
2750 gimple new_stmt;
2751 basic_block new_bb;
2752 tree msq_init = NULL_TREE;
2753 tree new_temp;
2754 gimple phi_stmt;
2755 tree msq = NULL_TREE;
2756 gimple_seq stmts = NULL;
2757 bool inv_p;
2758 bool compute_in_loop = false;
2759 bool nested_in_vect_loop = nested_in_vect_loop_p (loop, stmt);
2760 struct loop *containing_loop = (gimple_bb (stmt))->loop_father;
2761 struct loop *loop_for_initial_load;
2762
2763 gcc_assert (alignment_support_scheme == dr_explicit_realign
2764 || alignment_support_scheme == dr_explicit_realign_optimized);
2765
2766 /* We need to generate three things:
2767 1. the misalignment computation
2768 2. the extra vector load (for the optimized realignment scheme).
2769 3. the phi node for the two vectors from which the realignment is
2770 done (for the optimized realignment scheme).
2771 */
2772
2773 /* 1. Determine where to generate the misalignment computation.
2774
2775 If INIT_ADDR is NULL_TREE, this indicates that the misalignment
2776 calculation will be generated by this function, outside the loop (in the
2777 preheader). Otherwise, INIT_ADDR had already been computed for us by the
2778 caller, inside the loop.
2779
2780 Background: If the misalignment remains fixed throughout the iterations of
2781 the loop, then both realignment schemes are applicable, and also the
2782 misalignment computation can be done outside LOOP. This is because we are
2783 vectorizing LOOP, and so the memory accesses in LOOP advance in steps that
2784 are a multiple of VS (the Vector Size), and therefore the misalignment in
2785 different vectorized LOOP iterations is always the same.
2786 The problem arises only if the memory access is in an inner-loop nested
2787 inside LOOP, which is now being vectorized using outer-loop vectorization.
2788 This is the only case when the misalignment of the memory access may not
2789 remain fixed throughout the iterations of the inner-loop (as explained in
2790 detail in vect_supportable_dr_alignment). In this case, not only is the
2791 optimized realignment scheme not applicable, but also the misalignment
2792 computation (and generation of the realignment token that is passed to
2793 REALIGN_LOAD) has to be done inside the loop.
2794
2795 In short, INIT_ADDR indicates whether we are in a COMPUTE_IN_LOOP mode
2796 or not, which in turn determines if the misalignment is computed inside
2797 the inner-loop, or outside LOOP. */
2798
2799 if (init_addr != NULL_TREE)
2800 {
2801 compute_in_loop = true;
2802 gcc_assert (alignment_support_scheme == dr_explicit_realign);
2803 }
2804
2805
2806 /* 2. Determine where to generate the extra vector load.
2807
2808 For the optimized realignment scheme, instead of generating two vector
2809 loads in each iteration, we generate a single extra vector load in the
2810 preheader of the loop, and in each iteration reuse the result of the
2811 vector load from the previous iteration. In case the memory access is in
2812 an inner-loop nested inside LOOP, which is now being vectorized using
2813 outer-loop vectorization, we need to determine whether this initial vector
2814 load should be generated at the preheader of the inner-loop, or can be
2815 generated at the preheader of LOOP. If the memory access has no evolution
2816 in LOOP, it can be generated in the preheader of LOOP. Otherwise, it has
2817 to be generated inside LOOP (in the preheader of the inner-loop). */
2818
2819 if (nested_in_vect_loop)
2820 {
2821 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
2822 bool invariant_in_outerloop =
2823 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
2824 loop_for_initial_load = (invariant_in_outerloop ? loop : loop->inner);
2825 }
2826 else
2827 loop_for_initial_load = loop;
2828 if (at_loop)
2829 *at_loop = loop_for_initial_load;
2830
2831 /* 3. For the case of the optimized realignment, create the first vector
2832 load at the loop preheader. */
2833
2834 if (alignment_support_scheme == dr_explicit_realign_optimized)
2835 {
2836 /* Create msq_init = *(floor(p1)) in the loop preheader */
2837
2838 gcc_assert (!compute_in_loop);
2839 pe = loop_preheader_edge (loop_for_initial_load);
2840 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2841 ptr = vect_create_data_ref_ptr (stmt, loop_for_initial_load, NULL_TREE,
2842 &init_addr, &inc, true, &inv_p);
2843 data_ref = build1 (ALIGN_INDIRECT_REF, vectype, ptr);
2844 new_stmt = gimple_build_assign (vec_dest, data_ref);
2845 new_temp = make_ssa_name (vec_dest, new_stmt);
2846 gimple_assign_set_lhs (new_stmt, new_temp);
2847 mark_symbols_for_renaming (new_stmt);
2848 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2849 gcc_assert (!new_bb);
2850 msq_init = gimple_assign_lhs (new_stmt);
2851 }
2852
2853 /* 4. Create realignment token using a target builtin, if available.
2854 It is done either inside the containing loop, or before LOOP (as
2855 determined above). */
2856
2857 if (targetm.vectorize.builtin_mask_for_load)
2858 {
2859 tree builtin_decl;
2860
2861 /* Compute INIT_ADDR - the initial address accessed by this memref. */
2862 if (compute_in_loop)
2863 gcc_assert (init_addr); /* already computed by the caller. */
2864 else
2865 {
2866 /* Generate the INIT_ADDR computation outside LOOP. */
2867 init_addr = vect_create_addr_base_for_vector_ref (stmt, &stmts,
2868 NULL_TREE, loop);
2869 pe = loop_preheader_edge (loop);
2870 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
2871 gcc_assert (!new_bb);
2872 }
2873
2874 builtin_decl = targetm.vectorize.builtin_mask_for_load ();
2875 new_stmt = gimple_build_call (builtin_decl, 1, init_addr);
2876 vec_dest =
2877 vect_create_destination_var (scalar_dest,
2878 gimple_call_return_type (new_stmt));
2879 new_temp = make_ssa_name (vec_dest, new_stmt);
2880 gimple_call_set_lhs (new_stmt, new_temp);
2881
2882 if (compute_in_loop)
2883 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
2884 else
2885 {
2886 /* Generate the misalignment computation outside LOOP. */
2887 pe = loop_preheader_edge (loop);
2888 new_bb = gsi_insert_on_edge_immediate (pe, new_stmt);
2889 gcc_assert (!new_bb);
2890 }
2891
2892 *realignment_token = gimple_call_lhs (new_stmt);
2893
2894 /* The result of the CALL_EXPR to this builtin is determined from
2895 the value of the parameter and no global variables are touched
2896 which makes the builtin a "const" function. Requiring the
2897 builtin to have the "const" attribute makes it unnecessary
2898 to call mark_call_clobbered. */
2899 gcc_assert (TREE_READONLY (builtin_decl));
2900 }
2901
2902 if (alignment_support_scheme == dr_explicit_realign)
2903 return msq;
2904
2905 gcc_assert (!compute_in_loop);
2906 gcc_assert (alignment_support_scheme == dr_explicit_realign_optimized);
2907
2908
2909 /* 5. Create msq = phi <msq_init, lsq> in loop */
2910
2911 pe = loop_preheader_edge (containing_loop);
2912 vec_dest = vect_create_destination_var (scalar_dest, vectype);
2913 msq = make_ssa_name (vec_dest, NULL);
2914 phi_stmt = create_phi_node (msq, containing_loop->header);
2915 SSA_NAME_DEF_STMT (msq) = phi_stmt;
2916 add_phi_arg (phi_stmt, msq_init, pe);
2917
2918 return msq;
2919 }
2920
2921
2922 /* Function vect_strided_load_supported.
2923
2924 Returns TRUE if EXTRACT_EVEN and EXTRACT_ODD operations are supported,
2925 and FALSE otherwise. */
2926
2927 bool
2928 vect_strided_load_supported (tree vectype)
2929 {
2930 optab perm_even_optab, perm_odd_optab;
2931 int mode;
2932
2933 mode = (int) TYPE_MODE (vectype);
2934
2935 perm_even_optab = optab_for_tree_code (VEC_EXTRACT_EVEN_EXPR, vectype,
2936 optab_default);
2937 if (!perm_even_optab)
2938 {
2939 if (vect_print_dump_info (REPORT_DETAILS))
2940 fprintf (vect_dump, "no optab for perm_even.");
2941 return false;
2942 }
2943
2944 if (optab_handler (perm_even_optab, mode)->insn_code == CODE_FOR_nothing)
2945 {
2946 if (vect_print_dump_info (REPORT_DETAILS))
2947 fprintf (vect_dump, "perm_even op not supported by target.");
2948 return false;
2949 }
2950
2951 perm_odd_optab = optab_for_tree_code (VEC_EXTRACT_ODD_EXPR, vectype,
2952 optab_default);
2953 if (!perm_odd_optab)
2954 {
2955 if (vect_print_dump_info (REPORT_DETAILS))
2956 fprintf (vect_dump, "no optab for perm_odd.");
2957 return false;
2958 }
2959
2960 if (optab_handler (perm_odd_optab, mode)->insn_code == CODE_FOR_nothing)
2961 {
2962 if (vect_print_dump_info (REPORT_DETAILS))
2963 fprintf (vect_dump, "perm_odd op not supported by target.");
2964 return false;
2965 }
2966 return true;
2967 }
2968
2969
2970 /* Function vect_permute_load_chain.
2971
2972 Given a chain of interleaved loads in DR_CHAIN of LENGTH that must be
2973 a power of 2, generate extract_even/odd stmts to reorder the input data
2974 correctly. Return the final references for loads in RESULT_CHAIN.
2975
2976 E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
2977 The input is 4 vectors each containing 8 elements. We assign a number to each
2978 element, the input sequence is:
2979
2980 1st vec: 0 1 2 3 4 5 6 7
2981 2nd vec: 8 9 10 11 12 13 14 15
2982 3rd vec: 16 17 18 19 20 21 22 23
2983 4th vec: 24 25 26 27 28 29 30 31
2984
2985 The output sequence should be:
2986
2987 1st vec: 0 4 8 12 16 20 24 28
2988 2nd vec: 1 5 9 13 17 21 25 29
2989 3rd vec: 2 6 10 14 18 22 26 30
2990 4th vec: 3 7 11 15 19 23 27 31
2991
2992 i.e., the first output vector should contain the first elements of each
2993 interleaving group, etc.
2994
2995 We use extract_even/odd instructions to create such output. The input of each
2996 extract_even/odd operation is two vectors
2997 1st vec 2nd vec
2998 0 1 2 3 4 5 6 7
2999
3000 and the output is the vector of extracted even/odd elements. The output of
3001 extract_even will be: 0 2 4 6
3002 and of extract_odd: 1 3 5 7
3003
3004
3005 The permutation is done in log LENGTH stages. In each stage extract_even and
3006 extract_odd stmts are created for each pair of vectors in DR_CHAIN in their
3007 order. In our example,
3008
3009 E1: extract_even (1st vec, 2nd vec)
3010 E2: extract_odd (1st vec, 2nd vec)
3011 E3: extract_even (3rd vec, 4th vec)
3012 E4: extract_odd (3rd vec, 4th vec)
3013
3014 The output for the first stage will be:
3015
3016 E1: 0 2 4 6 8 10 12 14
3017 E2: 1 3 5 7 9 11 13 15
3018 E3: 16 18 20 22 24 26 28 30
3019 E4: 17 19 21 23 25 27 29 31
3020
3021 In order to proceed and create the correct sequence for the next stage (or
3022 for the correct output, if the second stage is the last one, as in our
3023 example), we first put the output of extract_even operation and then the
3024 output of extract_odd in RESULT_CHAIN (which is then copied to DR_CHAIN).
3025 The input for the second stage is:
3026
3027 1st vec (E1): 0 2 4 6 8 10 12 14
3028 2nd vec (E3): 16 18 20 22 24 26 28 30
3029 3rd vec (E2): 1 3 5 7 9 11 13 15
3030 4th vec (E4): 17 19 21 23 25 27 29 31
3031
3032 The output of the second stage:
3033
3034 E1: 0 4 8 12 16 20 24 28
3035 E2: 2 6 10 14 18 22 26 30
3036 E3: 1 5 9 13 17 21 25 29
3037 E4: 3 7 11 15 19 23 27 31
3038
3039 And RESULT_CHAIN after reordering:
3040
3041 1st vec (E1): 0 4 8 12 16 20 24 28
3042 2nd vec (E3): 1 5 9 13 17 21 25 29
3043 3rd vec (E2): 2 6 10 14 18 22 26 30
3044 4th vec (E4): 3 7 11 15 19 23 27 31. */
3045
3046 bool
3047 vect_permute_load_chain (VEC(tree,heap) *dr_chain,
3048 unsigned int length,
3049 gimple stmt,
3050 gimple_stmt_iterator *gsi,
3051 VEC(tree,heap) **result_chain)
3052 {
3053 tree perm_dest, data_ref, first_vect, second_vect;
3054 gimple perm_stmt;
3055 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
3056 int i;
3057 unsigned int j;
3058
3059 /* Check that the operation is supported. */
3060 if (!vect_strided_load_supported (vectype))
3061 return false;
3062
3063 *result_chain = VEC_copy (tree, heap, dr_chain);
3064 for (i = 0; i < exact_log2 (length); i++)
3065 {
3066 for (j = 0; j < length; j += 2)
3067 {
3068 first_vect = VEC_index (tree, dr_chain, j);
3069 second_vect = VEC_index (tree, dr_chain, j+1);
3070
3071 /* data_ref = permute_even (first_data_ref, second_data_ref); */
3072 perm_dest = create_tmp_var (vectype, "vect_perm_even");
3073 DECL_GIMPLE_REG_P (perm_dest) = 1;
3074 add_referenced_var (perm_dest);
3075
3076 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_EVEN_EXPR,
3077 perm_dest, first_vect,
3078 second_vect);
3079
3080 data_ref = make_ssa_name (perm_dest, perm_stmt);
3081 gimple_assign_set_lhs (perm_stmt, data_ref);
3082 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3083 mark_symbols_for_renaming (perm_stmt);
3084
3085 VEC_replace (tree, *result_chain, j/2, data_ref);
3086
3087 /* data_ref = permute_odd (first_data_ref, second_data_ref); */
3088 perm_dest = create_tmp_var (vectype, "vect_perm_odd");
3089 DECL_GIMPLE_REG_P (perm_dest) = 1;
3090 add_referenced_var (perm_dest);
3091
3092 perm_stmt = gimple_build_assign_with_ops (VEC_EXTRACT_ODD_EXPR,
3093 perm_dest, first_vect,
3094 second_vect);
3095 data_ref = make_ssa_name (perm_dest, perm_stmt);
3096 gimple_assign_set_lhs (perm_stmt, data_ref);
3097 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3098 mark_symbols_for_renaming (perm_stmt);
3099
3100 VEC_replace (tree, *result_chain, j/2+length/2, data_ref);
3101 }
3102 dr_chain = VEC_copy (tree, heap, *result_chain);
3103 }
3104 return true;
3105 }
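
/* Illustrative sketch (hypothetical scalar source): the mirror image of the
   store case; a load group of LENGTH 2 arises from, e.g.,

       for (i = 0; i < n; i++)
         {
           re = a[2*i];
           im = a[2*i + 1];
           sum += re * re + im * im;
         }

   Consecutive vector loads of A bring in the elements in memory order
   (re0 im0 re1 im1 ...); one extract_even/extract_odd stage then produces
   one vector of RE values and one of IM values for the computation.  */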
3106
3107
3108 /* Function vect_transform_strided_load.
3109
3110 Given a chain of input interleaved data-refs (in DR_CHAIN), build statements
3111 to perform their permutation and ascribe the resulting vectorized statements
3112 to the corresponding scalar statements.
3113 */
3114
3115 bool
3116 vect_transform_strided_load (gimple stmt, VEC(tree,heap) *dr_chain, int size,
3117 gimple_stmt_iterator *gsi)
3118 {
3119 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3120 gimple first_stmt = DR_GROUP_FIRST_DR (stmt_info);
3121 gimple next_stmt, new_stmt;
3122 VEC(tree,heap) *result_chain = NULL;
3123 unsigned int i, gap_count;
3124 tree tmp_data_ref;
3125
3126 /* DR_CHAIN contains input data-refs that are a part of the interleaving.
3127 RESULT_CHAIN is the output of vect_permute_load_chain; it contains the permuted
3128 vectors that are ready for the vector computation. */
3129 result_chain = VEC_alloc (tree, heap, size);
3130 /* Permute. */
3131 if (!vect_permute_load_chain (dr_chain, size, stmt, gsi, &result_chain))
3132 return false;
3133
3134 /* Put a permuted data-ref in the VECTORIZED_STMT field.
3135 Since we scan the chain starting from its first node, their order
3136 corresponds to the order of data-refs in RESULT_CHAIN. */
3137 next_stmt = first_stmt;
3138 gap_count = 1;
3139 for (i = 0; VEC_iterate (tree, result_chain, i, tmp_data_ref); i++)
3140 {
3141 if (!next_stmt)
3142 break;
3143
3144 /* Skip the gaps. Loads created for the gaps will be removed later by the
3145 dead code elimination pass. No need to check for the first stmt in
3146 the group, since it always exists.
3147 DR_GROUP_GAP is the number of steps in elements from the previous
3148 access (if there is no gap DR_GROUP_GAP is 1). We skip loads that
3149 correspond to the gaps.
3150 */
3151 if (next_stmt != first_stmt
3152 && gap_count < DR_GROUP_GAP (vinfo_for_stmt (next_stmt)))
3153 {
3154 gap_count++;
3155 continue;
3156 }
3157
3158 while (next_stmt)
3159 {
3160 new_stmt = SSA_NAME_DEF_STMT (tmp_data_ref);
3161 /* We assume that if VEC_STMT is not NULL, this is a case of multiple
3162 copies, and we put the new vector statement in the first available
3163 RELATED_STMT. */
3164 if (!STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)))
3165 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt)) = new_stmt;
3166 else
3167 {
3168 if (!DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3169 {
3170 gimple prev_stmt =
3171 STMT_VINFO_VEC_STMT (vinfo_for_stmt (next_stmt));
3172 gimple rel_stmt =
3173 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt));
3174 while (rel_stmt)
3175 {
3176 prev_stmt = rel_stmt;
3177 rel_stmt =
3178 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (rel_stmt));
3179 }
3180
3181 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (prev_stmt)) =
3182 new_stmt;
3183 }
3184 }
3185
3186 next_stmt = DR_GROUP_NEXT_DR (vinfo_for_stmt (next_stmt));
3187 gap_count = 1;
3188 /* If NEXT_STMT accesses the same DR as the previous statement,
3189 put the same TMP_DATA_REF as its vectorized statement; otherwise
3190 get the next data-ref from RESULT_CHAIN. */
3191 if (!next_stmt || !DR_GROUP_SAME_DR_STMT (vinfo_for_stmt (next_stmt)))
3192 break;
3193 }
3194 }
3195
3196 VEC_free (tree, heap, result_chain);
3197 return true;
3198 }
3199
3200 /* Function vect_can_force_dr_alignment_p.
3201
3202 Returns whether the alignment of a DECL can be forced to be aligned
3203 on an ALIGNMENT-bit boundary. */
3204
3205 bool
3206 vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
3207 {
3208 if (TREE_CODE (decl) != VAR_DECL)
3209 return false;
3210
3211 if (DECL_EXTERNAL (decl))
3212 return false;
3213
3214 if (TREE_ASM_WRITTEN (decl))
3215 return false;
3216
3217 if (TREE_STATIC (decl))
3218 return (alignment <= MAX_OFILE_ALIGNMENT);
3219 else
3220 return (alignment <= MAX_STACK_ALIGNMENT);
3221 }
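
/* Examples (hedged, illustrative only): a file-scope "static float a[256];"
   that has not been written out yet can have its alignment raised up to
   MAX_OFILE_ALIGNMENT, and an automatic array up to MAX_STACK_ALIGNMENT,
   whereas an "extern" declaration or anything other than a VAR_DECL is never
   a candidate, because its definition is not under this compilation unit's
   control.  */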
3222
3223 /* Function vect_supportable_dr_alignment
3224
3225 Return whether the data reference DR is supported with respect to its
3226 alignment. */
3227
3228 enum dr_alignment_support
3229 vect_supportable_dr_alignment (struct data_reference *dr)
3230 {
3231 gimple stmt = DR_STMT (dr);
3232 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3233 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3234 enum machine_mode mode = TYPE_MODE (vectype);
3235 struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
3236 bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
3237 bool invariant_in_outerloop = false;
3238
3239 if (aligned_access_p (dr))
3240 return dr_aligned;
3241
3242 if (nested_in_vect_loop)
3243 {
3244 tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
3245 invariant_in_outerloop =
3246 (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
3247 }
3248
3249 /* Possibly unaligned access. */
3250
3251 /* We can choose between using the implicit realignment scheme (generating
3252 a misaligned_move stmt) and the explicit realignment scheme (generating
3253 aligned loads with a REALIGN_LOAD). There are two variants to the explicit
3254 realignment scheme: optimized, and unoptimized.
3255 We can optimize the realignment only if the step between consecutive
3256 vector loads is equal to the vector size. Since the vector memory
3257 accesses advance in steps of VS (Vector Size) in the vectorized loop, it
3258 is guaranteed that the misalignment amount remains the same throughout the
3259 execution of the vectorized loop. Therefore, we can create the
3260 "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
3261 at the loop preheader.
3262
3263 However, in the case of outer-loop vectorization, when vectorizing a
3264 memory access in the inner-loop nested within the LOOP that is now being
3265 vectorized, while it is guaranteed that the misalignment of the
3266 vectorized memory access will remain the same in different outer-loop
3267 iterations, it is *not* guaranteed that it will remain the same throughout
3268 the execution of the inner-loop. This is because the inner-loop advances
3269 with the original scalar step (and not in steps of VS). If the inner-loop
3270 step happens to be a multiple of VS, then the misalignment remains fixed
3271 and we can use the optimized realignment scheme. For example:
3272
3273 for (i=0; i<N; i++)
3274 for (j=0; j<M; j++)
3275 s += a[i+j];
3276
3277 When vectorizing the i-loop in the above example, the step between
3278 consecutive vector loads is 1, and so the misalignment does not remain
3279 fixed across the execution of the inner-loop, and the realignment cannot
3280 be optimized (as illustrated in the following pseudo vectorized loop):
3281
3282 for (i=0; i<N; i+=4)
3283 for (j=0; j<M; j++){
3284 vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
3285 // when j is {0,1,2,3,4,5,6,7,...} respectively.
3286 // (assuming that we start from an aligned address).
3287 }
3288
3289 We therefore have to use the unoptimized realignment scheme:
3290
3291 for (i=0; i<N; i+=4)
3292 for (j=k; j<M; j+=4)
3293 vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
3294 // that the misalignment of the initial address is
3295 // 0).
3296
3297 The loop can then be vectorized as follows:
3298
3299 for (k=0; k<4; k++){
3300 rt = get_realignment_token (&vp[k]);
3301 for (i=0; i<N; i+=4){
3302 v1 = vp[i+k];
3303 for (j=k; j<M; j+=4){
3304 v2 = vp[i+j+VS-1];
3305 va = REALIGN_LOAD <v1,v2,rt>;
3306 vs += va;
3307 v1 = v2;
3308 }
3309 }
3310 } */
3311
3312 if (DR_IS_READ (dr))
3313 {
3314 if (optab_handler (vec_realign_load_optab, mode)->insn_code !=
3315 CODE_FOR_nothing
3316 && (!targetm.vectorize.builtin_mask_for_load
3317 || targetm.vectorize.builtin_mask_for_load ()))
3318 {
3319 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3320 if (nested_in_vect_loop
3321 && (TREE_INT_CST_LOW (DR_STEP (dr))
3322 != GET_MODE_SIZE (TYPE_MODE (vectype))))
3323 return dr_explicit_realign;
3324 else
3325 return dr_explicit_realign_optimized;
3326 }
3327
3328 if (optab_handler (movmisalign_optab, mode)->insn_code !=
3329 CODE_FOR_nothing)
3330 /* Can't software pipeline the loads, but can at least do them. */
3331 return dr_unaligned_supported;
3332 }
3333
3334 /* Unsupported. */
3335 return dr_unaligned_unsupported;
3336 }