gcc/tree-vect-slp.c
1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2019 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "backend.h"
26 #include "target.h"
27 #include "rtl.h"
28 #include "tree.h"
29 #include "gimple.h"
30 #include "tree-pass.h"
31 #include "ssa.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
37 #include "gimple-iterator.h"
38 #include "cfgloop.h"
39 #include "tree-vectorizer.h"
40 #include "langhooks.h"
41 #include "gimple-walk.h"
42 #include "dbgcnt.h"
43 #include "tree-vector-builder.h"
44 #include "vec-perm-indices.h"
45 #include "gimple-fold.h"
46 #include "internal-fn.h"
47
48
49 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
50 FINAL_P is true if we have vectorized the instance or if we have
51 made a final decision not to vectorize the statements in any way. */
52
53 static void
54 vect_free_slp_tree (slp_tree node, bool final_p)
55 {
56 int i;
57 slp_tree child;
58
59 if (--node->refcnt != 0)
60 return;
61
62 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
63 vect_free_slp_tree (child, final_p);
64
65 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
66 Some statements might no longer exist, after having been
67 removed by vect_transform_stmt. Updating the remaining
68 statements would be redundant. */
69 if (!final_p)
70 {
71 stmt_vec_info stmt_info;
72 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
73 {
74 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
75 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
76 }
77 }
78
79 SLP_TREE_CHILDREN (node).release ();
80 SLP_TREE_SCALAR_STMTS (node).release ();
81 SLP_TREE_SCALAR_OPS (node).release ();
82 SLP_TREE_VEC_STMTS (node).release ();
83 SLP_TREE_LOAD_PERMUTATION (node).release ();
84
85 free (node);
86 }
87
88 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
89 have vectorized the instance or if we have made a final decision not
90 to vectorize the statements in any way. */
91
92 void
93 vect_free_slp_instance (slp_instance instance, bool final_p)
94 {
95 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
96 SLP_INSTANCE_LOADS (instance).release ();
97 free (instance);
98 }
99
100
101 /* Create an SLP node for SCALAR_STMTS. */
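/* For example, for x = a + b the node gets two child slots (for the
   defs of a and b), while for x = a < b ? c : d it gets four: the
   COND_EXPR condition contributes the two comparison operands in
   addition to the two arms, hence the extra increment below.  */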
102
103 static slp_tree
104 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
105 {
106 slp_tree node;
107 stmt_vec_info stmt_info = scalar_stmts[0];
108 unsigned int nops;
109
110 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
111 nops = gimple_call_num_args (stmt);
112 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
113 {
114 nops = gimple_num_ops (stmt) - 1;
115 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
116 nops++;
117 }
118 else if (is_a <gphi *> (stmt_info->stmt))
119 nops = 0;
120 else
121 return NULL;
122
123 node = XNEW (struct _slp_tree);
124 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
125 SLP_TREE_SCALAR_OPS (node) = vNULL;
126 SLP_TREE_VEC_STMTS (node).create (0);
127 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
128 SLP_TREE_CHILDREN (node).create (nops);
129 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
130 SLP_TREE_TWO_OPERATORS (node) = false;
131 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
132 node->refcnt = 1;
133 node->max_nunits = 1;
134
135 unsigned i;
136 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
137 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
138
139 return node;
140 }
141
142 /* Create an SLP node for OPS. */
143
144 static slp_tree
145 vect_create_new_slp_node (vec<tree> ops)
146 {
147 slp_tree node;
148
149 node = XNEW (struct _slp_tree);
150 SLP_TREE_SCALAR_STMTS (node) = vNULL;
151 SLP_TREE_SCALAR_OPS (node) = ops;
152 SLP_TREE_VEC_STMTS (node).create (0);
153 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
154 SLP_TREE_CHILDREN (node) = vNULL;
155 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
156 SLP_TREE_TWO_OPERATORS (node) = false;
157 SLP_TREE_DEF_TYPE (node) = vect_external_def;
158 node->refcnt = 1;
159 node->max_nunits = 1;
160
161 return node;
162 }
163
164
165 /* This structure is used in creation of an SLP tree. Each instance
166 corresponds to the same operand in a group of scalar stmts in an SLP
167 node. */
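/* For example, for the two-stmt group { a0 = b0 + c0; a1 = b1 + c1; }
   two instances are created: one collecting the defs of { b0, b1 }
   and one collecting the defs of { c0, c1 }.  */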
168 typedef struct _slp_oprnd_info
169 {
170 /* Def-stmts for the operands. */
171 vec<stmt_vec_info> def_stmts;
172 /* Operands. */
173 vec<tree> ops;
174 /* Information about the first statement: its vector def-type, the type
175 of its operand, and an indication of whether any def stmt in the
176 group is a pattern stmt. */
177 tree first_op_type;
178 enum vect_def_type first_dt;
179 bool any_pattern;
180 } *slp_oprnd_info;
181
182
183 /* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
184 operand. */
185 static vec<slp_oprnd_info>
186 vect_create_oprnd_info (int nops, int group_size)
187 {
188 int i;
189 slp_oprnd_info oprnd_info;
190 vec<slp_oprnd_info> oprnds_info;
191
192 oprnds_info.create (nops);
193 for (i = 0; i < nops; i++)
194 {
195 oprnd_info = XNEW (struct _slp_oprnd_info);
196 oprnd_info->def_stmts.create (group_size);
197 oprnd_info->ops.create (group_size);
198 oprnd_info->first_dt = vect_uninitialized_def;
199 oprnd_info->first_op_type = NULL_TREE;
200 oprnd_info->any_pattern = false;
201 oprnds_info.quick_push (oprnd_info);
202 }
203
204 return oprnds_info;
205 }
206
207
208 /* Free operands info. */
209
210 static void
211 vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
212 {
213 int i;
214 slp_oprnd_info oprnd_info;
215
216 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
217 {
218 oprnd_info->def_stmts.release ();
219 oprnd_info->ops.release ();
220 XDELETE (oprnd_info);
221 }
222
223 oprnds_info.release ();
224 }
225
226
227 /* Return true if STMTS contains a pattern statement. */
228
229 static bool
230 vect_contains_pattern_stmt_p (vec<stmt_vec_info> stmts)
231 {
232 stmt_vec_info stmt_info;
233 unsigned int i;
234 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
235 if (is_pattern_stmt_p (stmt_info))
236 return true;
237 return false;
238 }
239
240 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
241 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
242 of the chain. */
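/* For example, if the chain accesses a[0], a[1] and a[3] (so that
   DR_GROUP_GAP of the a[3] element is 2), the returned places are
   0, 1 and 3 respectively: the place is the accumulated gap up to
   and including the element for STMT_INFO.  */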
243
244 int
245 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
246 stmt_vec_info first_stmt_info)
247 {
248 stmt_vec_info next_stmt_info = first_stmt_info;
249 int result = 0;
250
251 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
252 return -1;
253
254 do
255 {
256 if (next_stmt_info == stmt_info)
257 return result;
258 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
259 if (next_stmt_info)
260 result += DR_GROUP_GAP (next_stmt_info);
261 }
262 while (next_stmt_info);
263
264 return -1;
265 }
266
267 /* Check whether it is possible to load COUNT elements of type ELT_TYPE
268 using the method implemented by duplicate_and_interleave. Return true
269 if so, returning the number of intermediate vectors in *NVECTORS_OUT
270 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
271 (if nonnull). */
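/* A worked example, assuming a target with 128-bit vectors: for
   COUNT == 4 and a 32-bit ELT_TYPE the loop below settles on
   ELT_BYTES == 8 and NVECTORS == 2, i.e. INT_TYPE is a 64-bit
   integer and VECTOR_TYPE a two-element vector of it.  Consecutive
   pairs (COUNT / NVECTORS == 2) of the original 32-bit elements are
   fused into 64-bit elements, and the two interleaving permutes
   that are built combine those into the NVECTORS intermediate
   vectors.  */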
272
273 bool
274 can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
275 tree elt_type, unsigned int *nvectors_out,
276 tree *vector_type_out,
277 tree *permutes)
278 {
279 tree base_vector_type = get_vectype_for_scalar_type (vinfo, elt_type, count);
280 if (!base_vector_type || !VECTOR_MODE_P (TYPE_MODE (base_vector_type)))
281 return false;
282
283 machine_mode base_vector_mode = TYPE_MODE (base_vector_type);
284 poly_int64 elt_bytes = count * GET_MODE_UNIT_SIZE (base_vector_mode);
285 unsigned int nvectors = 1;
286 for (;;)
287 {
288 scalar_int_mode int_mode;
289 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
290 if (int_mode_for_size (elt_bits, 1).exists (&int_mode))
291 {
292 /* Get the natural vector type for this SLP group size. */
293 tree int_type = build_nonstandard_integer_type
294 (GET_MODE_BITSIZE (int_mode), 1);
295 tree vector_type
296 = get_vectype_for_scalar_type (vinfo, int_type, count);
297 if (vector_type
298 && VECTOR_MODE_P (TYPE_MODE (vector_type))
299 && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
300 GET_MODE_SIZE (base_vector_mode)))
301 {
302 /* Try fusing consecutive sequences of COUNT / NVECTORS elements
303 together into elements of type INT_TYPE and using the result
304 to build NVECTORS vectors. */
305 poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
306 vec_perm_builder sel1 (nelts, 2, 3);
307 vec_perm_builder sel2 (nelts, 2, 3);
308 poly_int64 half_nelts = exact_div (nelts, 2);
309 for (unsigned int i = 0; i < 3; ++i)
310 {
311 sel1.quick_push (i);
312 sel1.quick_push (i + nelts);
313 sel2.quick_push (half_nelts + i);
314 sel2.quick_push (half_nelts + i + nelts);
315 }
316 vec_perm_indices indices1 (sel1, 2, nelts);
317 vec_perm_indices indices2 (sel2, 2, nelts);
318 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
319 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
320 {
321 if (nvectors_out)
322 *nvectors_out = nvectors;
323 if (vector_type_out)
324 *vector_type_out = vector_type;
325 if (permutes)
326 {
327 permutes[0] = vect_gen_perm_mask_checked (vector_type,
328 indices1);
329 permutes[1] = vect_gen_perm_mask_checked (vector_type,
330 indices2);
331 }
332 return true;
333 }
334 }
335 }
336 if (!multiple_p (elt_bytes, 2, &elt_bytes))
337 return false;
338 nvectors *= 2;
339 }
340 }
341
342 /* Get the defs for the rhs of STMTS[STMT_NUM] (collect them in OPRNDS_INFO),
343 check that they are of a valid type and that they match the defs of the
344 first stmt of the SLP group (stored in OPRNDS_INFO). This function tries
345 to match stmts by swapping the operands of STMTS[STMT_NUM] when possible.
346 On entry, a non-zero *SWAP indicates that a swap is required for a
347 COND_EXPR stmt: *SWAP is 1 if the operands of its comparison need to be
348 swapped and 2 if the comparison code needs to be inverted. If this
349 function performs any operand swap itself, it sets *SWAP to a non-zero
350 value on return.
351 Return -1 on a fatal error; return 1 if the error could be corrected by
352 swapping the operands of the parent node of this one; return 0 if
353 everything is OK. */
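/* For example, in the two-stmt group { a0 = x0 + y0; a1 = y1 + x1; }
   the defs of the second stmt's operands do not match those of the
   first stmt in their original order; since PLUS_EXPR is commutative,
   the mismatch is repaired below by swapping the second stmt's
   operands.  */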
354 static int
355 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
356 vec<stmt_vec_info> stmts, unsigned stmt_num,
357 vec<slp_oprnd_info> *oprnds_info)
358 {
359 stmt_vec_info stmt_info = stmts[stmt_num];
360 tree oprnd;
361 unsigned int i, number_of_oprnds;
362 enum vect_def_type dt = vect_uninitialized_def;
363 slp_oprnd_info oprnd_info;
364 int first_op_idx = 1;
365 unsigned int commutative_op = -1U;
366 bool first_op_cond = false;
367 bool first = stmt_num == 0;
368
369 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
370 {
371 number_of_oprnds = gimple_call_num_args (stmt);
372 first_op_idx = 3;
373 if (gimple_call_internal_p (stmt))
374 {
375 internal_fn ifn = gimple_call_internal_fn (stmt);
376 commutative_op = first_commutative_argument (ifn);
377
378 /* Masked load, only look at mask. */
379 if (ifn == IFN_MASK_LOAD)
380 {
381 number_of_oprnds = 1;
382 /* Mask operand index. */
383 first_op_idx = 5;
384 }
385 }
386 }
387 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
388 {
389 enum tree_code code = gimple_assign_rhs_code (stmt);
390 number_of_oprnds = gimple_num_ops (stmt) - 1;
391 /* Swapping can only be done for COND_EXPR if asked to, otherwise we
392 could end up with a different comparison code from the first stmt. */
393 if (code == COND_EXPR
394 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
395 {
396 first_op_cond = true;
397 number_of_oprnds++;
398 }
399 else
400 commutative_op = commutative_tree_code (code) ? 0U : -1U;
401 }
402 else
403 return -1;
404
405 bool swapped = (*swap != 0);
406 gcc_assert (!swapped || first_op_cond);
407 for (i = 0; i < number_of_oprnds; i++)
408 {
409 again:
410 if (first_op_cond)
411 {
412 /* Map indicating how operands of cond_expr should be swapped. */
413 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
414 int *map = maps[*swap];
415
416 if (i < 2)
417 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
418 first_op_idx), map[i]);
419 else
420 oprnd = gimple_op (stmt_info->stmt, map[i]);
421 }
422 else
423 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
424 if (TREE_CODE (oprnd) == VIEW_CONVERT_EXPR)
425 oprnd = TREE_OPERAND (oprnd, 0);
426
427 oprnd_info = (*oprnds_info)[i];
428
429 stmt_vec_info def_stmt_info;
430 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
431 {
432 if (dump_enabled_p ())
433 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
434 "Build SLP failed: can't analyze def for %T\n",
435 oprnd);
436
437 return -1;
438 }
439
440 if (def_stmt_info && is_pattern_stmt_p (def_stmt_info))
441 oprnd_info->any_pattern = true;
442
443 if (first)
444 {
445 /* For the swapping logic below force vect_reduction_def
446 for the reduction op in an SLP reduction group. */
447 if (!STMT_VINFO_DATA_REF (stmt_info)
448 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
449 && (int)i == STMT_VINFO_REDUC_IDX (stmt_info)
450 && def_stmt_info)
451 dt = vect_reduction_def;
452 oprnd_info->first_dt = dt;
453 oprnd_info->first_op_type = TREE_TYPE (oprnd);
454 }
455 else
456 {
457 /* Not the first stmt of the group: check that the def-stmt/s
458 match the def-stmt/s of the first stmt. Allow different
459 definition types for reduction chains: the first stmt must be a
460 vect_reduction_def (a phi node), and the rest must be part of
461 the same reduction chain. */
462 tree type = TREE_TYPE (oprnd);
463 if ((oprnd_info->first_dt != dt
464 && !(oprnd_info->first_dt == vect_reduction_def
465 && !STMT_VINFO_DATA_REF (stmt_info)
466 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
467 && def_stmt_info
468 && !STMT_VINFO_DATA_REF (def_stmt_info)
469 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
470 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
471 && !((oprnd_info->first_dt == vect_external_def
472 || oprnd_info->first_dt == vect_constant_def)
473 && (dt == vect_external_def
474 || dt == vect_constant_def)))
475 || !types_compatible_p (oprnd_info->first_op_type, type)
476 || (!STMT_VINFO_DATA_REF (stmt_info)
477 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
478 && ((!def_stmt_info
479 || STMT_VINFO_DATA_REF (def_stmt_info)
480 || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
481 != REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
482 != (oprnd_info->first_dt != vect_reduction_def))))
483 {
484 /* Try swapping operands if we got a mismatch. */
485 if (i == commutative_op && !swapped)
486 {
487 if (dump_enabled_p ())
488 dump_printf_loc (MSG_NOTE, vect_location,
489 "trying swapped operands\n");
490 swapped = true;
491 goto again;
492 }
493
494 if (dump_enabled_p ())
495 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
496 "Build SLP failed: different types\n");
497
498 return 1;
499 }
500 if ((dt == vect_constant_def
501 || dt == vect_external_def)
502 && !GET_MODE_SIZE (vinfo->vector_mode).is_constant ()
503 && (TREE_CODE (type) == BOOLEAN_TYPE
504 || !can_duplicate_and_interleave_p (vinfo, stmts.length (),
505 type)))
506 {
507 if (dump_enabled_p ())
508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
509 "Build SLP failed: invalid type of def "
510 "for variable-length SLP %T\n", oprnd);
511 return -1;
512 }
513 }
514
515 /* Check the types of the definitions. */
516 switch (dt)
517 {
518 case vect_external_def:
519 /* Make sure to demote the overall operand to external. */
520 oprnd_info->first_dt = vect_external_def;
521 /* Fallthru. */
522 case vect_constant_def:
523 oprnd_info->def_stmts.quick_push (NULL);
524 oprnd_info->ops.quick_push (oprnd);
525 break;
526
527 case vect_internal_def:
528 case vect_reduction_def:
529 if (oprnd_info->first_dt == vect_reduction_def
530 && !STMT_VINFO_DATA_REF (stmt_info)
531 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
532 && !STMT_VINFO_DATA_REF (def_stmt_info)
533 && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info)
534 == REDUC_GROUP_FIRST_ELEMENT (stmt_info)))
535 {
536 /* For an SLP reduction chain we want to duplicate the
537 reduction to each of the chain members. That gets
538 us a sane SLP graph (though the stmts are still not
539 100% correct wrt the initial values). */
540 gcc_assert (!first);
541 oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]);
542 oprnd_info->ops.quick_push (oprnd_info->ops[0]);
543 break;
544 }
545 /* Fallthru. */
546 case vect_induction_def:
547 oprnd_info->def_stmts.quick_push (def_stmt_info);
548 oprnd_info->ops.quick_push (oprnd);
549 break;
550
551 default:
552 /* FORNOW: Not supported. */
553 if (dump_enabled_p ())
554 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
555 "Build SLP failed: illegal type of def %T\n",
556 oprnd);
557
558 return -1;
559 }
560 }
561
562 /* Swap operands. */
563 if (swapped)
564 {
565 if (first_op_cond)
566 {
567 /* If there are already uses of this stmt in an SLP instance then
568 we've committed to the operand order and can't swap it. */
569 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
570 {
571 if (dump_enabled_p ())
572 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
573 "Build SLP failed: cannot swap operands of "
574 "shared stmt %G", stmt_info->stmt);
575 return -1;
576 }
577
578 /* To get rid of this swapping we have to move the stmt code
579 to the SLP tree as well (and gather it here per stmt). */
580 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
581 tree cond = gimple_assign_rhs1 (stmt);
582 enum tree_code code = TREE_CODE (cond);
583
584 /* Swap. */
585 if (*swap == 1)
586 {
587 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
588 &TREE_OPERAND (cond, 1));
589 TREE_SET_CODE (cond, swap_tree_comparison (code));
590 }
591 /* Invert. */
592 else
593 {
594 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
595 gimple_assign_rhs3_ptr (stmt));
596 if (STMT_VINFO_REDUC_IDX (stmt_info) == 1)
597 STMT_VINFO_REDUC_IDX (stmt_info) = 2;
598 else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2)
599 STMT_VINFO_REDUC_IDX (stmt_info) = 1;
600 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
601 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
602 gcc_assert (code != ERROR_MARK);
603 TREE_SET_CODE (cond, code);
604 }
605 }
606 else
607 {
608 /* Commutative ops need not reflect swapping; the operands
609 are stored in the SLP tree. */
610 }
611 if (dump_enabled_p ())
612 dump_printf_loc (MSG_NOTE, vect_location,
613 "swapped operands to match def types in %G",
614 stmt_info->stmt);
615 }
616
617 *swap = swapped;
618 return 0;
619 }
620
621 /* Try to assign vector type VECTYPE to STMT_INFO for BB vectorization.
622 Return true if we can, meaning that this choice doesn't conflict with
623 existing SLP nodes that use STMT_INFO. */
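/* For example, during BB vectorization the same scalar load may be
   shared by two SLP instances; if the first instance has already
   chosen V8HI for it, a second instance asking for V4SI must fail
   here rather than silently changing the type under the first
   user.  */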
624
625 static bool
626 vect_update_shared_vectype (stmt_vec_info stmt_info, tree vectype)
627 {
628 tree old_vectype = STMT_VINFO_VECTYPE (stmt_info);
629 if (old_vectype && useless_type_conversion_p (vectype, old_vectype))
630 return true;
631
632 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
633 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
634 {
635 /* We maintain the invariant that if any statement in the group is
636 used, all other members of the group have the same vector type. */
637 stmt_vec_info first_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
638 stmt_vec_info member_info = first_info;
639 for (; member_info; member_info = DR_GROUP_NEXT_ELEMENT (member_info))
640 if (STMT_VINFO_NUM_SLP_USES (member_info) > 0
641 || is_pattern_stmt_p (member_info))
642 break;
643
644 if (!member_info)
645 {
646 for (member_info = first_info; member_info;
647 member_info = DR_GROUP_NEXT_ELEMENT (member_info))
648 STMT_VINFO_VECTYPE (member_info) = vectype;
649 return true;
650 }
651 }
652 else if (STMT_VINFO_NUM_SLP_USES (stmt_info) == 0
653 && !is_pattern_stmt_p (stmt_info))
654 {
655 STMT_VINFO_VECTYPE (stmt_info) = vectype;
656 return true;
657 }
658
659 if (dump_enabled_p ())
660 {
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
662 "Build SLP failed: incompatible vector"
663 " types for: %G", stmt_info->stmt);
664 dump_printf_loc (MSG_NOTE, vect_location,
665 " old vector type: %T\n", old_vectype);
666 dump_printf_loc (MSG_NOTE, vect_location,
667 " new vector type: %T\n", vectype);
668 }
669 return false;
670 }
671
672 /* Try to infer and assign a vector type to all the statements in STMTS.
673 Used only for BB vectorization. */
674
675 static bool
676 vect_update_all_shared_vectypes (vec<stmt_vec_info> stmts)
677 {
678 tree vectype, nunits_vectype;
679 if (!vect_get_vector_types_for_stmt (stmts[0], &vectype,
680 &nunits_vectype, stmts.length ()))
681 return false;
682
683 stmt_vec_info stmt_info;
684 unsigned int i;
685 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
686 if (!vect_update_shared_vectype (stmt_info, vectype))
687 return false;
688
689 return true;
690 }
691
692 /* Return true if call statements CALL1 and CALL2 are similar enough
693 to be combined into the same SLP group. */
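/* For example, two calls to sqrtf can be grouped, while a sqrtf call
   and a sqrt call cannot: they map to different combined_fn codes and
   have different argument types.  */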
694
695 static bool
696 compatible_calls_p (gcall *call1, gcall *call2)
697 {
698 unsigned int nargs = gimple_call_num_args (call1);
699 if (nargs != gimple_call_num_args (call2))
700 return false;
701
702 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
703 return false;
704
705 if (gimple_call_internal_p (call1))
706 {
707 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
708 TREE_TYPE (gimple_call_lhs (call2))))
709 return false;
710 for (unsigned int i = 0; i < nargs; ++i)
711 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
712 TREE_TYPE (gimple_call_arg (call2, i))))
713 return false;
714 }
715 else
716 {
717 if (!operand_equal_p (gimple_call_fn (call1),
718 gimple_call_fn (call2), 0))
719 return false;
720
721 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
722 return false;
723 }
724 return true;
725 }
726
727 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
728 caller's attempt to find the vector type in STMT_INFO with the narrowest
729 element type. Return true if VECTYPE is nonnull and if it is valid
730 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
731 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
732 vect_build_slp_tree. */
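/* For example, for a group of two double stores in a basic block on
   a target choosing V4DF, const_nunits (4) exceeds the group size
   (2); filling a vector would require unrolling, which is not
   possible for basic-block SLP, so the build fails.  */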
733
734 static bool
735 vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
736 tree vectype, poly_uint64 *max_nunits)
737 {
738 if (!vectype)
739 {
740 if (dump_enabled_p ())
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742 "Build SLP failed: unsupported data-type in %G\n",
743 stmt_info->stmt);
744 /* Fatal mismatch. */
745 return false;
746 }
747
748 /* If populating the vector type requires unrolling then fail
749 before adjusting *max_nunits for basic-block vectorization. */
750 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
751 unsigned HOST_WIDE_INT const_nunits;
752 if (STMT_VINFO_BB_VINFO (stmt_info)
753 && (!nunits.is_constant (&const_nunits)
754 || const_nunits > group_size))
755 {
756 if (dump_enabled_p ())
757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
758 "Build SLP failed: unrolling required "
759 "in basic block SLP\n");
760 /* Fatal mismatch. */
761 return false;
762 }
763
764 /* In case of multiple types we need to detect the smallest type. */
765 vect_update_max_nunits (max_nunits, vectype);
766 return true;
767 }
768
769 /* STMTS is a group of GROUP_SIZE SLP statements in which some
770 statements do the same operation as the first statement and in which
771 the others do ALT_STMT_CODE. Return true if we can take one vector
772 of the first operation and one vector of the second and permute them
773 to get the required result. VECTYPE is the type of the vector that
774 would be permuted. */
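/* For example, for the four-stmt group { +, -, +, - } and V4SI the
   permutation built below is { 0, 5, 2, 7 }: lanes 0 and 2 are taken
   from the vector of the first operation and lanes 1 and 3 from the
   vector of the alternate operation (lanes 4..7 of the concatenation
   of the two vectors).  */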
775
776 static bool
777 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
778 unsigned int group_size, tree vectype,
779 tree_code alt_stmt_code)
780 {
781 unsigned HOST_WIDE_INT count;
782 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
783 return false;
784
785 vec_perm_builder sel (count, count, 1);
786 for (unsigned int i = 0; i < count; ++i)
787 {
788 unsigned int elt = i;
789 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
790 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
791 elt += count;
792 sel.quick_push (elt);
793 }
794 vec_perm_indices indices (sel, 2, count);
795 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
796 }
797
798 /* Verify that the scalar stmts STMTS are isomorphic, do not require
799 a data permutation, and are not of unsupported operation types.
800 Return true if so; otherwise return false and indicate in *MATCHES
801 which stmts are not isomorphic to the first one. If MATCHES[0]
802 is false then this indicates that the comparison could not be
803 carried out or that the stmts will never be vectorized by SLP.
804 
805 Note a COND_EXPR is possibly isomorphic to another one after swapping
806 its operands. Set SWAP[i] to 1 if stmt I is a COND_EXPR isomorphic to
807 the first stmt by swapping the two operands of its comparison; set
808 SWAP[i] to 2 if stmt I is isomorphic to the first stmt by inverting
809 the comparison code. Take A1 >= B1 ? X1 : Y1 as an example: it can be
810 swapped to (B1 <= A1 ? X1 : Y1) or inverted to (A1 < B1 ? Y1 : X1). */
811
812 static bool
813 vect_build_slp_tree_1 (unsigned char *swap,
814 vec<stmt_vec_info> stmts, unsigned int group_size,
815 poly_uint64 *max_nunits, bool *matches,
816 bool *two_operators)
817 {
818 unsigned int i;
819 stmt_vec_info first_stmt_info = stmts[0];
820 enum tree_code first_stmt_code = ERROR_MARK;
821 enum tree_code alt_stmt_code = ERROR_MARK;
822 enum tree_code rhs_code = ERROR_MARK;
823 enum tree_code first_cond_code = ERROR_MARK;
824 tree lhs;
825 bool need_same_oprnds = false;
826 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
827 optab optab;
828 int icode;
829 machine_mode optab_op2_mode;
830 machine_mode vec_mode;
831 stmt_vec_info first_load = NULL, prev_first_load = NULL;
832 bool load_p = false;
833
834 /* For every stmt in NODE find its def stmt/s. */
835 stmt_vec_info stmt_info;
836 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
837 {
838 vec_info *vinfo = stmt_info->vinfo;
839 gimple *stmt = stmt_info->stmt;
840 swap[i] = 0;
841 matches[i] = false;
842
843 if (dump_enabled_p ())
844 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
845
846 /* Fail to vectorize statements marked as unvectorizable. */
847 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
848 {
849 if (dump_enabled_p ())
850 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
851 "Build SLP failed: unvectorizable statement %G",
852 stmt);
853 /* Fatal mismatch. */
854 matches[0] = false;
855 return false;
856 }
857
858 lhs = gimple_get_lhs (stmt);
859 if (lhs == NULL_TREE)
860 {
861 if (dump_enabled_p ())
862 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
863 "Build SLP failed: not GIMPLE_ASSIGN nor "
864 "GIMPLE_CALL %G", stmt);
865 /* Fatal mismatch. */
866 matches[0] = false;
867 return false;
868 }
869
870 tree nunits_vectype;
871 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
872 &nunits_vectype, group_size)
873 || (nunits_vectype
874 && !vect_record_max_nunits (stmt_info, group_size,
875 nunits_vectype, max_nunits)))
876 {
877 /* Fatal mismatch. */
878 matches[0] = false;
879 return false;
880 }
881
882 gcc_assert (vectype);
883
884 if (is_a <bb_vec_info> (vinfo)
885 && !vect_update_shared_vectype (stmt_info, vectype))
886 continue;
887
888 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
889 {
890 rhs_code = CALL_EXPR;
891
892 if (gimple_call_internal_p (stmt, IFN_MASK_LOAD))
893 load_p = true;
894 else if ((gimple_call_internal_p (call_stmt)
895 && (!vectorizable_internal_fn_p
896 (gimple_call_internal_fn (call_stmt))))
897 || gimple_call_tail_p (call_stmt)
898 || gimple_call_noreturn_p (call_stmt)
899 || !gimple_call_nothrow_p (call_stmt)
900 || gimple_call_chain (call_stmt))
901 {
902 if (dump_enabled_p ())
903 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
904 "Build SLP failed: unsupported call type %G",
905 call_stmt);
906 /* Fatal mismatch. */
907 matches[0] = false;
908 return false;
909 }
910 }
911 else
912 {
913 rhs_code = gimple_assign_rhs_code (stmt);
914 load_p = TREE_CODE_CLASS (rhs_code) == tcc_reference;
915 }
916
917 /* Check the operation. */
918 if (i == 0)
919 {
920 first_stmt_code = rhs_code;
921
922 /* Shift arguments should be equal in all the packed stmts for a
923 vector shift with a scalar shift operand. */
924 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
925 || rhs_code == LROTATE_EXPR
926 || rhs_code == RROTATE_EXPR)
927 {
928 vec_mode = TYPE_MODE (vectype);
929
930 /* First see if we have a vector/vector shift. */
931 optab = optab_for_tree_code (rhs_code, vectype,
932 optab_vector);
933
934 if (!optab
935 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
936 {
937 /* No vector/vector shift, try for a vector/scalar shift. */
938 optab = optab_for_tree_code (rhs_code, vectype,
939 optab_scalar);
940
941 if (!optab)
942 {
943 if (dump_enabled_p ())
944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
945 "Build SLP failed: no optab.\n");
946 /* Fatal mismatch. */
947 matches[0] = false;
948 return false;
949 }
950 icode = (int) optab_handler (optab, vec_mode);
951 if (icode == CODE_FOR_nothing)
952 {
953 if (dump_enabled_p ())
954 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
955 "Build SLP failed: "
956 "op not supported by target.\n");
957 /* Fatal mismatch. */
958 matches[0] = false;
959 return false;
960 }
961 optab_op2_mode = insn_data[icode].operand[2].mode;
962 if (!VECTOR_MODE_P (optab_op2_mode))
963 {
964 need_same_oprnds = true;
965 first_op1 = gimple_assign_rhs2 (stmt);
966 }
967 }
968 }
969 else if (rhs_code == WIDEN_LSHIFT_EXPR)
970 {
971 need_same_oprnds = true;
972 first_op1 = gimple_assign_rhs2 (stmt);
973 }
974 }
975 else
976 {
977 if (first_stmt_code != rhs_code
978 && alt_stmt_code == ERROR_MARK)
979 alt_stmt_code = rhs_code;
980 if (first_stmt_code != rhs_code
981 && (first_stmt_code != IMAGPART_EXPR
982 || rhs_code != REALPART_EXPR)
983 && (first_stmt_code != REALPART_EXPR
984 || rhs_code != IMAGPART_EXPR)
985 /* Handle mismatches in plus/minus by computing both
986 and merging the results. */
987 && !((first_stmt_code == PLUS_EXPR
988 || first_stmt_code == MINUS_EXPR)
989 && (alt_stmt_code == PLUS_EXPR
990 || alt_stmt_code == MINUS_EXPR)
991 && rhs_code == alt_stmt_code)
992 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
993 && (first_stmt_code == ARRAY_REF
994 || first_stmt_code == BIT_FIELD_REF
995 || first_stmt_code == INDIRECT_REF
996 || first_stmt_code == COMPONENT_REF
997 || first_stmt_code == MEM_REF)))
998 {
999 if (dump_enabled_p ())
1000 {
1001 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1002 "Build SLP failed: different operation "
1003 "in stmt %G", stmt);
1004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1005 "original stmt %G", first_stmt_info->stmt);
1006 }
1007 /* Mismatch. */
1008 continue;
1009 }
1010
1011 if (need_same_oprnds
1012 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
1013 {
1014 if (dump_enabled_p ())
1015 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1016 "Build SLP failed: different shift "
1017 "arguments in %G", stmt);
1018 /* Mismatch. */
1019 continue;
1020 }
1021
1022 if (!load_p && rhs_code == CALL_EXPR)
1023 {
1024 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
1025 as_a <gcall *> (stmt)))
1026 {
1027 if (dump_enabled_p ())
1028 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1029 "Build SLP failed: different calls in %G",
1030 stmt);
1031 /* Mismatch. */
1032 continue;
1033 }
1034 }
1035 }
1036
1037 /* Grouped store or load. */
1038 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1039 {
1040 if (REFERENCE_CLASS_P (lhs))
1041 {
1042 /* Store. */
1043 ;
1044 }
1045 else
1046 {
1047 /* Load. */
1048 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
1049 if (prev_first_load)
1050 {
1051 /* Check that there are no loads from different interleaving
1052 chains in the same node. */
1053 if (prev_first_load != first_load)
1054 {
1055 if (dump_enabled_p ())
1056 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1057 vect_location,
1058 "Build SLP failed: different "
1059 "interleaving chains in one node %G",
1060 stmt);
1061 /* Mismatch. */
1062 continue;
1063 }
1064 }
1065 else
1066 prev_first_load = first_load;
1067 }
1068 } /* Grouped access. */
1069 else
1070 {
1071 if (load_p)
1072 {
1073 /* Non-grouped load. */
1074 if (dump_enabled_p ())
1075 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1076 "Build SLP failed: not grouped load %G", stmt);
1077
1078 /* FORNOW: Non-grouped loads are not supported. */
1079 /* Fatal mismatch. */
1080 matches[0] = false;
1081 return false;
1082 }
1083
1084 /* Not a memory operation. */
1085 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
1086 && TREE_CODE_CLASS (rhs_code) != tcc_unary
1087 && TREE_CODE_CLASS (rhs_code) != tcc_expression
1088 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
1089 && rhs_code != VIEW_CONVERT_EXPR
1090 && rhs_code != CALL_EXPR)
1091 {
1092 if (dump_enabled_p ())
1093 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1094 "Build SLP failed: operation unsupported %G",
1095 stmt);
1096 /* Fatal mismatch. */
1097 matches[0] = false;
1098 return false;
1099 }
1100
1101 if (rhs_code == COND_EXPR)
1102 {
1103 tree cond_expr = gimple_assign_rhs1 (stmt);
1104 enum tree_code cond_code = TREE_CODE (cond_expr);
1105 enum tree_code swap_code = ERROR_MARK;
1106 enum tree_code invert_code = ERROR_MARK;
1107
1108 if (i == 0)
1109 first_cond_code = TREE_CODE (cond_expr);
1110 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
1111 {
1112 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
1113 swap_code = swap_tree_comparison (cond_code);
1114 invert_code = invert_tree_comparison (cond_code, honor_nans);
1115 }
1116
1117 if (first_cond_code == cond_code)
1118 ;
1119 /* Isomorphism can be achieved by swapping. */
1120 else if (first_cond_code == swap_code)
1121 swap[i] = 1;
1122 /* Isomorphism can be achieved by inverting. */
1123 else if (first_cond_code == invert_code)
1124 swap[i] = 2;
1125 else
1126 {
1127 if (dump_enabled_p ())
1128 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1129 "Build SLP failed: different"
1130 " operation %G", stmt);
1131 /* Mismatch. */
1132 continue;
1133 }
1134 }
1135 }
1136
1137 matches[i] = true;
1138 }
1139
1140 for (i = 0; i < group_size; ++i)
1141 if (!matches[i])
1142 return false;
1143
1144 /* If we allowed a two-operation SLP node verify the target can cope
1145 with the permute we are going to use. */
1146 if (alt_stmt_code != ERROR_MARK
1147 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
1148 {
1149 if (!vect_two_operations_perm_ok_p (stmts, group_size,
1150 vectype, alt_stmt_code))
1151 {
1152 for (i = 0; i < group_size; ++i)
1153 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
1154 {
1155 matches[i] = false;
1156 if (dump_enabled_p ())
1157 {
1158 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1159 "Build SLP failed: different operation "
1160 "in stmt %G", stmts[i]->stmt);
1161 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1162 "original stmt %G", first_stmt_info->stmt);
1163 }
1164 }
1165 return false;
1166 }
1167 *two_operators = true;
1168 }
1169
1170 return true;
1171 }
1172
1173 /* Traits for the hash_map used to record SLP builds for a stmt set.
1174 Note we never remove entries apart from at destruction time, so we
1175 do not need a special value for deleted that differs from empty. */
1176 struct bst_traits
1177 {
1178 typedef vec <stmt_vec_info> value_type;
1179 typedef vec <stmt_vec_info> compare_type;
1180 static inline hashval_t hash (value_type);
1181 static inline bool equal (value_type existing, value_type candidate);
1182 static inline bool is_empty (value_type x) { return !x.exists (); }
1183 static inline bool is_deleted (value_type x) { return !x.exists (); }
1184 static inline void mark_empty (value_type &x) { x.release (); }
1185 static inline void mark_deleted (value_type &x) { x.release (); }
1186 static inline void remove (value_type &x) { x.release (); }
1187 };
1188 inline hashval_t
1189 bst_traits::hash (value_type x)
1190 {
1191 inchash::hash h;
1192 for (unsigned i = 0; i < x.length (); ++i)
1193 h.add_int (gimple_uid (x[i]->stmt));
1194 return h.end ();
1195 }
1196 inline bool
1197 bst_traits::equal (value_type existing, value_type candidate)
1198 {
1199 if (existing.length () != candidate.length ())
1200 return false;
1201 for (unsigned i = 0; i < existing.length (); ++i)
1202 if (existing[i] != candidate[i])
1203 return false;
1204 return true;
1205 }
1206
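/* Map from a stmt set to the SLP node built for it, or to NULL for a
   recorded build failure.  When the same vector of scalar stmts is
   reached again, e.g. through another parent in the SLP graph,
   vect_build_slp_tree below re-uses the cached node (bumping its
   refcnt) or re-reports the cached failure instead of re-building.  */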
1207 typedef hash_map <vec <gimple *>, slp_tree,
1208 simple_hashmap_traits <bst_traits, slp_tree> >
1209 scalar_stmts_to_slp_tree_map_t;
1210
1211 static slp_tree
1212 vect_build_slp_tree_2 (vec_info *vinfo,
1213 vec<stmt_vec_info> stmts, unsigned int group_size,
1214 poly_uint64 *max_nunits,
1215 bool *matches, unsigned *npermutes, unsigned *tree_size,
1216 scalar_stmts_to_slp_tree_map_t *bst_map);
1217
1218 static slp_tree
1219 vect_build_slp_tree (vec_info *vinfo,
1220 vec<stmt_vec_info> stmts, unsigned int group_size,
1221 poly_uint64 *max_nunits,
1222 bool *matches, unsigned *npermutes, unsigned *tree_size,
1223 scalar_stmts_to_slp_tree_map_t *bst_map)
1224 {
1225 if (slp_tree *leader = bst_map->get (stmts))
1226 {
1227 if (dump_enabled_p ())
1228 dump_printf_loc (MSG_NOTE, vect_location, "re-using %sSLP tree %p\n",
1229 *leader ? "" : "failed ", *leader);
1230 if (*leader)
1231 {
1232 (*leader)->refcnt++;
1233 vect_update_max_nunits (max_nunits, (*leader)->max_nunits);
1234 }
1235 return *leader;
1236 }
1237 poly_uint64 this_max_nunits = 1;
1238 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size,
1239 &this_max_nunits,
1240 matches, npermutes, tree_size, bst_map);
1241 if (res)
1242 {
1243 res->max_nunits = this_max_nunits;
1244 vect_update_max_nunits (max_nunits, this_max_nunits);
1245 /* Keep a reference for the bst_map use. */
1246 res->refcnt++;
1247 }
1248 bst_map->put (stmts.copy (), res);
1249 return res;
1250 }
1251
1252 /* Recursively build an SLP tree for the group of stmts STMTS.
1253 Fail (and return NULL) if the def-stmts are not isomorphic,
1254 require a data permutation, or are of unsupported operation
1255 types. On failure, MATCHES indicates which stmts failed to
1256 match the first one at the level where the mismatch was
1257 found. */
1258
1259 static slp_tree
1260 vect_build_slp_tree_2 (vec_info *vinfo,
1261 vec<stmt_vec_info> stmts, unsigned int group_size,
1262 poly_uint64 *max_nunits,
1263 bool *matches, unsigned *npermutes, unsigned *tree_size,
1264 scalar_stmts_to_slp_tree_map_t *bst_map)
1265 {
1266 unsigned nops, i, this_tree_size = 0;
1267 poly_uint64 this_max_nunits = *max_nunits;
1268 slp_tree node;
1269
1270 matches[0] = false;
1271
1272 stmt_vec_info stmt_info = stmts[0];
1273 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1274 nops = gimple_call_num_args (stmt);
1275 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1276 {
1277 nops = gimple_num_ops (stmt) - 1;
1278 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1279 nops++;
1280 }
1281 else if (is_a <gphi *> (stmt_info->stmt))
1282 nops = 0;
1283 else
1284 return NULL;
1285
1286 /* If the SLP node is a PHI (induction or reduction), terminate
1287 the recursion. */
1288 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1289 {
1290 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1291 tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type);
1292 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
1293 return NULL;
1294
1295 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1296 /* Induction from different IVs is not supported. */
1297 if (def_type == vect_induction_def)
1298 {
1299 stmt_vec_info other_info;
1300 FOR_EACH_VEC_ELT (stmts, i, other_info)
1301 if (stmt_info != other_info)
1302 return NULL;
1303 }
1304 else if (def_type == vect_reduction_def
1305 || def_type == vect_double_reduction_def
1306 || def_type == vect_nested_cycle)
1307 {
1308 /* Else def types have to match. */
1309 stmt_vec_info other_info;
1310 FOR_EACH_VEC_ELT (stmts, i, other_info)
1311 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1312 return NULL;
1313 }
1314 else
1315 return NULL;
1316 (*tree_size)++;
1317 node = vect_create_new_slp_node (stmts);
1318 return node;
1319 }
1320
1321
1322 bool two_operators = false;
1323 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1324 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
1325 &this_max_nunits, matches, &two_operators))
1326 return NULL;
1327
1328 /* If the SLP node is a load, terminate the recursion unless masked. */
1329 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1330 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1331 {
1332 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1333 {
1334 /* Masked load. */
1335 gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD));
1336 nops = 1;
1337 }
1338 else
1339 {
1340 *max_nunits = this_max_nunits;
1341 (*tree_size)++;
1342 node = vect_create_new_slp_node (stmts);
1343 return node;
1344 }
1345 }
1346
1347 /* Get at the operands, verifying they are compatible. */
1348 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1349 slp_oprnd_info oprnd_info;
1350 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1351 {
1352 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1353 stmts, i, &oprnds_info);
1354 if (res != 0)
1355 matches[(res == -1) ? 0 : i] = false;
1356 if (!matches[0])
1357 break;
1358 }
1359 for (i = 0; i < group_size; ++i)
1360 if (!matches[i])
1361 {
1362 vect_free_oprnd_info (oprnds_info);
1363 return NULL;
1364 }
1365
1366 auto_vec<slp_tree, 4> children;
1367
1368 stmt_info = stmts[0];
1369
1370 /* Create SLP_TREE nodes for the definition node/s. */
1371 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
1372 {
1373 slp_tree child;
1374 unsigned old_tree_size = this_tree_size;
1375 unsigned int j;
1376
1377 if (oprnd_info->first_dt == vect_uninitialized_def)
1378 {
1379 /* A COND_EXPR ends up with one unused operand slot here if its
1380 condition is an SSA name rather than a split comparison. */
1381 gcc_assert (i == 3 && nops == 4);
1382 continue;
1383 }
1384
1385 if (oprnd_info->first_dt != vect_internal_def
1386 && oprnd_info->first_dt != vect_reduction_def
1387 && oprnd_info->first_dt != vect_induction_def)
1388 {
1389 slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops);
1390 SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt;
1391 oprnd_info->ops = vNULL;
1392 children.safe_push (invnode);
1393 continue;
1394 }
1395
1396 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1397 group_size, &this_max_nunits,
1398 matches, npermutes,
1399 &this_tree_size, bst_map)) != NULL)
1400 {
1401 /* If all children of a non-unary child are built up from
1402 scalars then just throw that child away and build this
1403 node up from scalars instead. */
1404 if (is_a <bb_vec_info> (vinfo)
1405 && SLP_TREE_CHILDREN (child).length () > 1
1406 /* ??? Rejecting patterns this way doesn't work. We'd have to
1407 do extra work to cancel the pattern so the uses see the
1408 scalar version. */
1409 && !oprnd_info->any_pattern)
1410 {
1411 slp_tree grandchild;
1412
1413 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1414 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1415 break;
1416 if (!grandchild
1417 && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
1418 {
1419 /* Roll back. */
1420 this_tree_size = old_tree_size;
1421 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1422 vect_free_slp_tree (grandchild, false);
1423 SLP_TREE_CHILDREN (child).truncate (0);
1424
1425 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location,
1427 "Building parent vector operands from "
1428 "scalars instead\n");
1429 oprnd_info->def_stmts = vNULL;
1430 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1431 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1432 oprnd_info->ops = vNULL;
1433 ++this_tree_size;
1434 children.safe_push (child);
1435 continue;
1436 }
1437 }
1438
1439 oprnd_info->def_stmts = vNULL;
1440 children.safe_push (child);
1441 continue;
1442 }
1443
1444 /* If the SLP build failed fatally and we are analyzing a
1445 basic-block, simply treat nodes we fail to build as externally
1446 defined (and thus build vectors from the scalar defs).
1447 The cost model will reject outright expensive cases.
1448 ??? This doesn't treat cases where permutation ultimately
1449 fails (or we don't try permutation below). Ideally we'd
1450 even compute a permutation that will end up with the maximum
1451 SLP tree size... */
1452 if (is_a <bb_vec_info> (vinfo)
1453 && !matches[0]
1454 /* ??? Rejecting patterns this way doesn't work. We'd have to
1455 do extra work to cancel the pattern so the uses see the
1456 scalar version. */
1457 && !is_pattern_stmt_p (stmt_info)
1458 && !oprnd_info->any_pattern
1459 && vect_update_all_shared_vectypes (oprnd_info->def_stmts))
1460 {
1461 if (dump_enabled_p ())
1462 dump_printf_loc (MSG_NOTE, vect_location,
1463 "Building vector operands from scalars\n");
1464 this_tree_size++;
1465 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1466 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1467 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1468 children.safe_push (child);
1469 oprnd_info->ops = vNULL;
1470 oprnd_info->def_stmts = vNULL;
1471 continue;
1472 }
1473
1474 /* If the SLP build for operand zero failed and operands zero
1475 and one can be commuted, try that for the scalar stmts
1476 that failed the match. */
1477 if (i == 0
1478 /* A first scalar stmt mismatch signals a fatal mismatch. */
1479 && matches[0]
1480 /* ??? For COND_EXPRs we can swap the comparison operands
1481 as well as the arms under some constraints. */
1482 && nops == 2
1483 && oprnds_info[1]->first_dt == vect_internal_def
1484 && is_gimple_assign (stmt_info->stmt)
1485 /* Swapping operands for reductions breaks assumptions later on. */
1486 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
1487 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def
1488 /* Do so only if the number of unsuccessful permutes was no more
1489 than a cut-off, as re-trying the recursive match on
1490 possibly each level of the tree would expose exponential
1491 behavior. */
1492 && *npermutes < 4)
1493 {
1494 /* See whether we can swap the matching or the non-matching
1495 stmt operands. */
1496 bool swap_not_matching = true;
1497 do
1498 {
1499 for (j = 0; j < group_size; ++j)
1500 {
1501 if (matches[j] != !swap_not_matching)
1502 continue;
1503 stmt_vec_info stmt_info = stmts[j];
1504 /* Verify if we can swap operands of this stmt. */
1505 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1506 if (!stmt
1507 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1508 {
1509 if (!swap_not_matching)
1510 goto fail;
1511 swap_not_matching = false;
1512 break;
1513 }
1514 }
1515 }
1516 while (j != group_size);
1517
1518 /* Swap mismatched definition stmts. */
1519 if (dump_enabled_p ())
1520 dump_printf_loc (MSG_NOTE, vect_location,
1521 "Re-trying with swapped operands of stmts ");
1522 for (j = 0; j < group_size; ++j)
1523 if (matches[j] == !swap_not_matching)
1524 {
1525 std::swap (oprnds_info[0]->def_stmts[j],
1526 oprnds_info[1]->def_stmts[j]);
1527 std::swap (oprnds_info[0]->ops[j],
1528 oprnds_info[1]->ops[j]);
1529 if (dump_enabled_p ())
1530 dump_printf (MSG_NOTE, "%d ", j);
1531 }
1532 if (dump_enabled_p ())
1533 dump_printf (MSG_NOTE, "\n");
1534 /* And try again with scratch 'matches' ... */
1535 bool *tem = XALLOCAVEC (bool, group_size);
1536 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1537 group_size, &this_max_nunits,
1538 tem, npermutes,
1539 &this_tree_size, bst_map)) != NULL)
1540 {
1541 /* If all children of a non-unary child are built up from
1542 scalars then just throw that child away and build this
1543 node up from scalars instead. */
1544 if (is_a <bb_vec_info> (vinfo)
1545 && SLP_TREE_CHILDREN (child).length () > 1
1546 /* ??? Rejecting patterns this way doesn't work. We'd have
1547 to do extra work to cancel the pattern so the uses see the
1548 scalar version. */
1549 && !oprnd_info->any_pattern)
1550 {
1551 unsigned int j;
1552 slp_tree grandchild;
1553
1554 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1555 if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def)
1556 break;
1557 if (!grandchild
1558 && (vect_update_all_shared_vectypes
1559 (oprnd_info->def_stmts)))
1560 {
1561 /* Roll back. */
1562 this_tree_size = old_tree_size;
1563 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1564 vect_free_slp_tree (grandchild, false);
1565 SLP_TREE_CHILDREN (child).truncate (0);
1566
1567 if (dump_enabled_p ())
1568 dump_printf_loc (MSG_NOTE, vect_location,
1569 "Building parent vector operands from "
1570 "scalars instead\n");
1571 oprnd_info->def_stmts = vNULL;
1572 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1573 SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops;
1574 oprnd_info->ops = vNULL;
1575 ++this_tree_size;
1576 children.safe_push (child);
1577 continue;
1578 }
1579 }
1580
1581 oprnd_info->def_stmts = vNULL;
1582 children.safe_push (child);
1583 continue;
1584 }
1585
1586 ++*npermutes;
1587 }
1588
1589 fail:
1590 gcc_assert (child == NULL);
1591 FOR_EACH_VEC_ELT (children, j, child)
1592 vect_free_slp_tree (child, false);
1593 vect_free_oprnd_info (oprnds_info);
1594 return NULL;
1595 }
1596
1597 vect_free_oprnd_info (oprnds_info);
1598
1599 *tree_size += this_tree_size + 1;
1600 *max_nunits = this_max_nunits;
1601
1602 node = vect_create_new_slp_node (stmts);
1603 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1604 SLP_TREE_CHILDREN (node).splice (children);
1605 return node;
1606 }
1607
1608 /* Dump an SLP tree NODE using flags specified in DUMP_KIND. */
1609
1610 static void
1611 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1612 slp_tree node, hash_set<slp_tree> &visited)
1613 {
1614 unsigned i;
1615 stmt_vec_info stmt_info;
1616 slp_tree child;
1617 tree op;
1618
1619 if (visited.add (node))
1620 return;
1621
1622 dump_metadata_t metadata (dump_kind, loc.get_impl_location ());
1623 dump_user_location_t user_loc = loc.get_user_location ();
1624 dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n",
1625 SLP_TREE_DEF_TYPE (node) == vect_external_def
1626 ? " (external)"
1627 : (SLP_TREE_DEF_TYPE (node) == vect_constant_def
1628 ? " (constant)"
1629 : ""), node,
1630 estimated_poly_value (node->max_nunits));
1631 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1632 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1633 dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt);
1634 else
1635 {
1636 dump_printf_loc (metadata, user_loc, "\t{ ");
1637 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1638 dump_printf (metadata, "%T%s ", op,
1639 i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? "," : "");
1640 dump_printf (metadata, "}\n");
1641 }
1642 if (SLP_TREE_CHILDREN (node).is_empty ())
1643 return;
1644 dump_printf_loc (metadata, user_loc, "\tchildren");
1645 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1646 dump_printf (dump_kind, " %p", (void *)child);
1647 dump_printf (dump_kind, "\n");
1648 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1649 vect_print_slp_tree (dump_kind, loc, child, visited);
1650 }
1651
1652 static void
1653 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1654 slp_tree node)
1655 {
1656 hash_set<slp_tree> visited;
1657 vect_print_slp_tree (dump_kind, loc, node, visited);
1658 }
1659
1660 /* Mark the tree rooted at NODE with PURE_SLP. */
1661
1662 static void
1663 vect_mark_slp_stmts (slp_tree node, hash_set<slp_tree> &visited)
1664 {
1665 int i;
1666 stmt_vec_info stmt_info;
1667 slp_tree child;
1668
1669 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1670 return;
1671
1672 if (visited.add (node))
1673 return;
1674
1675 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1676 STMT_SLP_TYPE (stmt_info) = pure_slp;
1677
1678 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1679 vect_mark_slp_stmts (child, visited);
1680 }
1681
1682 static void
1683 vect_mark_slp_stmts (slp_tree node)
1684 {
1685 hash_set<slp_tree> visited;
1686 vect_mark_slp_stmts (node, visited);
1687 }
1688
1689 /* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1690
1691 static void
1692 vect_mark_slp_stmts_relevant (slp_tree node, hash_set<slp_tree> &visited)
1693 {
1694 int i;
1695 stmt_vec_info stmt_info;
1696 slp_tree child;
1697
1698 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1699 return;
1700
1701 if (visited.add (node))
1702 return;
1703
1704 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1705 {
1706 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1707 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1708 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1709 }
1710
1711 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1712 vect_mark_slp_stmts_relevant (child, visited);
1713 }
1714
1715 static void
1716 vect_mark_slp_stmts_relevant (slp_tree node)
1717 {
1718 hash_set<slp_tree> visited;
1719 vect_mark_slp_stmts_relevant (node, visited);
1720 }
1721
1722
1723 /* Rearrange the statements of NODE according to PERMUTATION. */
1724
1725 static void
1726 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1727 vec<unsigned> permutation,
1728 hash_set<slp_tree> &visited)
1729 {
1730 unsigned int i;
1731 slp_tree child;
1732
1733 if (visited.add (node))
1734 return;
1735
1736 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1737 vect_slp_rearrange_stmts (child, group_size, permutation, visited);
1738
1739 if (SLP_TREE_SCALAR_STMTS (node).exists ())
1740 {
1741 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1742 vec<stmt_vec_info> tmp_stmts;
1743 tmp_stmts.create (group_size);
1744 tmp_stmts.quick_grow (group_size);
1745 stmt_vec_info stmt_info;
1746 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1747 tmp_stmts[permutation[i]] = stmt_info;
1748 SLP_TREE_SCALAR_STMTS (node).release ();
1749 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1750 }
1751 if (SLP_TREE_SCALAR_OPS (node).exists ())
1752 {
1753 gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ());
1754 vec<tree> tmp_ops;
1755 tmp_ops.create (group_size);
1756 tmp_ops.quick_grow (group_size);
1757 tree op;
1758 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
1759 tmp_ops[permutation[i]] = op;
1760 SLP_TREE_SCALAR_OPS (node).release ();
1761 SLP_TREE_SCALAR_OPS (node) = tmp_ops;
1762 }
1763 }
1764
1765
1766 /* Attempt to reorder stmts in a reduction chain so that we don't
1767 require any load permutation. Return true if that was possible,
1768 otherwise return false. */
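/* For example, if the loads feeding a reduction chain all carry the
   load permutation { 1, 0, 3, 2 }, the scalar stmts of every node in
   the instance can instead be reordered by that permutation; since
   only the memory accesses are order-sensitive, the loads then become
   contiguous and the permutation can usually be dropped.  */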
1769
1770 static bool
1771 vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1772 {
1773 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1774 unsigned int i, j;
1775 unsigned int lidx;
1776 slp_tree node, load;
1777
1778 /* Compare all the permutation sequences to the first one. We know
1779 that at least one load is permuted. */
1780 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1781 if (!node->load_permutation.exists ())
1782 return false;
1783 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1784 {
1785 if (!load->load_permutation.exists ())
1786 return false;
1787 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1788 if (lidx != node->load_permutation[j])
1789 return false;
1790 }
1791
1792 /* Check that the loads in the first sequence are different and there
1793 are no gaps between them. */
1794 auto_sbitmap load_index (group_size);
1795 bitmap_clear (load_index);
1796 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1797 {
1798 if (lidx >= group_size)
1799 return false;
1800 if (bitmap_bit_p (load_index, lidx))
1801 return false;
1802
1803 bitmap_set_bit (load_index, lidx);
1804 }
1805 for (i = 0; i < group_size; i++)
1806 if (!bitmap_bit_p (load_index, i))
1807 return false;
1808
1809 /* This permutation is valid for reduction. Since the order of the
1810 statements in the nodes is not important unless they are memory
1811 accesses, we can rearrange the statements in all the nodes
1812 according to the order of the loads. */
1813 hash_set<slp_tree> visited;
1814 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1815 node->load_permutation, visited);
1816
1817 /* We are done, no actual permutations need to be generated. */
1818 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1819 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1820 {
1821 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1822 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1823 /* But we have to keep those permutations that are required because
1824 of handling of gaps. */
1825 if (known_eq (unrolling_factor, 1U)
1826 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1827 && DR_GROUP_GAP (first_stmt_info) == 0))
1828 SLP_TREE_LOAD_PERMUTATION (node).release ();
1829 else
1830 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1831 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1832 }
1833
1834 return true;
1835 }
1836
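/* The sbitmap test in vect_attempt_slp_rearrange_stmts above checks
   that the shared load permutation is a bijection on [0, GROUP_SIZE):
   every index in range, none repeated and, by pigeonhole, none
   missing.  A sketch of the same test (hypothetical helper, standard
   C++ only, not GCC API):

     static bool
     valid_rearrangement_p (const std::vector<unsigned> &perm,
                            unsigned group_size)
     {
       if (perm.size () != group_size)
         return false;
       std::vector<bool> seen (group_size, false);
       for (unsigned idx : perm)
         {
           if (idx >= group_size || seen[idx])
             return false;
           seen[idx] = true;
         }
       return true;
     }

   E.g. {1, 0, 3, 2} passes for GROUP_SIZE 4 while {0, 0, 2, 3} fails
   on the duplicate index 0.  */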
1837 /* Gather loads in the SLP graph rooted at NODE and populate the INST loads array. */
1838
1839 static void
1840 vect_gather_slp_loads (slp_instance inst, slp_tree node,
1841 hash_set<slp_tree> &visited)
1842 {
1843 if (visited.add (node))
1844 return;
1845
1846 if (SLP_TREE_CHILDREN (node).length () == 0)
1847 {
1848 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1849 return;
1850 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1851 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1852 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1853 SLP_INSTANCE_LOADS (inst).safe_push (node);
1854 }
1855 else
1856 {
1857 unsigned i;
1858 slp_tree child;
1859 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1860 vect_gather_slp_loads (inst, child, visited);
1861 }
1862 }
1863
1864 static void
1865 vect_gather_slp_loads (slp_instance inst, slp_tree node)
1866 {
1867 hash_set<slp_tree> visited;
1868 vect_gather_slp_loads (inst, node, visited);
1869 }
1870
1871 /* Check if the required load permutations in the SLP instance
1872 SLP_INSTN are supported. */
1873
1874 static bool
1875 vect_supported_load_permutation_p (slp_instance slp_instn)
1876 {
1877 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1878 unsigned int i, j, k, next;
1879 slp_tree node;
1880
1881 if (dump_enabled_p ())
1882 {
1883 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1884 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1885 if (node->load_permutation.exists ())
1886 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1887 dump_printf (MSG_NOTE, "%d ", next);
1888 else
1889 for (k = 0; k < group_size; ++k)
1890 dump_printf (MSG_NOTE, "%d ", k);
1891 dump_printf (MSG_NOTE, "\n");
1892 }
1893
1894 /* In case of reduction every load permutation is allowed, since the order
1895 of the reduction statements is not important (as opposed to the case of
1896 grouped stores). The only condition we need to check is that all the
1897 load nodes are of the same size and have the same permutation (and then
1898 rearrange all the nodes of the SLP instance according to this
1899 permutation). */
1900
1901 /* Check that all the load nodes are of the same size. */
1902 /* ??? Can't we assert this? */
1903 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1904 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1905 return false;
1906
1907 node = SLP_INSTANCE_TREE (slp_instn);
1908 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1909
1910 /* Reduction (there are no data-refs in the root).
1911 In a reduction chain the order of the loads is not important. */
1912 if (!STMT_VINFO_DATA_REF (stmt_info)
1913 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1914 vect_attempt_slp_rearrange_stmts (slp_instn);
1915
1916 /* In basic block vectorization we allow any subchain of an interleaving
1917 chain.
1918 FORNOW: not supported in loop SLP because of realignment complications. */
1919 if (STMT_VINFO_BB_VINFO (stmt_info))
1920 {
1921 /* Check whether the loads in an instance form a subchain and thus
1922 no permutation is necessary. */
1923 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1924 {
1925 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1926 continue;
1927 bool subchain_p = true;
1928 stmt_vec_info next_load_info = NULL;
1929 stmt_vec_info load_info;
1930 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1931 {
1932 if (j != 0
1933 && (next_load_info != load_info
1934 || DR_GROUP_GAP (load_info) != 1))
1935 {
1936 subchain_p = false;
1937 break;
1938 }
1939 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1940 }
1941 if (subchain_p)
1942 SLP_TREE_LOAD_PERMUTATION (node).release ();
1943 else
1944 {
1945 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1946 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
1947 unsigned HOST_WIDE_INT nunits;
1948 unsigned k, maxk = 0;
1949 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1950 if (k > maxk)
1951 maxk = k;
1952 /* In BB vectorization we may not actually use a loaded vector
1953 accessing elements in excess of DR_GROUP_SIZE. */
1954 tree vectype = STMT_VINFO_VECTYPE (group_info);
1955 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1956 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1957 {
1958 if (dump_enabled_p ())
1959 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1960 "BB vectorization with gaps at the end of "
1961 "a load is not supported\n");
1962 return false;
1963 }
1964
1965 /* Verify the permutation can be generated. */
1966 vec<tree> tem;
1967 unsigned n_perms;
1968 if (!vect_transform_slp_perm_load (node, tem, NULL,
1969 1, slp_instn, true, &n_perms))
1970 {
1971 if (dump_enabled_p ())
1972 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1973 vect_location,
1974 "unsupported load permutation\n");
1975 return false;
1976 }
1977 }
1978 }
1979 return true;
1980 }
1981
1982 /* For loop vectorization verify we can generate the permutation. Be
1983 conservative about the vectorization factor; there are permutations
1984 that will use three vector inputs only starting from a specific factor
1985 and the vectorization factor is not yet final.
1986 ??? The SLP instance unrolling factor might not be the maximum one. */
1987 unsigned n_perms;
1988 poly_uint64 test_vf
1989 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1990 LOOP_VINFO_VECT_FACTOR
1991 (STMT_VINFO_LOOP_VINFO (stmt_info)));
1992 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1993 if (node->load_permutation.exists ()
1994 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1995 slp_instn, true, &n_perms))
1996 return false;
1997
1998 return true;
1999 }
2000
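/* Worked example for the BB subchain test above (illustrative only):
   given an interleaving chain of loads a[0], a[1], a[2], a[3], an SLP
   node whose scalar stmts are the loads of a[1] and a[2], in that
   order and with no gap, forms a subchain, so its load permutation
   can simply be dropped.  A node loading a[2] then a[0] does not, and
   must pass the explicit permutation checks instead.  */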
2001
2002 /* Find the last scalar statement in the SLP tree rooted at NODE. */
2003
2004 stmt_vec_info
2005 vect_find_last_scalar_stmt_in_slp (slp_tree node)
2006 {
2007 stmt_vec_info last = NULL;
2008 stmt_vec_info stmt_vinfo;
2009
2010 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
2011 {
2012 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
2013 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
2014 }
2015
2016 return last;
2017 }
2018
2019 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
2020 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
2021 (also containing the first GROUP1_SIZE stmts, since stores are
2022 consecutive), the second containing the remainder.
2023 Return the first stmt in the second group. */
2024
2025 static stmt_vec_info
2026 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
2027 {
2028 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
2029 gcc_assert (group1_size > 0);
2030 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
2031 gcc_assert (group2_size > 0);
2032 DR_GROUP_SIZE (first_vinfo) = group1_size;
2033
2034 stmt_vec_info stmt_info = first_vinfo;
2035 for (unsigned i = group1_size; i > 1; i--)
2036 {
2037 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
2038 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2039 }
2040 /* STMT_INFO is now the last element of the first group. */
2041 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
2042 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
2043
2044 DR_GROUP_SIZE (group2) = group2_size;
2045 for (stmt_info = group2; stmt_info;
2046 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
2047 {
2048 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
2049 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
2050 }
2051
2052 /* For the second group, the DR_GROUP_GAP is that before the original group,
2053 plus skipping over the first group. */
2054 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
2055
2056 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
2057 DR_GROUP_GAP (first_vinfo) += group2_size;
2058
2059 if (dump_enabled_p ())
2060 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
2061 group1_size, group2_size);
2062
2063 return group2;
2064 }
2065
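/* Worked example with illustrative numbers: splitting a group of 8
   consecutive stores with GROUP1_SIZE 4 and an original DR_GROUP_GAP
   of 0 gives

     group1: DR_GROUP_SIZE 4, DR_GROUP_GAP 0 + 4 = 4
     group2: DR_GROUP_SIZE 4, DR_GROUP_GAP 0 + 4 = 4

   i.e. each group skips over the other group's elements, so both
   still advance by the full 8 elements per scalar iteration.  */
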
2066 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
2067 statements and a vector of NUNITS elements. */
2068
2069 static poly_uint64
2070 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
2071 {
2072 return exact_div (common_multiple (nunits, group_size), group_size);
2073 }
2074
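/* For constant NUNITS the computation above is simply
   lcm (NUNITS, GROUP_SIZE) / GROUP_SIZE, the number of group copies
   needed to fill whole vectors.  A sketch with standard C++17 only
   (hypothetical helper, not GCC API):

     #include <numeric>

     static unsigned
     sketch_unrolling_factor (unsigned nunits, unsigned group_size)
     {
       return std::lcm (nunits, group_size) / group_size;
     }

   E.g. NUNITS 4 and GROUP_SIZE 6 give lcm 12 and factor 2: two copies
   of the six stmts exactly fill three 4-element vectors.  */
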
2075 /* Analyze an SLP instance starting from a group of grouped stores. Call
2076 vect_build_slp_tree to build a tree of packed stmts if possible.
2077 Return FALSE if it's impossible to SLP any stmt in the loop. */
2078
2079 static bool
2080 vect_analyze_slp_instance (vec_info *vinfo,
2081 scalar_stmts_to_slp_tree_map_t *bst_map,
2082 stmt_vec_info stmt_info, unsigned max_tree_size)
2083 {
2084 slp_instance new_instance;
2085 slp_tree node;
2086 unsigned int group_size;
2087 tree vectype, scalar_type = NULL_TREE;
2088 unsigned int i;
2089 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
2090 vec<stmt_vec_info> scalar_stmts;
2091 bool constructor = false;
2092
2093 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2094 {
2095 scalar_type = TREE_TYPE (DR_REF (dr));
2096 group_size = DR_GROUP_SIZE (stmt_info);
2097 vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size);
2098 }
2099 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2100 {
2101 gcc_assert (is_a <loop_vec_info> (vinfo));
2102 vectype = STMT_VINFO_VECTYPE (stmt_info);
2103 group_size = REDUC_GROUP_SIZE (stmt_info);
2104 }
2105 else if (is_gimple_assign (stmt_info->stmt)
2106 && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR)
2107 {
2108 vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt));
2109 group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt));
2110 constructor = true;
2111 }
2112 else
2113 {
2114 gcc_assert (is_a <loop_vec_info> (vinfo));
2115 vectype = STMT_VINFO_VECTYPE (stmt_info);
2116 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
2117 }
2118
2119 if (!vectype)
2120 {
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2123 "Build SLP failed: unsupported data-type %T\n",
2124 scalar_type);
2125
2126 return false;
2127 }
2128 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2129
2130 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2131 scalar_stmts.create (group_size);
2132 stmt_vec_info next_info = stmt_info;
2133 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2134 {
2135 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2136 while (next_info)
2137 {
2138 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2139 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2140 }
2141 }
2142 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2143 {
2144 /* Collect the reduction stmts and store them in
2145 SLP_TREE_SCALAR_STMTS. */
2146 while (next_info)
2147 {
2148 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2149 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2150 }
2151 /* Mark the first element of the reduction chain as reduction to properly
2152 transform the node. In the reduction analysis phase only the last
2153 element of the chain is marked as reduction. */
2154 STMT_VINFO_DEF_TYPE (stmt_info)
2155 = STMT_VINFO_DEF_TYPE (scalar_stmts.last ());
2156 STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))
2157 = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ()));
2158 }
2159 else if (constructor)
2160 {
2161 tree rhs = gimple_assign_rhs1 (stmt_info->stmt);
2162 tree val;
2163 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val)
2164 {
2165 if (TREE_CODE (val) == SSA_NAME)
2166 {
2167 gimple* def = SSA_NAME_DEF_STMT (val);
2168 stmt_vec_info def_info = vinfo->lookup_stmt (def);
2169 /* Value is defined in another basic block. */
2170 if (!def_info)
2171 return false;
2172 scalar_stmts.safe_push (def_info);
2173 }
2174 else
2175 return false;
2176 }
2177 if (dump_enabled_p ())
2178 dump_printf_loc (MSG_NOTE, vect_location,
2179 "Analyzing vectorizable constructor: %G\n",
2180 stmt_info->stmt);
2181 }
2182 else
2183 {
2184 /* Collect reduction statements. */
2185 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2186 for (i = 0; reductions.iterate (i, &next_info); i++)
2187 scalar_stmts.safe_push (next_info);
2188 }
2189
2190 /* Build the tree for the SLP instance. */
2191 bool *matches = XALLOCAVEC (bool, group_size);
2192 unsigned npermutes = 0;
2193 poly_uint64 max_nunits = nunits;
2194 unsigned tree_size = 0;
2195 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2196 &max_nunits, matches, &npermutes,
2197 &tree_size, bst_map);
2198 if (node != NULL)
2199 {
2200 /* If this is a reduction chain with a conversion in front
2201 amend the SLP tree with a node for that. */
2202 if (!dr
2203 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
2204 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def)
2205 {
2206 /* Get at the conversion stmt - we know it's the single use
2207 of the last stmt of the reduction chain. */
2208 gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt;
2209 use_operand_p use_p;
2210 gimple *use_stmt;
2211 bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt);
2212 gcc_assert (r);
2213 next_info = vinfo->lookup_stmt (use_stmt);
2214 next_info = vect_stmt_to_vectorize (next_info);
2215 scalar_stmts = vNULL;
2216 scalar_stmts.create (group_size);
2217 for (unsigned i = 0; i < group_size; ++i)
2218 scalar_stmts.quick_push (next_info);
2219 slp_tree conv = vect_create_new_slp_node (scalar_stmts);
2220 SLP_TREE_CHILDREN (conv).quick_push (node);
2221 node = conv;
2222 /* We also have to fake this conversion stmt as SLP reduction group
2223 so we don't have to mess with too much code elsewhere. */
2224 REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info;
2225 REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL;
2226 }
2227
2228 /* Calculate the unrolling factor based on the smallest type. */
2229 poly_uint64 unrolling_factor
2230 = calculate_unrolling_factor (max_nunits, group_size);
2231
2232 if (maybe_ne (unrolling_factor, 1U)
2233 && is_a <bb_vec_info> (vinfo))
2234 {
2235 unsigned HOST_WIDE_INT const_max_nunits;
2236 if (!max_nunits.is_constant (&const_max_nunits)
2237 || const_max_nunits > group_size)
2238 {
2239 if (dump_enabled_p ())
2240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2241 "Build SLP failed: store group "
2242 "size not a multiple of the vector size "
2243 "in basic block SLP\n");
2244 vect_free_slp_tree (node, false);
2245 return false;
2246 }
2247 /* Fatal mismatch. */
2248 matches[group_size / const_max_nunits * const_max_nunits] = false;
2249 vect_free_slp_tree (node, false);
2250 }
2251 else
2252 {
2253 /* Create a new SLP instance. */
2254 new_instance = XNEW (class _slp_instance);
2255 SLP_INSTANCE_TREE (new_instance) = node;
2256 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
2257 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
2258 SLP_INSTANCE_LOADS (new_instance) = vNULL;
2259 SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL;
2260
2261 vect_gather_slp_loads (new_instance, node);
2262 if (dump_enabled_p ())
2263 dump_printf_loc (MSG_NOTE, vect_location,
2264 "SLP size %u vs. limit %u.\n",
2265 tree_size, max_tree_size);
2266
2267 /* Compute the load permutation. */
2268 slp_tree load_node;
2269 bool loads_permuted = false;
2270 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2271 {
2272 vec<unsigned> load_permutation;
2273 int j;
2274 stmt_vec_info load_info;
2275 bool this_load_permuted = false;
2276 load_permutation.create (group_size);
2277 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2278 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2279 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2280 {
2281 int load_place = vect_get_place_in_interleaving_chain
2282 (load_info, first_stmt_info);
2283 gcc_assert (load_place != -1);
2284 if (load_place != j)
2285 this_load_permuted = true;
2286 load_permutation.safe_push (load_place);
2287 }
2288 if (!this_load_permuted
2289 /* The load requires permutation when unrolling exposes
2290 a gap either because the group is larger than the SLP
2291 group-size or because there is a gap between the groups. */
2292 && (known_eq (unrolling_factor, 1U)
2293 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2294 && DR_GROUP_GAP (first_stmt_info) == 0)))
2295 {
2296 load_permutation.release ();
2297 continue;
2298 }
2299 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2300 loads_permuted = true;
2301 }
2302
2303 if (loads_permuted)
2304 {
2305 if (!vect_supported_load_permutation_p (new_instance))
2306 {
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2309 "Build SLP failed: unsupported load "
2310 "permutation %G", stmt_info->stmt);
2311 vect_free_slp_instance (new_instance, false);
2312 return false;
2313 }
2314 }
2315
2316 /* If the loads and stores can be handled with load/store-lane
2317 instructions do not generate this SLP instance. */
2318 if (is_a <loop_vec_info> (vinfo)
2319 && loads_permuted
2320 && dr && vect_store_lanes_supported (vectype, group_size, false))
2321 {
2322 slp_tree load_node;
2323 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (new_instance), i, load_node)
2324 {
2325 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2326 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2327 /* Use SLP for strided accesses (or if we can't load-lanes). */
2328 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2329 || ! vect_load_lanes_supported
2330 (STMT_VINFO_VECTYPE (stmt_vinfo),
2331 DR_GROUP_SIZE (stmt_vinfo), false))
2332 break;
2333 }
2334 if (i == SLP_INSTANCE_LOADS (new_instance).length ())
2335 {
2336 if (dump_enabled_p ())
2337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2338 "Built SLP cancelled: can use "
2339 "load/store-lanes\n");
2340 vect_free_slp_instance (new_instance, false);
2341 return false;
2342 }
2343 }
2344
2345 vinfo->slp_instances.safe_push (new_instance);
2346
2347 if (dump_enabled_p ())
2348 {
2349 dump_printf_loc (MSG_NOTE, vect_location,
2350 "Final SLP tree for instance:\n");
2351 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2352 }
2353
2354 return true;
2355 }
2356 }
2357 else
2358 {
2359 /* Failed to SLP. */
2360 /* Free the allocated memory. */
2361 scalar_stmts.release ();
2362 }
2363
2364 /* For basic block SLP, try to break the group up into multiples of the
2365 vector size. */
2366 unsigned HOST_WIDE_INT const_nunits;
2367 if (is_a <bb_vec_info> (vinfo)
2368 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2369 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2370 && nunits.is_constant (&const_nunits))
2371 {
2372 /* We consider breaking the group only on VF boundaries from the existing
2373 start. */
2374 for (i = 0; i < group_size; i++)
2375 if (!matches[i]) break;
2376
2377 if (i >= const_nunits && i < group_size)
2378 {
2379 /* Split into two groups at the first vector boundary before i. */
2380 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2381 unsigned group1_size = i & ~(const_nunits - 1);
2382
2383 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2384 group1_size);
2385 bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info,
2386 max_tree_size);
2387 /* If the first non-match was in the middle of a vector,
2388 skip the rest of that vector. */
2389 if (group1_size < i)
2390 {
2391 i = group1_size + const_nunits;
2392 if (i < group_size)
2393 rest = vect_split_slp_store_group (rest, const_nunits);
2394 }
2395 if (i < group_size)
2396 res |= vect_analyze_slp_instance (vinfo, bst_map,
2397 rest, max_tree_size);
2398 return res;
2399 }
2400 /* Even though not all stmts in the first vector matched, we might be
2401 able to SLP (some of) the remainder. FORNOW ignore this possibility. */
2402 }
2403
2404 return false;
2405 }
2406
2407
2408 /* Check if there are stmts in the loop that can be vectorized using SLP. Build SLP
2409 trees of packed scalar stmts if SLP is possible. */
2410
2411 opt_result
2412 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2413 {
2414 unsigned int i;
2415 stmt_vec_info first_element;
2416
2417 DUMP_VECT_SCOPE ("vect_analyze_slp");
2418
2419 scalar_stmts_to_slp_tree_map_t *bst_map
2420 = new scalar_stmts_to_slp_tree_map_t ();
2421
2422 /* Find SLP sequences starting from groups of grouped stores. */
2423 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2424 vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size);
2425
2426 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2427 {
2428 if (loop_vinfo->reduction_chains.length () > 0)
2429 {
2430 /* Find SLP sequences starting from reduction chains. */
2431 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2432 if (! vect_analyze_slp_instance (vinfo, bst_map, first_element,
2433 max_tree_size))
2434 {
2435 /* Dissolve reduction chain group. */
2436 stmt_vec_info vinfo = first_element;
2437 stmt_vec_info last = NULL;
2438 while (vinfo)
2439 {
2440 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2441 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2442 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2443 last = vinfo;
2444 vinfo = next;
2445 }
2446 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2447 /* It can still be vectorized as part of an SLP reduction. */
2448 loop_vinfo->reductions.safe_push (last);
2449 }
2450 }
2451
2452 /* Find SLP sequences starting from groups of reductions. */
2453 if (loop_vinfo->reductions.length () > 1)
2454 vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0],
2455 max_tree_size);
2456 }
2457
2458 /* The map keeps a reference on SLP nodes built, release that. */
2459 for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
2460 it != bst_map->end (); ++it)
2461 if ((*it).second)
2462 vect_free_slp_tree ((*it).second, false);
2463 delete bst_map;
2464
2465 return opt_result::success ();
2466 }
2467
2468
2469 /* For each possible SLP instance decide whether to SLP it and calculate the
2470 overall unrolling factor needed to SLP the loop. Return TRUE if we decided
2471 to SLP at least one instance. */
2472
2473 bool
2474 vect_make_slp_decision (loop_vec_info loop_vinfo)
2475 {
2476 unsigned int i;
2477 poly_uint64 unrolling_factor = 1;
2478 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2479 slp_instance instance;
2480 int decided_to_slp = 0;
2481
2482 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2483
2484 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2485 {
2486 /* FORNOW: SLP if you can. */
2487 /* All unroll factors have the form:
2488
2489 GET_MODE_SIZE (vinfo->vector_mode) * X
2490
2491 for some rational X, so they must have a common multiple. */
2492 unrolling_factor
2493 = force_common_multiple (unrolling_factor,
2494 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2495
2496 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2497 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2498 loop-based vectorization. Such stmts will be marked as HYBRID. */
2499 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
2500 decided_to_slp++;
2501 }
2502
2503 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2504
2505 if (decided_to_slp && dump_enabled_p ())
2506 {
2507 dump_printf_loc (MSG_NOTE, vect_location,
2508 "Decided to SLP %d instances. Unrolling factor ",
2509 decided_to_slp);
2510 dump_dec (MSG_NOTE, unrolling_factor);
2511 dump_printf (MSG_NOTE, "\n");
2512 }
2513
2514 return (decided_to_slp > 0);
2515 }
2516
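/* Worked example for the common-multiple computation above: for
   constant factors force_common_multiple behaves like lcm, so two
   instances with unrolling factors 2 and 3 force a loop unrolling
   factor of lcm (2, 3) = 6, the smallest factor at which both
   instances operate on whole vectors.  */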
2517
2518 /* Find stmts that must be both vectorized and SLPed (since they feed stmts that
2519 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2520
2521 static void
2522 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype,
2523 hash_map<slp_tree, unsigned> &visited)
2524 {
2525 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2526 imm_use_iterator imm_iter;
2527 gimple *use_stmt;
2528 stmt_vec_info use_vinfo;
2529 slp_tree child;
2530 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2531 int j;
2532
2533 /* We need to union stype over the incoming graph edges but we still
2534 want to limit recursion to stay O(N+E). */
2535 unsigned visited_cnt = ++visited.get_or_insert (node);
2536 gcc_assert (visited_cnt <= node->refcnt);
2537 bool only_edge = (visited_cnt != node->refcnt);
2538
2539 /* Propagate hybrid down the SLP tree. */
2540 if (stype == hybrid)
2541 ;
2542 else if (HYBRID_SLP_STMT (stmt_vinfo))
2543 stype = hybrid;
2544 else if (!only_edge)
2545 {
2546 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2547 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2548 /* If we get a pattern stmt here we have to use the LHS of the
2549 original stmt for immediate uses. */
2550 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2551 tree def;
2552 if (gimple_code (stmt) == GIMPLE_PHI)
2553 def = gimple_phi_result (stmt);
2554 else
2555 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2556 if (def)
2557 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2558 {
2559 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2560 if (!use_vinfo)
2561 continue;
2562 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2563 if (!STMT_SLP_TYPE (use_vinfo)
2564 && (STMT_VINFO_RELEVANT (use_vinfo)
2565 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2566 && !(gimple_code (use_stmt) == GIMPLE_PHI
2567 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2568 {
2569 if (dump_enabled_p ())
2570 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2571 "def in non-SLP stmt: %G", use_stmt);
2572 stype = hybrid;
2573 }
2574 }
2575 }
2576
2577 if (stype == hybrid
2578 && !HYBRID_SLP_STMT (stmt_vinfo))
2579 {
2580 if (dump_enabled_p ())
2581 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2582 stmt_vinfo->stmt);
2583 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2584 }
2585
2586 if (!only_edge)
2587 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2588 if (SLP_TREE_DEF_TYPE (child) != vect_external_def
2589 && SLP_TREE_DEF_TYPE (child) != vect_constant_def)
2590 vect_detect_hybrid_slp_stmts (child, i, stype, visited);
2591 }
2592
2593 /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
2594
2595 static tree
2596 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2597 {
2598 walk_stmt_info *wi = (walk_stmt_info *)data;
2599 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2600
2601 if (wi->is_lhs)
2602 return NULL_TREE;
2603
2604 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2605 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2606 {
2607 if (dump_enabled_p ())
2608 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2609 def_stmt_info->stmt);
2610 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2611 }
2612
2613 return NULL_TREE;
2614 }
2615
2616 static tree
2617 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2618 walk_stmt_info *wi)
2619 {
2620 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2621 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2622 /* If the stmt is in a SLP instance then this isn't a reason
2623 to mark use definitions in other SLP instances as hybrid. */
2624 if (! STMT_SLP_TYPE (use_vinfo)
2625 && (STMT_VINFO_RELEVANT (use_vinfo)
2626 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2627 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2628 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2629 ;
2630 else
2631 *handled = true;
2632 return NULL_TREE;
2633 }
2634
2635 /* Find stmts that must be both vectorized and SLPed. */
2636
2637 void
2638 vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2639 {
2640 unsigned int i;
2641 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2642 slp_instance instance;
2643
2644 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2645
2646 /* First walk all pattern stmts in the loop and mark defs of uses as
2647 hybrid, because immediate uses in pattern stmts are not recorded. */
2648 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2649 {
2650 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2651 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2652 gsi_next (&gsi))
2653 {
2654 gimple *stmt = gsi_stmt (gsi);
2655 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2656 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2657 {
2658 walk_stmt_info wi;
2659 memset (&wi, 0, sizeof (wi));
2660 wi.info = loop_vinfo;
2661 gimple_stmt_iterator gsi2
2662 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2663 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2664 vect_detect_hybrid_slp_1, &wi);
2665 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2666 vect_detect_hybrid_slp_2,
2667 vect_detect_hybrid_slp_1, &wi);
2668 }
2669 }
2670 }
2671
2672 /* Then walk the SLP instance trees marking stmts with uses in
2673 non-SLP stmts as hybrid, also propagating hybrid down the
2674 SLP tree, collecting the above info on-the-fly. */
2675 for (unsigned j = 0;; ++j)
2676 {
2677 hash_map<slp_tree, unsigned> visited;
2678 bool any = false;
2679 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2680 if (j < SLP_INSTANCE_GROUP_SIZE (instance))
2681 {
2682 any = true;
2683 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2684 j, pure_slp, visited);
2685 }
2686 if (!any)
2687 break;
2688 }
2689 }
2690
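/* Illustrative source-level example of a hybrid stmt (not from a
   testcase):

     for (i = 0; i < n; i += 2)
       {
         t0 = a[i] + 1;
         t1 = a[i + 1] + 1;
         b[i] = t0;
         b[i + 1] = t1;
         sum += t0;
       }

   t0 and t1 feed the SLP store group for b[], but t0 is also used by
   the non-SLP reduction sum += t0, so t0 must be vectorized both ways
   and is marked hybrid by the walk above.  */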
2691
2692 /* Initialize a bb_vec_info struct for the statements between
2693 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2694
2695 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2696 gimple_stmt_iterator region_end_in,
2697 vec_info_shared *shared)
2698 : vec_info (vec_info::bb, init_cost (NULL), shared),
2699 bb (gsi_bb (region_begin_in)),
2700 region_begin (region_begin_in),
2701 region_end (region_end_in)
2702 {
2703 gimple_stmt_iterator gsi;
2704
2705 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2706 gsi_next (&gsi))
2707 {
2708 gimple *stmt = gsi_stmt (gsi);
2709 gimple_set_uid (stmt, 0);
2710 add_stmt (stmt);
2711 }
2712
2713 bb->aux = this;
2714 }
2715
2716
2717 /* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2718 stmts in the basic block. */
2719
2720 _bb_vec_info::~_bb_vec_info ()
2721 {
2722 for (gimple_stmt_iterator si = region_begin;
2723 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2724 /* Reset region marker. */
2725 gimple_set_uid (gsi_stmt (si), -1);
2726
2727 bb->aux = NULL;
2728 }
2729
2730 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2731 given that its child nodes have already been processed, and that
2732 their def types currently match their SLP node's def type. */
2733
2734 static bool
2735 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2736 slp_instance node_instance,
2737 stmt_vector_for_cost *cost_vec)
2738 {
2739 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2740 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2741
2742 /* Calculate the number of vector statements to be created for the
2743 scalar stmts in this node. For SLP reductions it is equal to the
2744 number of vector statements in the children (which has already been
2745 calculated by the recursive call). Otherwise it is the number of
2746 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2747 VF divided by the number of elements in a vector. */
2748 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2749 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2750 {
2751 for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i)
2752 if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def)
2753 {
2754 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2755 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]);
2756 break;
2757 }
2758 }
2759 else
2760 {
2761 poly_uint64 vf;
2762 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2763 vf = loop_vinfo->vectorization_factor;
2764 else
2765 vf = 1;
2766 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2767 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2768 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2769 = vect_get_num_vectors (vf * group_size, vectype);
2770 }
2771
2772 bool dummy;
2773 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2774 }
2775
2776 /* Try to build NODE from scalars, returning true on success.
2777 NODE_INSTANCE is the SLP instance that contains NODE. */
2778
2779 static bool
2780 vect_slp_convert_to_external (vec_info *vinfo, slp_tree node,
2781 slp_instance node_instance)
2782 {
2783 stmt_vec_info stmt_info;
2784 unsigned int i;
2785
2786 if (!is_a <bb_vec_info> (vinfo)
2787 || node == SLP_INSTANCE_TREE (node_instance)
2788 || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node)))
2789 return false;
2790
2791 if (dump_enabled_p ())
2792 dump_printf_loc (MSG_NOTE, vect_location,
2793 "Building vector operands from scalars instead\n");
2794
2795 /* Don't remove and free the child nodes here, since they could be
2796 referenced by other structures. The analysis and scheduling phases
2797 (need to) ignore child nodes of anything that isn't vect_internal_def. */
2798 unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length ();
2799 SLP_TREE_DEF_TYPE (node) = vect_external_def;
2800 SLP_TREE_SCALAR_OPS (node).safe_grow (group_size);
2801 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2802 {
2803 tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt);
2804 SLP_TREE_SCALAR_OPS (node)[i] = lhs;
2805 }
2806 return true;
2807 }
2808
2809 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2810 the subtree. NODE_INSTANCE is the SLP instance that contains NODE.
2811
2812 Return true if the operations are supported. */
2813
2814 static bool
2815 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2816 slp_instance node_instance,
2817 hash_set<slp_tree> &visited,
2818 hash_set<slp_tree> &lvisited,
2819 stmt_vector_for_cost *cost_vec)
2820 {
2821 int i, j;
2822 slp_tree child;
2823
2824 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2825 return true;
2826
2827 /* If we already analyzed the exact same set of scalar stmts we're done.
2828 We share the generated vector stmts for those.
2829 The SLP graph is acyclic so not caching whether we failed or succeeded
2830 doesn't result in any issue since we throw away the lvisited set
2831 when we fail. */
2832 if (visited.contains (node)
2833 || lvisited.add (node))
2834 return true;
2835
2836 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2837 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2838 visited, lvisited, cost_vec))
2839 return false;
2840
2841 /* ??? We have to catch the case late where the same first scalar stmt
2842 appears in multiple SLP children with different def types and fail. Remember
2843 original def types first since SLP_TREE_DEF_TYPE doesn't necessarily
2844 match it when that is vect_internal_def. */
2845 auto_vec<vect_def_type, 4> dt;
2846 dt.safe_grow (SLP_TREE_CHILDREN (node).length ());
2847 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2848 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2849 dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]);
2850
2851 /* Push SLP node def-type to stmt operands. */
2852 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2853 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def
2854 && SLP_TREE_SCALAR_STMTS (child).length () != 0)
2855 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2856 = SLP_TREE_DEF_TYPE (child);
2857
2858 /* Check everything worked out. */
2859 bool res = true;
2860 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2861 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2862 {
2863 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2864 {
2865 if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2866 != SLP_TREE_DEF_TYPE (child))
2867 res = false;
2868 }
2869 else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2870 != dt[j])
2871 res = false;
2872 }
2873 if (!res && dump_enabled_p ())
2874 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2875 "not vectorized: same operand with different "
2876 "def type in stmt.\n");
2877
2878 if (res)
2879 res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2880 cost_vec);
2881
2882 /* Restore def-types. */
2883 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2884 if (SLP_TREE_SCALAR_STMTS (child).length () != 0)
2885 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j];
2886
2887 /* If this node can't be vectorized, try pruning the tree here rather
2888 than felling the whole thing. */
2889 if (!res && vect_slp_convert_to_external (vinfo, node, node_instance))
2890 res = true;
2891
2892 return res;
2893 }
2894
2895
2896 /* Analyze statements in SLP instances of VINFO. Return true if the
2897 operations are supported. */
2898
2899 bool
2900 vect_slp_analyze_operations (vec_info *vinfo)
2901 {
2902 slp_instance instance;
2903 int i;
2904
2905 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2906
2907 hash_set<slp_tree> visited;
2908 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2909 {
2910 hash_set<slp_tree> lvisited;
2911 stmt_vector_for_cost cost_vec;
2912 cost_vec.create (2);
2913 if (!vect_slp_analyze_node_operations (vinfo,
2914 SLP_INSTANCE_TREE (instance),
2915 instance, visited, lvisited,
2916 &cost_vec)
2917 /* Instances with a root stmt require vectorized defs for the
2918 SLP tree root. */
2919 || (SLP_INSTANCE_ROOT_STMT (instance)
2920 && (SLP_TREE_DEF_TYPE (SLP_INSTANCE_TREE (instance))
2921 != vect_internal_def)))
2922 {
2923 slp_tree node = SLP_INSTANCE_TREE (instance);
2924 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2925 if (dump_enabled_p ())
2926 dump_printf_loc (MSG_NOTE, vect_location,
2927 "removing SLP instance operations starting from: %G",
2928 stmt_info->stmt);
2929 vect_free_slp_instance (instance, false);
2930 vinfo->slp_instances.ordered_remove (i);
2931 cost_vec.release ();
2932 }
2933 else
2934 {
2935 for (hash_set<slp_tree>::iterator x = lvisited.begin();
2936 x != lvisited.end(); ++x)
2937 visited.add (*x);
2938 i++;
2939
2940 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2941 cost_vec.release ();
2942 }
2943 }
2944
2945 return !vinfo->slp_instances.is_empty ();
2946 }
2947
2948
2949 /* Compute the scalar cost of the SLP node NODE and its children,
2950 recording it in COST_VEC. Do not account defs that are marked in
2951 LIFE and update LIFE according to uses of NODE. */
2952
2953 static void
2954 vect_bb_slp_scalar_cost (basic_block bb,
2955 slp_tree node, vec<bool, va_heap> *life,
2956 stmt_vector_for_cost *cost_vec,
2957 hash_set<slp_tree> &visited)
2958 {
2959 unsigned i;
2960 stmt_vec_info stmt_info;
2961 slp_tree child;
2962
2963 if (visited.add (node))
2964 return;
2965
2966 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2967 {
2968 gimple *stmt = stmt_info->stmt;
2969 vec_info *vinfo = stmt_info->vinfo;
2970 ssa_op_iter op_iter;
2971 def_operand_p def_p;
2972
2973 if ((*life)[i])
2974 continue;
2975
2976 /* If there is a non-vectorized use of the defs then the scalar
2977 stmt is kept live in which case we do not account it or any
2978 required defs in the SLP children in the scalar cost. This
2979 way we make the vectorization more costly when compared to
2980 the scalar cost. */
2981 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2982 {
2983 imm_use_iterator use_iter;
2984 gimple *use_stmt;
2985 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2986 if (!is_gimple_debug (use_stmt))
2987 {
2988 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2989 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2990 {
2991 (*life)[i] = true;
2992 BREAK_FROM_IMM_USE_STMT (use_iter);
2993 }
2994 }
2995 }
2996 if ((*life)[i])
2997 continue;
2998
2999 /* Count scalar stmts only once. */
3000 if (gimple_visited_p (stmt))
3001 continue;
3002 gimple_set_visited (stmt, true);
3003
3004 vect_cost_for_stmt kind;
3005 if (STMT_VINFO_DATA_REF (stmt_info))
3006 {
3007 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
3008 kind = scalar_load;
3009 else
3010 kind = scalar_store;
3011 }
3012 else if (vect_nop_conversion_p (stmt_info))
3013 continue;
3014 else
3015 kind = scalar_stmt;
3016 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
3017 }
3018
3019 auto_vec<bool, 20> subtree_life;
3020 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3021 {
3022 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3023 {
3024 /* Do not directly pass LIFE to the recursive call, copy it to
3025 confine changes in the callee to the current child/subtree. */
3026 subtree_life.safe_splice (*life);
3027 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec,
3028 visited);
3029 subtree_life.truncate (0);
3030 }
3031 }
3032 }
3033
3034 /* Check if vectorization of the basic block is profitable. */
3035
3036 static bool
3037 vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
3038 {
3039 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3040 slp_instance instance;
3041 int i;
3042 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
3043 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
3044
3045 /* Calculate scalar cost. */
3046 stmt_vector_for_cost scalar_costs;
3047 scalar_costs.create (0);
3048 hash_set<slp_tree> visited;
3049 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3050 {
3051 auto_vec<bool, 20> life;
3052 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
3053 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
3054 SLP_INSTANCE_TREE (instance),
3055 &life, &scalar_costs, visited);
3056 }
3057 void *target_cost_data = init_cost (NULL);
3058 add_stmt_costs (target_cost_data, &scalar_costs);
3059 scalar_costs.release ();
3060 unsigned dummy;
3061 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
3062 destroy_cost_data (target_cost_data);
3063
3064 /* Unset visited flag. */
3065 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
3066 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3067 gimple_set_visited (gsi_stmt (gsi), false);
3068
3069 /* Complete the target-specific cost calculation. */
3070 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
3071 &vec_inside_cost, &vec_epilogue_cost);
3072
3073 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
3074
3075 if (dump_enabled_p ())
3076 {
3077 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
3078 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
3079 vec_inside_cost);
3080 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
3081 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
3082 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
3083 }
3084
3085 /* Vectorization is profitable if its cost is no more than the cost of
3086 the scalar version. Note that we err on the vector side for equal
3087 cost because the cost estimate is otherwise quite pessimistic
3088 (constant uses are free on the scalar side but cost a load on the
3089 vector side, for example). */
3090 if (vec_outside_cost + vec_inside_cost > scalar_cost)
3091 return false;
3092
3093 return true;
3094 }
3095
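/* Worked example for the comparison above (illustrative numbers):
   with vec_inside_cost 8, vec_prologue_cost 2 and vec_epilogue_cost 1
   the vector cost is 11.  Against scalar_cost 12 the region is
   vectorized, against scalar_cost 10 it is not, and at exactly 11 we
   err on the vector side as the comment explains.  */
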
3096 /* Find any vectorizable constructors and add them to the grouped_stores
3097 array. */
3098
3099 static void
3100 vect_slp_check_for_constructors (bb_vec_info bb_vinfo)
3101 {
3102 gimple_stmt_iterator gsi;
3103
3104 for (gsi = bb_vinfo->region_begin;
3105 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
3106 {
3107 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (gsi));
3108 if (!stmt || gimple_assign_rhs_code (stmt) != CONSTRUCTOR)
3109 continue;
3110
3111 tree rhs = gimple_assign_rhs1 (stmt);
3112 if (!VECTOR_TYPE_P (TREE_TYPE (rhs))
3113 || maybe_ne (TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)),
3114 CONSTRUCTOR_NELTS (rhs))
3115 || VECTOR_TYPE_P (TREE_TYPE (CONSTRUCTOR_ELT (rhs, 0)->value))
3116 || uniform_vector_p (rhs))
3117 continue;
3118
3119 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
3120 BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info);
3121 }
3122 }
3123
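/* Illustrative example of a constructor the loop above accepts
   (hypothetical GIMPLE): given SSA names a_1 .. d_1,

     v_2 = {a_1, b_1, c_1, d_1};

   with v_2 of type V4SI is recorded so SLP analysis can try to build
   a tree from the four element defs.  Uniform constructors such as
   {a_1, a_1, a_1, a_1} and constructors whose elements are themselves
   vectors are skipped.  */
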
3124 /* Check if the region described by BB_VINFO can be vectorized, returning
3125 true if so. When returning false, set FATAL to true if the same failure
3126 would prevent vectorization at other vector sizes, false if it is still
3127 worth trying other sizes. N_STMTS is the number of statements in the
3128 region. */
3129
3130 static bool
3131 vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal)
3132 {
3133 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
3134
3135 slp_instance instance;
3136 int i;
3137 poly_uint64 min_vf = 2;
3138
3139 /* The first group of checks is independent of the vector size. */
3140 fatal = true;
3141
3142 /* Analyze the data references. */
3143
3144 if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL))
3145 {
3146 if (dump_enabled_p ())
3147 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3148 "not vectorized: unhandled data-ref in basic "
3149 "block.\n");
3150 return false;
3151 }
3152
3153 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
3154 {
3155 if (dump_enabled_p ())
3156 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3157 "not vectorized: not enough data-refs in "
3158 "basic block.\n");
3159 return false;
3160 }
3161
3162 if (!vect_analyze_data_ref_accesses (bb_vinfo))
3163 {
3164 if (dump_enabled_p ())
3165 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3166 "not vectorized: unhandled data access in "
3167 "basic block.\n");
3168 return false;
3169 }
3170
3171 vect_slp_check_for_constructors (bb_vinfo);
3172
3173 /* If there are no grouped stores in the region there is no need
3174 to continue with pattern recog as vect_analyze_slp will fail
3175 anyway. */
3176 if (bb_vinfo->grouped_stores.is_empty ())
3177 {
3178 if (dump_enabled_p ())
3179 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3180 "not vectorized: no grouped stores in "
3181 "basic block.\n");
3182 return false;
3183 }
3184
3185 /* The rest of the analysis below depends on the vector size in some way. */
3186 fatal = false;
3187
3188 vect_pattern_recog (bb_vinfo);
3189
3190 /* Check the SLP opportunities in the basic block, analyze and build SLP
3191 trees. */
3192 if (!vect_analyze_slp (bb_vinfo, n_stmts))
3193 {
3194 if (dump_enabled_p ())
3195 {
3196 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3197 "Failed to SLP the basic block.\n");
3198 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3199 "not vectorized: failed to find SLP opportunities "
3200 "in basic block.\n");
3201 }
3202 return false;
3203 }
3204
3205 vect_record_base_alignments (bb_vinfo);
3206
3207 /* Analyze and verify the alignment of data references and the
3208 dependence in the SLP instances. */
3209 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
3210 {
3211 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
3212 || ! vect_slp_analyze_instance_dependence (instance))
3213 {
3214 slp_tree node = SLP_INSTANCE_TREE (instance);
3215 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3216 if (dump_enabled_p ())
3217 dump_printf_loc (MSG_NOTE, vect_location,
3218 "removing SLP instance operations starting from: %G",
3219 stmt_info->stmt);
3220 vect_free_slp_instance (instance, false);
3221 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
3222 continue;
3223 }
3224
3225 /* Mark all the statements that we want to vectorize as pure SLP and
3226 relevant. */
3227 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance));
3228 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
3229 if (SLP_INSTANCE_ROOT_STMT (instance))
3230 STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp;
3231
3232 i++;
3233 }
3234 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
3235 return false;
3236
3237 if (!vect_slp_analyze_operations (bb_vinfo))
3238 {
3239 if (dump_enabled_p ())
3240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3241 "not vectorized: bad operation in basic block.\n");
3242 return false;
3243 }
3244
3245 /* Cost model: check if the vectorization is worthwhile. */
3246 if (!unlimited_cost_model (NULL)
3247 && !vect_bb_vectorization_profitable_p (bb_vinfo))
3248 {
3249 if (dump_enabled_p ())
3250 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3251 "not vectorized: vectorization is not "
3252 "profitable.\n");
3253 return false;
3254 }
3255
3256 if (dump_enabled_p ())
3257 dump_printf_loc (MSG_NOTE, vect_location,
3258 "Basic block will be vectorized using SLP\n");
3259 return true;
3260 }
3261
3262 /* Subroutine of vect_slp_bb. Try to vectorize the statements between
3263 REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
3264 on success. The region has N_STMTS statements and has the datarefs
3265 given by DATAREFS. */
3266
3267 static bool
3268 vect_slp_bb_region (gimple_stmt_iterator region_begin,
3269 gimple_stmt_iterator region_end,
3270 vec<data_reference_p> datarefs,
3271 unsigned int n_stmts)
3272 {
3273 bb_vec_info bb_vinfo;
3274 auto_vector_modes vector_modes;
3275
3276 /* Autodetect first vector size we try. */
3277 machine_mode next_vector_mode = VOIDmode;
3278 targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
3279 unsigned int mode_i = 0;
3280
3281 vec_info_shared shared;
3282
3283 machine_mode autodetected_vector_mode = VOIDmode;
3284 while (1)
3285 {
3286 bool vectorized = false;
3287 bool fatal = false;
3288 bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared);
3289
3290 bool first_time_p = shared.datarefs.is_empty ();
3291 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
3292 if (first_time_p)
3293 bb_vinfo->shared->save_datarefs ();
3294 else
3295 bb_vinfo->shared->check_datarefs ();
3296 bb_vinfo->vector_mode = next_vector_mode;
3297
3298 if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal)
3299 && dbg_cnt (vect_slp))
3300 {
3301 if (dump_enabled_p ())
3302 {
3303 dump_printf_loc (MSG_NOTE, vect_location,
3304 "***** Analysis succeeded with vector mode"
3305 " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode));
3306 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3307 }
3308
3309 bb_vinfo->shared->check_datarefs ();
3310 vect_schedule_slp (bb_vinfo);
3311
3312 unsigned HOST_WIDE_INT bytes;
3313 if (dump_enabled_p ())
3314 {
3315 if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes))
3316 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3317 "basic block part vectorized using %wu byte "
3318 "vectors\n", bytes);
3319 else
3320 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
3321 "basic block part vectorized using variable "
3322 "length vectors\n");
3323 }
3324
3325 vectorized = true;
3326 }
3327 else
3328 {
3329 if (dump_enabled_p ())
3330 dump_printf_loc (MSG_NOTE, vect_location,
3331 "***** Analysis failed with vector mode %s\n",
3332 GET_MODE_NAME (bb_vinfo->vector_mode));
3333 }
3334
3335 if (mode_i == 0)
3336 autodetected_vector_mode = bb_vinfo->vector_mode;
3337
3338 if (!fatal)
3339 while (mode_i < vector_modes.length ()
3340 && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i]))
3341 {
3342 if (dump_enabled_p ())
3343 dump_printf_loc (MSG_NOTE, vect_location,
3344 "***** The result for vector mode %s would"
3345 " be the same\n",
3346 GET_MODE_NAME (vector_modes[mode_i]));
3347 mode_i += 1;
3348 }
3349
3350 delete bb_vinfo;
3351
3352 if (mode_i < vector_modes.length ()
3353 && VECTOR_MODE_P (autodetected_vector_mode)
3354 && (related_vector_mode (vector_modes[mode_i],
3355 GET_MODE_INNER (autodetected_vector_mode))
3356 == autodetected_vector_mode)
3357 && (related_vector_mode (autodetected_vector_mode,
3358 GET_MODE_INNER (vector_modes[mode_i]))
3359 == vector_modes[mode_i]))
3360 {
3361 if (dump_enabled_p ())
3362 dump_printf_loc (MSG_NOTE, vect_location,
3363 "***** Skipping vector mode %s, which would"
3364 " repeat the analysis for %s\n",
3365 GET_MODE_NAME (vector_modes[mode_i]),
3366 GET_MODE_NAME (autodetected_vector_mode));
3367 mode_i += 1;
3368 }
3369
3370 if (vectorized
3371 || mode_i == vector_modes.length ()
3372 || autodetected_vector_mode == VOIDmode
3373 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3374 vector sizes will fail do not bother iterating. */
3375 || fatal)
3376 return vectorized;
3377
3378 /* Try the next biggest vector size. */
3379 next_vector_mode = vector_modes[mode_i++];
3380 if (dump_enabled_p ())
3381 dump_printf_loc (MSG_NOTE, vect_location,
3382 "***** Re-trying analysis with vector mode %s\n",
3383 GET_MODE_NAME (next_vector_mode));
3384 }
3385 }
3386
3387 /* Main entry for the BB vectorizer. Analyze and transform BB, returning
3388 true if anything in the basic block was vectorized. */
3389
3390 bool
3391 vect_slp_bb (basic_block bb)
3392 {
3393 gimple_stmt_iterator gsi;
3394 bool any_vectorized = false;
3395
3396 gsi = gsi_start_bb (bb);
3397 while (!gsi_end_p (gsi))
3398 {
3399 gimple_stmt_iterator region_begin = gsi;
3400 vec<data_reference_p> datarefs = vNULL;
3401 int insns = 0;
3402
3403 for (; !gsi_end_p (gsi); gsi_next (&gsi))
3404 {
3405 gimple *stmt = gsi_stmt (gsi);
3406 if (is_gimple_debug (stmt))
3407 continue;
3408 insns++;
3409
3410 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3411 vect_location = stmt;
3412
3413 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3414 break;
3415 }
3416
3417 /* Skip leading unhandled stmts. */
3418 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3419 {
3420 gsi_next (&gsi);
3421 continue;
3422 }
3423
3424 gimple_stmt_iterator region_end = gsi;
3425
3426 if (insns > param_slp_max_insns_in_bb)
3427 {
3428 if (dump_enabled_p ())
3429 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3430 "not vectorized: too many instructions in "
3431 "basic block.\n");
3432 }
3433 else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns))
3434 any_vectorized = true;
3435
3436 if (gsi_end_p (region_end))
3437 break;
3438
3439 /* Skip the unhandled stmt. */
3440 gsi_next (&gsi);
3441 }
3442
3443 return any_vectorized;
3444 }
3445
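/* Illustrative region split (hypothetical stmts): in a block

     a[0] = x; a[1] = y; foo (); b[0] = z; b[1] = w;

   the call foo () may clobber memory and fails the data-reference
   check, so the two store pairs are analyzed as two separate regions,
   each with its own dataref vector.  */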
3446
3447 /* Return true if the vector type for constant operand OP_NUM of STMT_VINFO is a boolean vector. */
3448
3449 static bool
3450 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
3451 {
3452 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3453 tree op, vectype;
3454 enum vect_def_type dt;
3455
3456 /* For comparisons and COND_EXPRs the type is chosen depending
3457 on the other, non-constant comparison operand. */
3458 if (TREE_CODE_CLASS (code) == tcc_comparison)
3459 {
3460 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3461 op = gimple_assign_rhs1 (stmt);
3462
3463 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3464 gcc_unreachable ();
3465
3466 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3467 }
3468
3469 if (code == COND_EXPR)
3470 {
3471 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3472 tree cond = gimple_assign_rhs1 (stmt);
3473
3474 if (TREE_CODE (cond) == SSA_NAME)
3475 {
3476 if (op_num > 0)
3477 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3478 op = cond;
3479 }
3480 else
3481 {
3482 if (op_num > 1)
3483 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3484 op = TREE_OPERAND (cond, 0);
3485 }
3486
3487 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3488 gcc_unreachable ();
3489
3490 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3491 }
3492
3493 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3494 }
3495
3496 /* Build a variable-length vector in which the elements in ELTS are repeated
3497 to fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3498 RESULTS and add any new instructions to SEQ.
3499
3500 The approach we use is:
3501
3502 (1) Find a vector mode VM with integer elements of mode IM.
3503
3504 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3505 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3506 from small vectors to IM.
3507
3508 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3509
3510 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3511 correct byte contents.
3512
3513 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3514
3515 We try to find the largest IM for which this sequence works, in order
3516 to cut down on the number of interleaves. */
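/* As an illustrative example (hypothetical modes, not tied to any
   particular target): with ELTS = {a, b, c, d} of 32-bit elements and a
   variable-length VECTOR_TYPE, we might pick IM = DImode and VM = VNx2DI.
   Step (2) view-converts {a, b} and {c, d} into two DImode scalars ab
   and cd, step (3) duplicates them into the VNx2DI vectors abab... and
   cdcd..., step (4) interleaves those into abcdabcd..., and step (5)
   view-converts the result back to VECTOR_TYPE, giving abcdabcd... at
   the original element width.  */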
3517
3518 void
3519 duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type,
3520 vec<tree> elts, unsigned int nresults,
3521 vec<tree> &results)
3522 {
3523 unsigned int nelts = elts.length ();
3524 tree element_type = TREE_TYPE (vector_type);
3525
3526 /* (1) Find a vector mode VM with integer elements of mode IM. */
3527 unsigned int nvectors = 1;
3528 tree new_vector_type;
3529 tree permutes[2];
3530 if (!can_duplicate_and_interleave_p (vinfo, nelts, element_type,
3531 &nvectors, &new_vector_type,
3532 permutes))
3533 gcc_unreachable ();
3534
3535 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3536 unsigned int partial_nelts = nelts / nvectors;
3537 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3538
3539 tree_vector_builder partial_elts;
3540 auto_vec<tree, 32> pieces (nvectors * 2);
3541 pieces.quick_grow (nvectors * 2);
3542 for (unsigned int i = 0; i < nvectors; ++i)
3543 {
3544 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3545 ELTS' has mode IM. */
3546 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3547 for (unsigned int j = 0; j < partial_nelts; ++j)
3548 partial_elts.quick_push (elts[i * partial_nelts + j]);
3549 tree t = gimple_build_vector (seq, &partial_elts);
3550 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3551 TREE_TYPE (new_vector_type), t);
3552
3553 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3554 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3555 }
3556
3557 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3558 correct byte contents.
3559
3560 We need to repeat the following operation log2(nvectors) times:
3561
3562 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3563 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3564
3565 However, if each input repeats every N elements and the VF is
3566 a multiple of N * 2, the HI result is the same as the LO. */
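     /* As an illustrative walk-through: with NVECTORS == 4 inputs
	in[0..3], the first pass computes

	  out[0] = VEC_PERM_EXPR (in[0], in[2], lo_permute);
	  out[1] = VEC_PERM_EXPR (in[0], in[2], hi_permute);
	  out[2] = VEC_PERM_EXPR (in[1], in[3], lo_permute);
	  out[3] = VEC_PERM_EXPR (in[1], in[3], hi_permute);

	and the second pass interleaves those outputs again, after which
	every result cycles through all four original elements in order.  */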
3567 unsigned int in_start = 0;
3568 unsigned int out_start = nvectors;
3569 unsigned int hi_start = nvectors / 2;
3570 /* A bound on the number of outputs needed to produce NRESULTS results
3571 in the final iteration. */
3572 unsigned int noutputs_bound = nvectors * nresults;
3573 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3574 {
3575 noutputs_bound /= 2;
3576 unsigned int limit = MIN (noutputs_bound, nvectors);
3577 for (unsigned int i = 0; i < limit; ++i)
3578 {
3579 if ((i & 1) != 0
3580 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3581 2 * in_repeat))
3582 {
3583 pieces[out_start + i] = pieces[out_start + i - 1];
3584 continue;
3585 }
3586
3587 tree output = make_ssa_name (new_vector_type);
3588 tree input1 = pieces[in_start + (i / 2)];
3589 tree input2 = pieces[in_start + (i / 2) + hi_start];
3590 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3591 input1, input2,
3592 permutes[i & 1]);
3593 gimple_seq_add_stmt (seq, stmt);
3594 pieces[out_start + i] = output;
3595 }
3596 std::swap (in_start, out_start);
3597 }
3598
3599 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3600 results.reserve (nresults);
3601 for (unsigned int i = 0; i < nresults; ++i)
3602 if (i < nvectors)
3603 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3604 pieces[in_start + i]));
3605 else
3606 results.quick_push (results[i - nvectors]);
3607 }
3608
3609
3610 /* For constant and loop invariant defs of SLP_NODE this function returns
3611 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3612 OP_NUM determines which child of SLP_NODE holds the scalar
3613 operands. */
3614
3615 static void
3616 vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
3617 vec<tree> *vec_oprnds)
3618 {
3619 slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
3620 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3621 vec_info *vinfo = stmt_vinfo->vinfo;
3622 unsigned HOST_WIDE_INT nunits;
3623 tree vec_cst;
3624 unsigned j, number_of_places_left_in_vector;
3625 tree vector_type;
3626 tree vop;
3627 int group_size = op_node->ops.length ();
3628 unsigned int vec_num, i;
3629 unsigned number_of_copies = 1;
3630 bool constant_p;
3631 tree neutral_op = NULL;
3632 gimple_seq ctor_seq = NULL;
3633 auto_vec<tree, 16> permute_results;
3634
3635 /* ??? SLP analysis should compute the vector type for the
3636 constant / invariant and store it in the SLP node. */
3637 tree op = op_node->ops[0];
3638 /* Check if vector type is a boolean vector. */
3639 tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3640 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3641 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3642 vector_type = truth_type_for (stmt_vectype);
3643 else
3644 vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
3645
3646 /* ??? For lane-reducing ops we should also have the required number
3647 of vector stmts initialized rather than second-guessing here. */
3648 unsigned int number_of_vectors;
3649 if (is_gimple_assign (stmt_vinfo->stmt)
3650 && (gimple_assign_rhs_code (stmt_vinfo->stmt) == SAD_EXPR
3651 || gimple_assign_rhs_code (stmt_vinfo->stmt) == DOT_PROD_EXPR
3652 || gimple_assign_rhs_code (stmt_vinfo->stmt) == WIDEN_SUM_EXPR))
3653 number_of_vectors = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3654 else
3655 number_of_vectors
3656 = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node)
3657 * TYPE_VECTOR_SUBPARTS (stmt_vectype),
3658 vector_type);
3659 vec_oprnds->create (number_of_vectors);
3660 auto_vec<tree> voprnds (number_of_vectors);
3661
3662 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
3663 the created vectors. It is greater than 1 if unrolling is performed.
3664
3665 For example, we have two scalar operands, s1 and s2 (e.g., group of
3666 strided accesses of size two), while NUNITS is four (i.e., four scalars
3667 of this type can be packed in a vector). The output vector will contain
3668 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
3669 will be 2).
3670
3671 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
3672 containing the operands.
3673
3674 For example, NUNITS is four as before, and the group size is 8
3675 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3676 {s5, s6, s7, s8}. */
3677
3678 /* When using duplicate_and_interleave, we just need one element for
3679 each scalar statement. */
3680 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3681 nunits = group_size;
3682
3683 number_of_copies = nunits * number_of_vectors / group_size;
3684
3685 number_of_places_left_in_vector = nunits;
3686 constant_p = true;
3687 tree_vector_builder elts (vector_type, nunits, 1);
3688 elts.quick_grow (nunits);
3689 bool place_after_defs = false;
3690 for (j = 0; j < number_of_copies; j++)
3691 {
3692 for (i = group_size - 1; op_node->ops.iterate (i, &op); i--)
3693 {
3694 /* Create 'vect_ = {op0,op1,...,opn}'. */
3695 number_of_places_left_in_vector--;
3696 tree orig_op = op;
3697 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
3698 {
3699 if (CONSTANT_CLASS_P (op))
3700 {
3701 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3702 {
3703 /* Can't use VIEW_CONVERT_EXPR for booleans because
3704 of possibly different sizes of scalar value and
3705 vector element. */
3706 if (integer_zerop (op))
3707 op = build_int_cst (TREE_TYPE (vector_type), 0);
3708 else if (integer_onep (op))
3709 op = build_all_ones_cst (TREE_TYPE (vector_type));
3710 else
3711 gcc_unreachable ();
3712 }
3713 else
3714 op = fold_unary (VIEW_CONVERT_EXPR,
3715 TREE_TYPE (vector_type), op);
3716 gcc_assert (op && CONSTANT_CLASS_P (op));
3717 }
3718 else
3719 {
3720 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
3721 gimple *init_stmt;
3722 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3723 {
3724 tree true_val
3725 = build_all_ones_cst (TREE_TYPE (vector_type));
3726 tree false_val
3727 = build_zero_cst (TREE_TYPE (vector_type));
3728 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
3729 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3730 op, true_val,
3731 false_val);
3732 }
3733 else
3734 {
3735 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3736 op);
3737 init_stmt
3738 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3739 op);
3740 }
3741 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3742 op = new_temp;
3743 }
3744 }
3745 elts[number_of_places_left_in_vector] = op;
3746 if (!CONSTANT_CLASS_P (op))
3747 constant_p = false;
3748 if (TREE_CODE (orig_op) == SSA_NAME
3749 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3750 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3751 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3752 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3753 place_after_defs = true;
3754
3755 if (number_of_places_left_in_vector == 0)
3756 {
3757 if (constant_p
3758 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3759 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3760 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3761 else
3762 {
3763 if (permute_results.is_empty ())
3764 duplicate_and_interleave (vinfo, &ctor_seq, vector_type,
3765 elts, number_of_vectors,
3766 permute_results);
3767 vec_cst = permute_results[number_of_vectors - j - 1];
3768 }
3769 tree init;
3770 gimple_stmt_iterator gsi;
3771 if (place_after_defs)
3772 {
3773 stmt_vec_info last_stmt_info
3774 = vect_find_last_scalar_stmt_in_slp (slp_node);
3775 gsi = gsi_for_stmt (last_stmt_info->stmt);
3776 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3777 &gsi);
3778 }
3779 else
3780 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3781 NULL);
3782 if (ctor_seq != NULL)
3783 {
3784 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3785 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3786 ctor_seq = NULL;
3787 }
3788 voprnds.quick_push (init);
3789 place_after_defs = false;
3790 number_of_places_left_in_vector = nunits;
3791 constant_p = true;
3792 elts.new_vector (vector_type, nunits, 1);
3793 elts.quick_grow (nunits);
3794 }
3795 }
3796 }
3797
3798 /* Since the vectors are created in reverse order, reverse the list
3799 here. */
3800 vec_num = voprnds.length ();
3801 for (j = vec_num; j != 0; j--)
3802 {
3803 vop = voprnds[j - 1];
3804 vec_oprnds->quick_push (vop);
3805 }
3806
3807 /* If the vectorization factor is greater than the unrolling factor
3808 needed for the SLP group of stmts, NUMBER_OF_VECTORS is greater than
3809 the number of vectors created above, and hence we have to replicate
3810 those vectors. */
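	/* For example (illustrative): if NUMBER_OF_VECTORS is 4 but only
	   two vectors v0 and v1 were created above, the loop below extends
	   the operand list to {v0, v1, v0, v1}; NEUTRAL_OP is never set in
	   this function, so the replication branch is the one taken.  */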
3811 while (number_of_vectors > vec_oprnds->length ())
3812 {
3813 tree neutral_vec = NULL;
3814
3815 if (neutral_op)
3816 {
3817 if (!neutral_vec)
3818 neutral_vec = build_vector_from_val (vector_type, neutral_op);
3819
3820 vec_oprnds->quick_push (neutral_vec);
3821 }
3822 else
3823 {
3824 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3825 vec_oprnds->quick_push (vop);
3826 }
3827 }
3828 }
3829
3830
3831 /* Collect the vectorized definitions recorded in SLP_NODE (the lhs of
3832 each vector stmt) into VEC_OPRNDS. */
3833
3834 static void
3835 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3836 {
3837 stmt_vec_info vec_def_stmt_info;
3838 unsigned int i;
3839
3840 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3841
3842 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3843 vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt));
3844 }
3845
3846
3847 /* Get N vectorized definitions for SLP_NODE.
3848 If the scalar definitions are loop invariants or constants, collect them and
3849 call vect_get_constant_vectors() to create vector stmts.
3850 Otherwise, the def-stmts must already be vectorized and the vectorized stmts
3851 must be stored in the corresponding child of SLP_NODE, and we call
3852 vect_get_slp_vect_defs () to retrieve them. */
3853
3854 void
3855 vect_get_slp_defs (slp_tree slp_node, vec<vec<tree> > *vec_oprnds, unsigned n)
3856 {
3857 if (n == -1U)
3858 n = SLP_TREE_CHILDREN (slp_node).length ();
3859
3860 for (unsigned i = 0; i < n; ++i)
3861 {
3862 slp_tree child = SLP_TREE_CHILDREN (slp_node)[i];
3863
3864 vec<tree> vec_defs = vNULL;
3865
3866 /* For each operand we check if it has vectorized definitions in a child
3867 node or we need to create them (for invariants and constants). */
3868 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3869 {
3870 vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child));
3871 vect_get_slp_vect_defs (child, &vec_defs);
3872 }
3873 else
3874 vect_get_constant_vectors (slp_node, i, &vec_defs);
3875
3876 vec_oprnds->quick_push (vec_defs);
3877 }
3878 }
3879
3880 /* Generate vector permute statements from a list of loads in DR_CHAIN.
3881 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
3882 permute statements for the SLP node NODE of the SLP instance
3883 SLP_NODE_INSTANCE. */
3884
3885 bool
3886 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3887 gimple_stmt_iterator *gsi, poly_uint64 vf,
3888 slp_instance slp_node_instance, bool analyze_only,
3889 unsigned *n_perms)
3890 {
3891 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3892 vec_info *vinfo = stmt_info->vinfo;
3893 int vec_index = 0;
3894 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3895 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3896 unsigned int mask_element;
3897 machine_mode mode;
3898
3899 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3900 return false;
3901
3902 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3903
3904 mode = TYPE_MODE (vectype);
3905 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3906
3907 /* Initialize the vect stmts of NODE to properly insert the generated
3908 stmts later. */
3909 if (! analyze_only)
3910 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3911 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3912 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
3913
3914 /* Generate permutation masks for every NODE. Number of masks for each NODE
3915 is equal to GROUP_SIZE.
3916 E.g., we have a group of three nodes with three loads from the same
3917 location in each node, and the vector size is 4. I.e., we have an
3918 a0b0c0a1b1c1... sequence and we need to create the following vectors:
3919 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3920 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3921 ...
3922
3923 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
3924 The last mask is illegal since we assume two operands for a permute
3925 operation, and the mask element values can't be outside that range.
3926 Hence, the last mask must be converted into {2,5,5,5}.
3927 For the first two permutations we need the first and the second input
3928 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3929 we need the second and the third vectors: {b1,c1,a2,b2} and
3930 {c2,a3,b3,c3}. */
3931
3932 int vect_stmts_counter = 0;
3933 unsigned int index = 0;
3934 int first_vec_index = -1;
3935 int second_vec_index = -1;
3936 bool noop_p = true;
3937 *n_perms = 0;
3938
3939 vec_perm_builder mask;
3940 unsigned int nelts_to_build;
3941 unsigned int nvectors_per_build;
3942 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3943 && multiple_p (nunits, group_size));
3944 if (repeating_p)
3945 {
3946 /* A single vector contains a whole number of copies of the node, so:
3947 (a) all permutes can use the same mask; and
3948 (b) the permutes only need a single vector input. */
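      /* For example (illustrative): with GROUP_SIZE == 2, a load
	 permutation {1, 0} and NUNITS a multiple of 2, the encoding
	 built below is {1, 0, 3, 2, 5, 4}, standing for the stepped
	 mask {1, 0, 3, 2, 5, 4, 7, 6, ...} which swaps each adjacent
	 pair within a single input vector.  */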
3949 mask.new_vector (nunits, group_size, 3);
3950 nelts_to_build = mask.encoded_nelts ();
3951 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3952 }
3953 else
3954 {
3955 /* We need to construct a separate mask for each vector statement. */
3956 unsigned HOST_WIDE_INT const_nunits, const_vf;
3957 if (!nunits.is_constant (&const_nunits)
3958 || !vf.is_constant (&const_vf))
3959 return false;
3960 mask.new_vector (const_nunits, const_nunits, 1);
3961 nelts_to_build = const_vf * group_size;
3962 nvectors_per_build = 1;
3963 }
3964
3965 unsigned int count = mask.encoded_nelts ();
3966 mask.quick_grow (count);
3967 vec_perm_indices indices;
3968
3969 for (unsigned int j = 0; j < nelts_to_build; j++)
3970 {
3971 unsigned int iter_num = j / group_size;
3972 unsigned int stmt_num = j % group_size;
3973 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3974 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3975 if (repeating_p)
3976 {
3977 first_vec_index = 0;
3978 mask_element = i;
3979 }
3980 else
3981 {
3982 /* Enforced before the loop when !repeating_p. */
3983 unsigned int const_nunits = nunits.to_constant ();
3984 vec_index = i / const_nunits;
3985 mask_element = i % const_nunits;
3986 if (vec_index == first_vec_index
3987 || first_vec_index == -1)
3988 {
3989 first_vec_index = vec_index;
3990 }
3991 else if (vec_index == second_vec_index
3992 || second_vec_index == -1)
3993 {
3994 second_vec_index = vec_index;
3995 mask_element += const_nunits;
3996 }
3997 else
3998 {
3999 if (dump_enabled_p ())
4000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
4001 "permutation requires at "
4002 "least three vectors %G",
4003 stmt_info->stmt);
4004 gcc_assert (analyze_only);
4005 return false;
4006 }
4007
4008 gcc_assert (mask_element < 2 * const_nunits);
4009 }
4010
4011 if (mask_element != index)
4012 noop_p = false;
4013 mask[index++] = mask_element;
4014
4015 if (index == count && !noop_p)
4016 {
4017 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
4018 if (!can_vec_perm_const_p (mode, indices))
4019 {
4020 if (dump_enabled_p ())
4021 {
4022 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
4023 vect_location,
4024 "unsupported vect permute { ");
4025 for (i = 0; i < count; ++i)
4026 {
4027 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
4028 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
4029 }
4030 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
4031 }
4032 gcc_assert (analyze_only);
4033 return false;
4034 }
4035
4036 ++*n_perms;
4037 }
4038
4039 if (index == count)
4040 {
4041 if (!analyze_only)
4042 {
4043 tree mask_vec = NULL_TREE;
4044
4045 if (! noop_p)
4046 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
4047
4048 if (second_vec_index == -1)
4049 second_vec_index = first_vec_index;
4050
4051 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
4052 {
4053 /* Generate the permute statement if necessary. */
4054 tree first_vec = dr_chain[first_vec_index + ri];
4055 tree second_vec = dr_chain[second_vec_index + ri];
4056 stmt_vec_info perm_stmt_info;
4057 if (! noop_p)
4058 {
4059 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4060 tree perm_dest
4061 = vect_create_destination_var (gimple_assign_lhs (stmt),
4062 vectype);
4063 perm_dest = make_ssa_name (perm_dest);
4064 gassign *perm_stmt
4065 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
4066 first_vec, second_vec,
4067 mask_vec);
4068 perm_stmt_info
4069 = vect_finish_stmt_generation (stmt_info, perm_stmt,
4070 gsi);
4071 }
4072 else
4073 /* If mask was NULL_TREE generate the requested
4074 identity transform. */
4075 perm_stmt_info = vinfo->lookup_def (first_vec);
4076
4077 /* Store the vector statement in NODE. */
4078 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
4079 = perm_stmt_info;
4080 }
4081 }
4082
4083 index = 0;
4084 first_vec_index = -1;
4085 second_vec_index = -1;
4086 noop_p = true;
4087 }
4088 }
4089
4090 return true;
4091 }
4092
4093 /* Vectorize SLP instance tree in postorder. */
4094
4095 static void
4096 vect_schedule_slp_instance (slp_tree node, slp_instance instance)
4097 {
4098 gimple_stmt_iterator si;
4099 stmt_vec_info stmt_info;
4100 unsigned int group_size;
4101 tree vectype;
4102 int i, j;
4103 slp_tree child;
4104
4105 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4106 return;
4107
4108 /* See if we have already vectorized the node in the graph of the
4109 SLP instance. */
4110 if (SLP_TREE_VEC_STMTS (node).exists ())
4111 return;
4112
4113 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4114 vect_schedule_slp_instance (child, instance);
4115
4116 /* Push SLP node def-type to stmts. */
4117 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4118 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4119 {
4120 stmt_vec_info child_stmt_info;
4121 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4122 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
4123 }
4124
4125 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
4126
4127 /* VECTYPE is the type of the destination. */
4128 vectype = STMT_VINFO_VECTYPE (stmt_info);
4129 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
4130 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
4131
4132 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
4133 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
4134
4135 if (dump_enabled_p ())
4136 dump_printf_loc (MSG_NOTE, vect_location,
4137 "------>vectorizing SLP node starting from: %G",
4138 stmt_info->stmt);
4139
4140 /* Vectorized stmts go before the last scalar stmt which is where
4141 all uses are ready. */
4142 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
4143 si = gsi_for_stmt (last_stmt_info->stmt);
4144
4145 /* Handle two-operation SLP nodes by vectorizing the group with
4146 both operations and then performing a merge. */
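  /* For example (illustrative): for the scalar group

       { a0 + b0, a1 - b1, a2 + b2, a3 - b3 }

     and a 4-element vector type, we vectorize the group once with all
     stmts treated as PLUS (giving v0) and once as MINUS (giving v1),
     then blend the two with

       VEC_PERM_EXPR <v0, v1, { 0, 5, 2, 7 }>

     so that the plus lanes come from v0 and the minus lanes from v1.  */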
4147 bool done_p = false;
4148 if (SLP_TREE_TWO_OPERATORS (node))
4149 {
4150 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
4151 enum tree_code code0 = gimple_assign_rhs_code (stmt);
4152 enum tree_code ocode = ERROR_MARK;
4153 stmt_vec_info ostmt_info;
4154 vec_perm_builder mask (group_size, group_size, 1);
4155 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
4156 {
4157 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
4158 if (gimple_assign_rhs_code (ostmt) != code0)
4159 {
4160 mask.quick_push (1);
4161 ocode = gimple_assign_rhs_code (ostmt);
4162 }
4163 else
4164 mask.quick_push (0);
4165 }
4166 if (ocode != ERROR_MARK)
4167 {
4168 vec<stmt_vec_info> v0;
4169 vec<stmt_vec_info> v1;
4170 unsigned j;
4171 tree tmask = NULL_TREE;
4172 vect_transform_stmt (stmt_info, &si, node, instance);
4173 v0 = SLP_TREE_VEC_STMTS (node).copy ();
4174 SLP_TREE_VEC_STMTS (node).truncate (0);
4175 gimple_assign_set_rhs_code (stmt, ocode);
4176 vect_transform_stmt (stmt_info, &si, node, instance);
4177 gimple_assign_set_rhs_code (stmt, code0);
4178 v1 = SLP_TREE_VEC_STMTS (node).copy ();
4179 SLP_TREE_VEC_STMTS (node).truncate (0);
4180 tree meltype = build_nonstandard_integer_type
4181 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
4182 tree mvectype = get_same_sized_vectype (meltype, vectype);
4183 unsigned k = 0, l;
4184 for (j = 0; j < v0.length (); ++j)
4185 {
4186 /* Enforced by vect_build_slp_tree, which rejects variable-length
4187 vectors for SLP_TREE_TWO_OPERATORS. */
4188 unsigned int const_nunits = nunits.to_constant ();
4189 tree_vector_builder melts (mvectype, const_nunits, 1);
4190 for (l = 0; l < const_nunits; ++l)
4191 {
4192 if (k >= group_size)
4193 k = 0;
4194 tree t = build_int_cst (meltype,
4195 mask[k++] * const_nunits + l);
4196 melts.quick_push (t);
4197 }
4198 tmask = melts.build ();
4199
4200 /* ??? Not all targets support a VEC_PERM_EXPR with a
4201 constant mask that would translate to a vec_merge RTX
4202 (with their vec_perm_const_ok). We can either not
4203 vectorize in that case or let veclower do its job.
4204 Unfortunately that isn't too great and at least for
4205 plus/minus we'd eventually like to match targets'
4206 vector addsub instructions. */
4207 gimple *vstmt;
4208 vstmt = gimple_build_assign (make_ssa_name (vectype),
4209 VEC_PERM_EXPR,
4210 gimple_assign_lhs (v0[j]->stmt),
4211 gimple_assign_lhs (v1[j]->stmt),
4212 tmask);
4213 SLP_TREE_VEC_STMTS (node).quick_push
4214 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
4215 }
4216 v0.release ();
4217 v1.release ();
4218 done_p = true;
4219 }
4220 }
4221 if (!done_p)
4222 vect_transform_stmt (stmt_info, &si, node, instance);
4223
4224 /* Restore stmt def-types. */
4225 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4226 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
4227 {
4228 stmt_vec_info child_stmt_info;
4229 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
4230 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
4231 }
4232 }
4233
4234 /* Replace scalar calls from SLP node NODE with assignments of zero to their lhs.
4235 For loop vectorization this is done in vectorizable_call, but for SLP
4236 it needs to be deferred until end of vect_schedule_slp, because multiple
4237 SLP instances may refer to the same scalar stmt. */
4238
4239 static void
4240 vect_remove_slp_scalar_calls (slp_tree node, hash_set<slp_tree> &visited)
4241 {
4242 gimple *new_stmt;
4243 gimple_stmt_iterator gsi;
4244 int i;
4245 slp_tree child;
4246 tree lhs;
4247 stmt_vec_info stmt_info;
4248
4249 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
4250 return;
4251
4252 if (visited.add (node))
4253 return;
4254
4255 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
4256 vect_remove_slp_scalar_calls (child, visited);
4257
4258 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
4259 {
4260 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
4261 if (!stmt || gimple_bb (stmt) == NULL)
4262 continue;
4263 if (is_pattern_stmt_p (stmt_info)
4264 || !PURE_SLP_STMT (stmt_info))
4265 continue;
4266 lhs = gimple_call_lhs (stmt);
4267 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
4268 gsi = gsi_for_stmt (stmt);
4269 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
4270 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
4271 }
4272 }
4273
4274 static void
4275 vect_remove_slp_scalar_calls (slp_tree node)
4276 {
4277 hash_set<slp_tree> visited;
4278 vect_remove_slp_scalar_calls (node, visited);
4279 }
4280
4281 /* Vectorize the instance root. */
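/* For example (illustrative): for a BB SLP instance rooted at the
   constructor  x_1 = { a_2, a_3, a_4, a_5 }  that was vectorized into a
   single vector stmt with lhs vect_v, the root is replaced by
   x_1 = vect_v;  with multiple vector stmts we instead build
   x_1 = { vect_v0, vect_v1, ... }.  */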
4282
4283 void
4284 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
4285 {
4286 gassign *rstmt = NULL;
4287
4288 if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
4289 {
4290 stmt_vec_info child_stmt_info;
4291 int j;
4292
4293 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4294 {
4295 tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
4296 tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
4297 rstmt = gimple_build_assign (root_lhs, vect_lhs);
4298 break;
4299 }
4300 }
4301 else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1)
4302 {
4303 int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node);
4304 stmt_vec_info child_stmt_info;
4305 int j;
4306 vec<constructor_elt, va_gc> *v;
4307 vec_alloc (v, nelts);
4308
4309 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info)
4310 {
4311 CONSTRUCTOR_APPEND_ELT (v,
4312 NULL_TREE,
4313 gimple_get_lhs (child_stmt_info->stmt));
4314 }
4315 tree lhs = gimple_get_lhs (instance->root_stmt->stmt);
4316 tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt));
4317 tree r_constructor = build_constructor (rtype, v);
4318 rstmt = gimple_build_assign (lhs, r_constructor);
4319 }
4320
4321 gcc_assert (rstmt);
4322
4323 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
4324 gsi_replace (&rgsi, rstmt, true);
4325 }
4326
4327 /* Generate vector code for all SLP instances in the loop/basic block. */
4328
4329 void
4330 vect_schedule_slp (vec_info *vinfo)
4331 {
4332 vec<slp_instance> slp_instances;
4333 slp_instance instance;
4334 unsigned int i;
4335
4336 slp_instances = vinfo->slp_instances;
4337 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4338 {
4339 slp_tree node = SLP_INSTANCE_TREE (instance);
4340 /* Schedule the tree of INSTANCE. */
4341 vect_schedule_slp_instance (node, instance);
4342
4343 if (SLP_INSTANCE_ROOT_STMT (instance))
4344 vectorize_slp_instance_root_stmt (node, instance);
4345
4346 if (dump_enabled_p ())
4347 dump_printf_loc (MSG_NOTE, vect_location,
4348 "vectorizing stmts using SLP.\n");
4349 }
4350
4351 FOR_EACH_VEC_ELT (slp_instances, i, instance)
4352 {
4353 slp_tree root = SLP_INSTANCE_TREE (instance);
4354 stmt_vec_info store_info;
4355 unsigned int j;
4356
4357 /* Remove scalar call stmts. Do not do this for basic-block
4358 vectorization as not all uses may be vectorized.
4359 ??? Why should this be necessary? DCE should be able to
4360 remove the stmts itself.
4361 ??? For BB vectorization we can as well remove scalar
4362 stmts starting from the SLP tree root if they have no
4363 uses. */
4364 if (is_a <loop_vec_info> (vinfo))
4365 vect_remove_slp_scalar_calls (root);
4366
4367 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
4368 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
4369 {
4370 if (!STMT_VINFO_DATA_REF (store_info))
4371 break;
4372
4373 if (SLP_INSTANCE_ROOT_STMT (instance))
4374 continue;
4375
4376 store_info = vect_orig_stmt (store_info);
4377 /* Free the attached stmt_vec_info and remove the stmt. */
4378 vinfo->remove_stmt (store_info);
4379 }
4380 }
4381 }