]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
Fix vectorizable_mask_load_store handling of invariant masks
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
cbe34bb5 2 Copyright (C) 2007-2017 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
a70d6342
IR
44
45
ebfd146a
IR
46/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
47
48static void
49vect_free_slp_tree (slp_tree node)
50{
d092494c 51 int i;
d755c7ef 52 slp_tree child;
d092494c 53
9771b263 54 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 55 vect_free_slp_tree (child);
b8698a0f 56
78810bd3
RB
57 gimple *stmt;
58 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
59 /* After transform some stmts are removed and thus their vinfo is gone. */
60 if (vinfo_for_stmt (stmt))
61 {
62 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
63 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
64 }
65
9771b263
DN
66 SLP_TREE_CHILDREN (node).release ();
67 SLP_TREE_SCALAR_STMTS (node).release ();
68 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 69 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
70
71 free (node);
72}
73
74
75/* Free the memory allocated for the SLP instance. */
76
77void
78vect_free_slp_instance (slp_instance instance)
79{
80 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
9771b263 81 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 82 free (instance);
ebfd146a
IR
83}
84
85
d092494c
IR
86/* Create an SLP node for SCALAR_STMTS. */
87
88static slp_tree
355fe088 89vect_create_new_slp_node (vec<gimple *> scalar_stmts)
d092494c 90{
d3cfd39e 91 slp_tree node;
355fe088 92 gimple *stmt = scalar_stmts[0];
d092494c
IR
93 unsigned int nops;
94
95 if (is_gimple_call (stmt))
96 nops = gimple_call_num_args (stmt);
97 else if (is_gimple_assign (stmt))
f7e531cf
IR
98 {
99 nops = gimple_num_ops (stmt) - 1;
100 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
101 nops++;
102 }
e7baeb39
RB
103 else if (gimple_code (stmt) == GIMPLE_PHI)
104 nops = 0;
d092494c
IR
105 else
106 return NULL;
107
d3cfd39e 108 node = XNEW (struct _slp_tree);
d092494c 109 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263
DN
110 SLP_TREE_VEC_STMTS (node).create (0);
111 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 112 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 113 SLP_TREE_TWO_OPERATORS (node) = false;
603cca93 114 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
d092494c 115
78810bd3
RB
116 unsigned i;
117 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
118 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;
119
d092494c
IR
120 return node;
121}
122
123
ddf56386
RB
124/* This structure is used in creation of an SLP tree. Each instance
125 corresponds to the same operand in a group of scalar stmts in an SLP
126 node. */
127typedef struct _slp_oprnd_info
128{
129 /* Def-stmts for the operands. */
130 vec<gimple *> def_stmts;
131 /* Information about the first statement, its vector def-type, type, the
132 operand itself in case it's constant, and an indication if it's a pattern
133 stmt. */
ddf56386 134 tree first_op_type;
34e82342 135 enum vect_def_type first_dt;
ddf56386
RB
136 bool first_pattern;
137 bool second_pattern;
138} *slp_oprnd_info;
139
140
d092494c
IR
141/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
142 operand. */
9771b263 143static vec<slp_oprnd_info>
d092494c
IR
144vect_create_oprnd_info (int nops, int group_size)
145{
146 int i;
147 slp_oprnd_info oprnd_info;
9771b263 148 vec<slp_oprnd_info> oprnds_info;
d092494c 149
9771b263 150 oprnds_info.create (nops);
d092494c
IR
151 for (i = 0; i < nops; i++)
152 {
153 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 154 oprnd_info->def_stmts.create (group_size);
d092494c 155 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 156 oprnd_info->first_op_type = NULL_TREE;
d092494c 157 oprnd_info->first_pattern = false;
effb52da 158 oprnd_info->second_pattern = false;
9771b263 159 oprnds_info.quick_push (oprnd_info);
d092494c
IR
160 }
161
162 return oprnds_info;
163}
164
165
d3cfd39e
JJ
166/* Free operands info. */
167
d092494c 168static void
9771b263 169vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
170{
171 int i;
172 slp_oprnd_info oprnd_info;
173
9771b263 174 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 175 {
9771b263 176 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
177 XDELETE (oprnd_info);
178 }
d092494c 179
9771b263 180 oprnds_info.release ();
d092494c
IR
181}
182
183
d755c7ef
RB
184/* Find the place of the data-ref in STMT in the interleaving chain that starts
185 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
186
187static int
355fe088 188vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
d755c7ef 189{
355fe088 190 gimple *next_stmt = first_stmt;
d755c7ef
RB
191 int result = 0;
192
193 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
194 return -1;
195
196 do
197 {
198 if (next_stmt == stmt)
199 return result;
d755c7ef 200 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
c8047699
RB
201 if (next_stmt)
202 result += GROUP_GAP (vinfo_for_stmt (next_stmt));
d755c7ef
RB
203 }
204 while (next_stmt);
205
206 return -1;
207}
208
209
d092494c
IR
210/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
211 they are of a valid type and that they match the defs of the first stmt of
4cecd659
BC
212 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
213 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap
214 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond
215 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
216 and code of comparison needs to be inverted. If there is any operand swap
217 in this function, *SWAP is set to non-zero value.
218 If there was a fatal error return -1; if the error could be corrected by
219 swapping operands of father node of this one, return 1; if everything is
220 ok return 0. */
ebfd146a 221
4cecd659
BC
222static int
223vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
355fe088 224 gimple *stmt, unsigned stmt_num,
4cecd659 225 vec<slp_oprnd_info> *oprnds_info)
ebfd146a
IR
226{
227 tree oprnd;
228 unsigned int i, number_of_oprnds;
355fe088 229 gimple *def_stmt;
d092494c 230 enum vect_def_type dt = vect_uninitialized_def;
d092494c 231 bool pattern = false;
abf9bfbc 232 slp_oprnd_info oprnd_info;
b0b4483e
RB
233 int first_op_idx = 1;
234 bool commutative = false;
235 bool first_op_cond = false;
effb52da
RB
236 bool first = stmt_num == 0;
237 bool second = stmt_num == 1;
b8698a0f 238
d092494c 239 if (is_gimple_call (stmt))
190c2236
JJ
240 {
241 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 242 first_op_idx = 3;
190c2236 243 }
f7e531cf
IR
244 else if (is_gimple_assign (stmt))
245 {
b0b4483e 246 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 247 number_of_oprnds = gimple_num_ops (stmt) - 1;
4cecd659
BC
248 /* Swap can only be done for cond_expr if asked to, otherwise we
249 could result in different comparison code to the first stmt. */
a414c77f
IE
250 if (gimple_assign_rhs_code (stmt) == COND_EXPR
251 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
252 {
253 first_op_cond = true;
b0b4483e
RB
254 number_of_oprnds++;
255 }
256 else
257 commutative = commutative_tree_code (code);
f7e531cf 258 }
d092494c 259 else
b0b4483e 260 return -1;
ebfd146a 261
4cecd659
BC
262 bool swapped = (*swap != 0);
263 gcc_assert (!swapped || first_op_cond);
ebfd146a
IR
264 for (i = 0; i < number_of_oprnds; i++)
265 {
b0b4483e
RB
266again:
267 if (first_op_cond)
f7e531cf 268 {
4cecd659
BC
269 /* Map indicating how operands of cond_expr should be swapped. */
270 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
271 int *map = maps[*swap];
272
273 if (i < 2)
274 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
b0b4483e 275 else
4cecd659 276 oprnd = gimple_op (stmt, map[i]);
f7e531cf
IR
277 }
278 else
4cecd659 279 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 280
9771b263 281 oprnd_info = (*oprnds_info)[i];
ebfd146a 282
81c40241 283 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
ebfd146a 284 {
73fbfcad 285 if (dump_enabled_p ())
ebfd146a 286 {
78c60e3d 287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3fc356dc 288 "Build SLP failed: can't analyze def for ");
78c60e3d 289 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 290 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
291 }
292
b0b4483e 293 return -1;
ebfd146a
IR
294 }
295
a70d6342 296 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 297 from the pattern. Check that all the stmts of the node are in the
ebfd146a 298 pattern. */
f5709183 299 if (def_stmt && gimple_bb (def_stmt)
61d371eb 300 && vect_stmt_in_region_p (vinfo, def_stmt)
ebfd146a 301 && vinfo_for_stmt (def_stmt)
83197f37 302 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
f5709183
IR
303 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
304 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
ebfd146a 305 {
d092494c 306 pattern = true;
effb52da
RB
307 if (!first && !oprnd_info->first_pattern
308 /* Allow different pattern state for the defs of the
309 first stmt in reduction chains. */
310 && (oprnd_info->first_dt != vect_reduction_def
311 || (!second && !oprnd_info->second_pattern)))
d092494c 312 {
b0b4483e
RB
313 if (i == 0
314 && !swapped
315 && commutative)
316 {
317 swapped = true;
318 goto again;
319 }
320
73fbfcad 321 if (dump_enabled_p ())
d092494c 322 {
78c60e3d
SS
323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
324 "Build SLP failed: some of the stmts"
325 " are in a pattern, and others are not ");
326 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 327 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
d092494c 328 }
ebfd146a 329
b0b4483e 330 return 1;
ebfd146a
IR
331 }
332
333 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
d092494c 334 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
ebfd146a 335
f7e531cf 336 if (dt == vect_unknown_def_type)
ebfd146a 337 {
73fbfcad 338 if (dump_enabled_p ())
78c60e3d 339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 340 "Unsupported pattern.\n");
b0b4483e 341 return -1;
ebfd146a
IR
342 }
343
344 switch (gimple_code (def_stmt))
345 {
81c40241
RB
346 case GIMPLE_PHI:
347 case GIMPLE_ASSIGN:
348 break;
349
350 default:
351 if (dump_enabled_p ())
352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
353 "unsupported defining stmt:\n");
354 return -1;
ebfd146a
IR
355 }
356 }
357
effb52da
RB
358 if (second)
359 oprnd_info->second_pattern = pattern;
360
d092494c 361 if (first)
ebfd146a 362 {
d092494c
IR
363 oprnd_info->first_dt = dt;
364 oprnd_info->first_pattern = pattern;
793d9a16 365 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 366 }
ebfd146a
IR
367 else
368 {
d092494c
IR
369 /* Not first stmt of the group, check that the def-stmt/s match
370 the def-stmt/s of the first stmt. Allow different definition
371 types for reduction chains: the first stmt must be a
372 vect_reduction_def (a phi node), and the rest
373 vect_internal_def. */
374 if (((oprnd_info->first_dt != dt
375 && !(oprnd_info->first_dt == vect_reduction_def
793d9a16
RB
376 && dt == vect_internal_def)
377 && !((oprnd_info->first_dt == vect_external_def
378 || oprnd_info->first_dt == vect_constant_def)
379 && (dt == vect_external_def
380 || dt == vect_constant_def)))
381 || !types_compatible_p (oprnd_info->first_op_type,
382 TREE_TYPE (oprnd))))
ebfd146a 383 {
b0b4483e
RB
384 /* Try swapping operands if we got a mismatch. */
385 if (i == 0
386 && !swapped
387 && commutative)
388 {
389 swapped = true;
390 goto again;
391 }
392
abf9bfbc
RB
393 if (dump_enabled_p ())
394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 395 "Build SLP failed: different types\n");
d092494c 396
b0b4483e 397 return 1;
ebfd146a
IR
398 }
399 }
400
401 /* Check the types of the definitions. */
d092494c 402 switch (dt)
ebfd146a
IR
403 {
404 case vect_constant_def:
8644a673 405 case vect_external_def:
ebfd146a 406 break;
b8698a0f 407
c78e3652 408 case vect_reduction_def:
e7baeb39 409 case vect_induction_def:
8644a673 410 case vect_internal_def:
abf9bfbc 411 oprnd_info->def_stmts.quick_push (def_stmt);
ebfd146a
IR
412 break;
413
414 default:
415 /* FORNOW: Not supported. */
73fbfcad 416 if (dump_enabled_p ())
ebfd146a 417 {
78c60e3d
SS
418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
419 "Build SLP failed: illegal type of def ");
81c40241 420 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 421 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
422 }
423
b0b4483e 424 return -1;
ebfd146a
IR
425 }
426 }
427
b0b4483e
RB
428 /* Swap operands. */
429 if (swapped)
430 {
78810bd3
RB
431 /* If there are already uses of this stmt in a SLP instance then
432 we've committed to the operand order and can't swap it. */
433 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
434 {
435 if (dump_enabled_p ())
436 {
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
438 "Build SLP failed: cannot swap operands of "
439 "shared stmt ");
440 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
441 }
442 return -1;
443 }
444
b0b4483e
RB
445 if (first_op_cond)
446 {
447 tree cond = gimple_assign_rhs1 (stmt);
4cecd659
BC
448 enum tree_code code = TREE_CODE (cond);
449
450 /* Swap. */
451 if (*swap == 1)
452 {
453 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
454 &TREE_OPERAND (cond, 1));
455 TREE_SET_CODE (cond, swap_tree_comparison (code));
456 }
457 /* Invert. */
458 else
459 {
460 swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
461 gimple_assign_rhs3_ptr (stmt));
462 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
463 code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
464 gcc_assert (code != ERROR_MARK);
465 TREE_SET_CODE (cond, code);
466 }
b0b4483e
RB
467 }
468 else
469 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
470 gimple_assign_rhs2_ptr (stmt));
78810bd3
RB
471 if (dump_enabled_p ())
472 {
473 dump_printf_loc (MSG_NOTE, vect_location,
474 "swapped operands to match def types in ");
475 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
476 }
b0b4483e
RB
477 }
478
4cecd659 479 *swap = swapped;
b0b4483e 480 return 0;
ebfd146a
IR
481}
482
483
6983e6b5
RB
484/* Verify if the scalar stmts STMTS are isomorphic, require data
485 permutation or are of unsupported types of operation. Return
486 true if they are, otherwise return false and indicate in *MATCHES
487 which stmts are not isomorphic to the first one. If MATCHES[0]
488 is false then this indicates the comparison could not be
4cecd659
BC
489 carried out or the stmts will never be vectorized by SLP.
490
491 Note COND_EXPR is possibly ismorphic to another one after swapping its
492 operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
493 the first stmt by swapping the two operands of comparison; set SWAP[i]
494 to 2 if stmt I is isormorphic to the first stmt by inverting the code
495 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
496 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
ebfd146a
IR
497
498static bool
4cecd659 499vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
355fe088 500 vec<gimple *> stmts, unsigned int group_size,
6983e6b5 501 unsigned nops, unsigned int *max_nunits,
97a1a642 502 bool *matches, bool *two_operators)
ebfd146a 503{
ebfd146a 504 unsigned int i;
355fe088 505 gimple *first_stmt = stmts[0], *stmt = stmts[0];
6876e5bc
RB
506 enum tree_code first_stmt_code = ERROR_MARK;
507 enum tree_code alt_stmt_code = ERROR_MARK;
508 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 509 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 510 tree lhs;
6983e6b5 511 bool need_same_oprnds = false;
e00cdb8a 512 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
ebfd146a
IR
513 optab optab;
514 int icode;
ef4bddc2
RS
515 machine_mode optab_op2_mode;
516 machine_mode vec_mode;
ebfd146a 517 HOST_WIDE_INT dummy;
355fe088 518 gimple *first_load = NULL, *prev_first_load = NULL;
d092494c 519
ebfd146a 520 /* For every stmt in NODE find its def stmt/s. */
9771b263 521 FOR_EACH_VEC_ELT (stmts, i, stmt)
ebfd146a 522 {
4cecd659 523 swap[i] = 0;
6983e6b5
RB
524 matches[i] = false;
525
73fbfcad 526 if (dump_enabled_p ())
ebfd146a 527 {
78c60e3d
SS
528 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
529 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
ebfd146a
IR
530 }
531
4b5caab7
IR
532 /* Fail to vectorize statements marked as unvectorizable. */
533 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
534 {
73fbfcad 535 if (dump_enabled_p ())
4b5caab7 536 {
78c60e3d
SS
537 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
538 "Build SLP failed: unvectorizable statement ");
539 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
4b5caab7 540 }
6983e6b5
RB
541 /* Fatal mismatch. */
542 matches[0] = false;
4b5caab7
IR
543 return false;
544 }
545
ebfd146a
IR
546 lhs = gimple_get_lhs (stmt);
547 if (lhs == NULL_TREE)
548 {
73fbfcad 549 if (dump_enabled_p ())
ebfd146a 550 {
78c60e3d
SS
551 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
552 "Build SLP failed: not GIMPLE_ASSIGN nor "
553 "GIMPLE_CALL ");
554 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 555 }
6983e6b5
RB
556 /* Fatal mismatch. */
557 matches[0] = false;
ebfd146a
IR
558 return false;
559 }
560
b8698a0f 561 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
562 vectype = get_vectype_for_scalar_type (scalar_type);
563 if (!vectype)
564 {
73fbfcad 565 if (dump_enabled_p ())
ebfd146a 566 {
78c60e3d
SS
567 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
568 "Build SLP failed: unsupported data-type ");
569 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
570 scalar_type);
e645e942 571 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 572 }
6983e6b5
RB
573 /* Fatal mismatch. */
574 matches[0] = false;
ebfd146a
IR
575 return false;
576 }
b8698a0f 577
dfc55d30
RB
578 /* If populating the vector type requires unrolling then fail
579 before adjusting *max_nunits for basic-block vectorization. */
310213d4 580 if (is_a <bb_vec_info> (vinfo)
dfc55d30
RB
581 && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
582 {
583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
584 "Build SLP failed: unrolling required "
585 "in basic block SLP\n");
586 /* Fatal mismatch. */
587 matches[0] = false;
588 return false;
589 }
590
4ef69dfc
IR
591 /* In case of multiple types we need to detect the smallest type. */
592 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
97a1a642 593 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
b8698a0f 594
538dd0b7 595 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
596 {
597 rhs_code = CALL_EXPR;
538dd0b7
DM
598 if (gimple_call_internal_p (call_stmt)
599 || gimple_call_tail_p (call_stmt)
600 || gimple_call_noreturn_p (call_stmt)
601 || !gimple_call_nothrow_p (call_stmt)
602 || gimple_call_chain (call_stmt))
190c2236 603 {
73fbfcad 604 if (dump_enabled_p ())
190c2236 605 {
78c60e3d
SS
606 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
607 "Build SLP failed: unsupported call type ");
538dd0b7
DM
608 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
609 call_stmt, 0);
190c2236 610 }
6983e6b5
RB
611 /* Fatal mismatch. */
612 matches[0] = false;
190c2236
JJ
613 return false;
614 }
615 }
ebfd146a
IR
616 else
617 rhs_code = gimple_assign_rhs_code (stmt);
618
619 /* Check the operation. */
620 if (i == 0)
621 {
622 first_stmt_code = rhs_code;
623
b8698a0f 624 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
625 vector shift with scalar shift operand. */
626 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
627 || rhs_code == LROTATE_EXPR
628 || rhs_code == RROTATE_EXPR)
629 {
630 vec_mode = TYPE_MODE (vectype);
631
632 /* First see if we have a vector/vector shift. */
633 optab = optab_for_tree_code (rhs_code, vectype,
634 optab_vector);
635
636 if (!optab
947131ba 637 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
638 {
639 /* No vector/vector shift, try for a vector/scalar shift. */
640 optab = optab_for_tree_code (rhs_code, vectype,
641 optab_scalar);
642
643 if (!optab)
644 {
73fbfcad 645 if (dump_enabled_p ())
78c60e3d 646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 647 "Build SLP failed: no optab.\n");
6983e6b5
RB
648 /* Fatal mismatch. */
649 matches[0] = false;
ebfd146a
IR
650 return false;
651 }
947131ba 652 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
653 if (icode == CODE_FOR_nothing)
654 {
73fbfcad 655 if (dump_enabled_p ())
78c60e3d
SS
656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
657 "Build SLP failed: "
e645e942 658 "op not supported by target.\n");
6983e6b5
RB
659 /* Fatal mismatch. */
660 matches[0] = false;
ebfd146a
IR
661 return false;
662 }
663 optab_op2_mode = insn_data[icode].operand[2].mode;
664 if (!VECTOR_MODE_P (optab_op2_mode))
665 {
666 need_same_oprnds = true;
667 first_op1 = gimple_assign_rhs2 (stmt);
668 }
669 }
670 }
36ba4aae
IR
671 else if (rhs_code == WIDEN_LSHIFT_EXPR)
672 {
673 need_same_oprnds = true;
674 first_op1 = gimple_assign_rhs2 (stmt);
675 }
ebfd146a
IR
676 }
677 else
678 {
6876e5bc
RB
679 if (first_stmt_code != rhs_code
680 && alt_stmt_code == ERROR_MARK)
681 alt_stmt_code = rhs_code;
ebfd146a
IR
682 if (first_stmt_code != rhs_code
683 && (first_stmt_code != IMAGPART_EXPR
684 || rhs_code != REALPART_EXPR)
685 && (first_stmt_code != REALPART_EXPR
69f11a13 686 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
687 /* Handle mismatches in plus/minus by computing both
688 and merging the results. */
689 && !((first_stmt_code == PLUS_EXPR
690 || first_stmt_code == MINUS_EXPR)
691 && (alt_stmt_code == PLUS_EXPR
692 || alt_stmt_code == MINUS_EXPR)
693 && rhs_code == alt_stmt_code)
0d0293ac 694 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
69f11a13 695 && (first_stmt_code == ARRAY_REF
38000232 696 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
697 || first_stmt_code == INDIRECT_REF
698 || first_stmt_code == COMPONENT_REF
699 || first_stmt_code == MEM_REF)))
ebfd146a 700 {
73fbfcad 701 if (dump_enabled_p ())
ebfd146a 702 {
78c60e3d
SS
703 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
704 "Build SLP failed: different operation "
705 "in stmt ");
706 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
6876e5bc
RB
707 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
708 "original stmt ");
709 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
710 first_stmt, 0);
ebfd146a 711 }
6983e6b5
RB
712 /* Mismatch. */
713 continue;
ebfd146a 714 }
b8698a0f
L
715
716 if (need_same_oprnds
ebfd146a
IR
717 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
718 {
73fbfcad 719 if (dump_enabled_p ())
ebfd146a 720 {
78c60e3d
SS
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "Build SLP failed: different shift "
723 "arguments in ");
724 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 725 }
6983e6b5
RB
726 /* Mismatch. */
727 continue;
ebfd146a 728 }
190c2236
JJ
729
730 if (rhs_code == CALL_EXPR)
731 {
355fe088 732 gimple *first_stmt = stmts[0];
190c2236
JJ
733 if (gimple_call_num_args (stmt) != nops
734 || !operand_equal_p (gimple_call_fn (first_stmt),
735 gimple_call_fn (stmt), 0)
736 || gimple_call_fntype (first_stmt)
737 != gimple_call_fntype (stmt))
738 {
73fbfcad 739 if (dump_enabled_p ())
190c2236 740 {
78c60e3d
SS
741 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
742 "Build SLP failed: different calls in ");
743 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
744 stmt, 0);
190c2236 745 }
6983e6b5
RB
746 /* Mismatch. */
747 continue;
190c2236
JJ
748 }
749 }
ebfd146a
IR
750 }
751
0d0293ac
MM
752 /* Grouped store or load. */
753 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a
IR
754 {
755 if (REFERENCE_CLASS_P (lhs))
756 {
757 /* Store. */
6983e6b5 758 ;
ebfd146a 759 }
b5aeb3bb
IR
760 else
761 {
762 /* Load. */
e14c1050 763 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
764 if (prev_first_load)
765 {
766 /* Check that there are no loads from different interleaving
6983e6b5
RB
767 chains in the same node. */
768 if (prev_first_load != first_load)
78c60e3d 769 {
73fbfcad 770 if (dump_enabled_p ())
b5aeb3bb 771 {
78c60e3d
SS
772 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
773 vect_location,
774 "Build SLP failed: different "
775 "interleaving chains in one node ");
776 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
777 stmt, 0);
b5aeb3bb 778 }
6983e6b5
RB
779 /* Mismatch. */
780 continue;
b5aeb3bb
IR
781 }
782 }
783 else
784 prev_first_load = first_load;
ebfd146a 785 }
0d0293ac 786 } /* Grouped access. */
ebfd146a
IR
787 else
788 {
789 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
790 {
0d0293ac 791 /* Not grouped load. */
73fbfcad 792 if (dump_enabled_p ())
ebfd146a 793 {
78c60e3d
SS
794 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
795 "Build SLP failed: not grouped load ");
796 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a
IR
797 }
798
0d0293ac 799 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
800 /* Fatal mismatch. */
801 matches[0] = false;
ebfd146a
IR
802 return false;
803 }
804
805 /* Not memory operation. */
806 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 807 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 808 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 809 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 810 && rhs_code != CALL_EXPR)
ebfd146a 811 {
73fbfcad 812 if (dump_enabled_p ())
ebfd146a 813 {
78c60e3d
SS
814 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
815 "Build SLP failed: operation");
816 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
817 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
ebfd146a 818 }
6983e6b5
RB
819 /* Fatal mismatch. */
820 matches[0] = false;
ebfd146a
IR
821 return false;
822 }
823
4cecd659
BC
824 if (rhs_code == COND_EXPR)
825 {
826 tree cond_expr = gimple_assign_rhs1 (stmt);
827 enum tree_code cond_code = TREE_CODE (cond_expr);
828 enum tree_code swap_code = ERROR_MARK;
829 enum tree_code invert_code = ERROR_MARK;
f7e531cf
IR
830
831 if (i == 0)
832 first_cond_code = TREE_CODE (cond_expr);
4cecd659
BC
833 else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
834 {
835 bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
836 swap_code = swap_tree_comparison (cond_code);
837 invert_code = invert_tree_comparison (cond_code, honor_nans);
838 }
839
840 if (first_cond_code == cond_code)
841 ;
842 /* Isomorphic can be achieved by swapping. */
843 else if (first_cond_code == swap_code)
844 swap[i] = 1;
845 /* Isomorphic can be achieved by inverting. */
846 else if (first_cond_code == invert_code)
847 swap[i] = 2;
848 else
849 {
850 if (dump_enabled_p ())
851 {
852 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
853 "Build SLP failed: different"
854 " operation");
4cecd659 855 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
78c60e3d 856 stmt, 0);
4cecd659 857 }
6983e6b5
RB
858 /* Mismatch. */
859 continue;
f7e531cf 860 }
4cecd659 861 }
ebfd146a 862 }
6983e6b5
RB
863
864 matches[i] = true;
865 }
866
867 for (i = 0; i < group_size; ++i)
868 if (!matches[i])
869 return false;
870
6876e5bc
RB
871 /* If we allowed a two-operation SLP node verify the target can cope
872 with the permute we are going to use. */
873 if (alt_stmt_code != ERROR_MARK
874 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
875 {
908a1a16
RS
876 unsigned int count = TYPE_VECTOR_SUBPARTS (vectype);
877 auto_vec_perm_indices sel (count);
878 for (i = 0; i < count; ++i)
6876e5bc 879 {
908a1a16 880 unsigned int elt = i;
6876e5bc 881 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
908a1a16
RS
882 elt += count;
883 sel.quick_push (elt);
6876e5bc 884 }
908a1a16 885 if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
6876e5bc
RB
886 {
887 for (i = 0; i < group_size; ++i)
888 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
889 {
890 matches[i] = false;
891 if (dump_enabled_p ())
892 {
893 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
894 "Build SLP failed: different operation "
895 "in stmt ");
896 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
897 stmts[i], 0);
898 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
899 "original stmt ");
900 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
901 first_stmt, 0);
902 }
903 }
904 return false;
905 }
906 *two_operators = true;
907 }
908
6983e6b5
RB
909 return true;
910}
911
26d66f28
RB
912/* Traits for the hash_set to record failed SLP builds for a stmt set.
913 Note we never remove apart from at destruction time so we do not
914 need a special value for deleted that differs from empty. */
915struct bst_traits
916{
917 typedef vec <gimple *> value_type;
918 typedef vec <gimple *> compare_type;
919 static inline hashval_t hash (value_type);
920 static inline bool equal (value_type existing, value_type candidate);
921 static inline bool is_empty (value_type x) { return !x.exists (); }
922 static inline bool is_deleted (value_type x) { return !x.exists (); }
923 static inline void mark_empty (value_type &x) { x.release (); }
924 static inline void mark_deleted (value_type &x) { x.release (); }
925 static inline void remove (value_type &x) { x.release (); }
926};
927inline hashval_t
928bst_traits::hash (value_type x)
929{
930 inchash::hash h;
931 for (unsigned i = 0; i < x.length (); ++i)
932 h.add_int (gimple_uid (x[i]));
933 return h.end ();
934}
935inline bool
936bst_traits::equal (value_type existing, value_type candidate)
937{
938 if (existing.length () != candidate.length ())
939 return false;
940 for (unsigned i = 0; i < existing.length (); ++i)
941 if (existing[i] != candidate[i])
942 return false;
943 return true;
944}
945
946static hash_set <vec <gimple *>, bst_traits> *bst_fail;
947
948static slp_tree
949vect_build_slp_tree_2 (vec_info *vinfo,
950 vec<gimple *> stmts, unsigned int group_size,
951 unsigned int *max_nunits,
952 vec<slp_tree> *loads,
953 bool *matches, unsigned *npermutes, unsigned *tree_size,
954 unsigned max_tree_size);
6983e6b5 955
e403d17e 956static slp_tree
310213d4 957vect_build_slp_tree (vec_info *vinfo,
e403d17e 958 vec<gimple *> stmts, unsigned int group_size,
6983e6b5
RB
959 unsigned int *max_nunits,
960 vec<slp_tree> *loads,
1428105c
RB
961 bool *matches, unsigned *npermutes, unsigned *tree_size,
962 unsigned max_tree_size)
26d66f28
RB
963{
964 if (bst_fail->contains (stmts))
965 return NULL;
966 slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, max_nunits,
967 loads, matches, npermutes, tree_size,
968 max_tree_size);
969 /* When SLP build fails for stmts record this, otherwise SLP build
970 can be exponential in time when we allow to construct parts from
971 scalars, see PR81723. */
972 if (! res)
973 {
974 vec <gimple *> x;
975 x.create (stmts.length ());
976 x.splice (stmts);
977 bst_fail->add (x);
978 }
979 return res;
980}
981
982/* Recursively build an SLP tree starting from NODE.
983 Fail (and return a value not equal to zero) if def-stmts are not
984 isomorphic, require data permutation or are of unsupported types of
985 operation. Otherwise, return 0.
986 The value returned is the depth in the SLP tree where a mismatch
987 was found. */
988
989static slp_tree
990vect_build_slp_tree_2 (vec_info *vinfo,
991 vec<gimple *> stmts, unsigned int group_size,
992 unsigned int *max_nunits,
993 vec<slp_tree> *loads,
994 bool *matches, unsigned *npermutes, unsigned *tree_size,
995 unsigned max_tree_size)
6983e6b5 996{
e403d17e 997 unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
355fe088 998 gimple *stmt;
e403d17e 999 slp_tree node;
6983e6b5 1000
6983e6b5
RB
1001 matches[0] = false;
1002
e403d17e 1003 stmt = stmts[0];
6983e6b5
RB
1004 if (is_gimple_call (stmt))
1005 nops = gimple_call_num_args (stmt);
1006 else if (is_gimple_assign (stmt))
1007 {
1008 nops = gimple_num_ops (stmt) - 1;
1009 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1010 nops++;
ebfd146a 1011 }
e7baeb39
RB
1012 else if (gimple_code (stmt) == GIMPLE_PHI)
1013 nops = 0;
6983e6b5 1014 else
e403d17e 1015 return NULL;
6983e6b5 1016
c78e3652
RB
1017 /* If the SLP node is a PHI (induction or reduction), terminate
1018 the recursion. */
e7baeb39
RB
1019 if (gimple_code (stmt) == GIMPLE_PHI)
1020 {
719488f8 1021 vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
c78e3652 1022 /* Induction from different IVs is not supported. */
719488f8
RB
1023 if (def_type == vect_induction_def)
1024 {
1025 FOR_EACH_VEC_ELT (stmts, i, stmt)
1026 if (stmt != stmts[0])
1027 return NULL;
1028 }
1029 else
1030 {
1031 /* Else def types have to match. */
1032 FOR_EACH_VEC_ELT (stmts, i, stmt)
1033 {
1034 /* But for reduction chains only check on the first stmt. */
1035 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
1036 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
1037 continue;
1038 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
1039 return NULL;
1040 }
1041 }
e7baeb39
RB
1042 node = vect_create_new_slp_node (stmts);
1043 return node;
1044 }
1045
1046
6876e5bc 1047 bool two_operators = false;
4cecd659
BC
1048 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1049 if (!vect_build_slp_tree_1 (vinfo, swap,
e403d17e
RB
1050 stmts, group_size, nops,
1051 &this_max_nunits, matches, &two_operators))
1052 return NULL;
ebfd146a 1053
6983e6b5
RB
1054 /* If the SLP node is a load, terminate the recursion. */
1055 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
1056 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
ebfd146a 1057 {
e403d17e
RB
1058 *max_nunits = this_max_nunits;
1059 node = vect_create_new_slp_node (stmts);
1060 loads->safe_push (node);
1061 return node;
ebfd146a
IR
1062 }
1063
6983e6b5
RB
1064 /* Get at the operands, verifying they are compatible. */
1065 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1066 slp_oprnd_info oprnd_info;
e403d17e 1067 FOR_EACH_VEC_ELT (stmts, i, stmt)
6983e6b5 1068 {
4cecd659
BC
1069 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1070 stmt, i, &oprnds_info);
1071 if (res != 0)
1072 matches[(res == -1) ? 0 : i] = false;
1073 if (!matches[0])
1074 break;
6983e6b5 1075 }
b0b4483e
RB
1076 for (i = 0; i < group_size; ++i)
1077 if (!matches[i])
1078 {
1079 vect_free_oprnd_info (oprnds_info);
e403d17e 1080 return NULL;
b0b4483e 1081 }
6983e6b5 1082
e403d17e
RB
1083 auto_vec<slp_tree, 4> children;
1084 auto_vec<slp_tree> this_loads;
1085
1086 stmt = stmts[0];
6983e6b5 1087
26d66f28
RB
1088 if (tree_size)
1089 max_tree_size -= *tree_size;
1090
b8698a0f 1091 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 1092 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 1093 {
d092494c 1094 slp_tree child;
e403d17e
RB
1095 unsigned old_nloads = this_loads.length ();
1096 unsigned old_tree_size = this_tree_size;
1097 unsigned int j;
b8698a0f 1098
e7baeb39 1099 if (oprnd_info->first_dt != vect_internal_def
c78e3652 1100 && oprnd_info->first_dt != vect_reduction_def
e7baeb39 1101 && oprnd_info->first_dt != vect_induction_def)
d092494c 1102 continue;
ebfd146a 1103
1428105c
RB
1104 if (++this_tree_size > max_tree_size)
1105 {
e403d17e
RB
1106 FOR_EACH_VEC_ELT (children, j, child)
1107 vect_free_slp_tree (child);
1428105c 1108 vect_free_oprnd_info (oprnds_info);
e403d17e 1109 return NULL;
1428105c
RB
1110 }
1111
e403d17e
RB
1112 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1113 group_size, &this_max_nunits,
1114 &this_loads, matches, npermutes,
1115 &this_tree_size,
1116 max_tree_size)) != NULL)
6983e6b5 1117 {
3fc356dc
RB
1118 /* If we have all children of child built up from scalars then just
1119 throw that away and build it up this node from scalars. */
995b6fe0
RB
1120 if (!SLP_TREE_CHILDREN (child).is_empty ()
1121 /* ??? Rejecting patterns this way doesn't work. We'd have to
1122 do extra work to cancel the pattern so the uses see the
1123 scalar version. */
1124 && !is_pattern_stmt_p
1125 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
3fc356dc 1126 {
3fc356dc
RB
1127 slp_tree grandchild;
1128
1129 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1130 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
3fc356dc
RB
1131 break;
1132 if (!grandchild)
1133 {
1134 /* Roll back. */
e403d17e
RB
1135 this_loads.truncate (old_nloads);
1136 this_tree_size = old_tree_size;
3fc356dc 1137 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1138 vect_free_slp_tree (grandchild);
3fc356dc
RB
1139 SLP_TREE_CHILDREN (child).truncate (0);
1140
1141 dump_printf_loc (MSG_NOTE, vect_location,
1142 "Building parent vector operands from "
1143 "scalars instead\n");
1144 oprnd_info->def_stmts = vNULL;
603cca93 1145 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1146 children.safe_push (child);
3fc356dc
RB
1147 continue;
1148 }
1149 }
1150
6983e6b5 1151 oprnd_info->def_stmts = vNULL;
e403d17e 1152 children.safe_push (child);
6983e6b5
RB
1153 continue;
1154 }
1155
90dd6e3d
RB
1156 /* If the SLP build failed fatally and we analyze a basic-block
1157 simply treat nodes we fail to build as externally defined
1158 (and thus build vectors from the scalar defs).
1159 The cost model will reject outright expensive cases.
1160 ??? This doesn't treat cases where permutation ultimatively
1161 fails (or we don't try permutation below). Ideally we'd
1162 even compute a permutation that will end up with the maximum
1163 SLP tree size... */
310213d4 1164 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1165 && !matches[0]
1166 /* ??? Rejecting patterns this way doesn't work. We'd have to
1167 do extra work to cancel the pattern so the uses see the
1168 scalar version. */
1169 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1170 {
1171 dump_printf_loc (MSG_NOTE, vect_location,
1172 "Building vector operands from scalars\n");
e403d17e 1173 child = vect_create_new_slp_node (oprnd_info->def_stmts);
603cca93 1174 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e
RB
1175 children.safe_push (child);
1176 oprnd_info->def_stmts = vNULL;
90dd6e3d
RB
1177 continue;
1178 }
1179
6983e6b5
RB
1180 /* If the SLP build for operand zero failed and operand zero
1181 and one can be commutated try that for the scalar stmts
1182 that failed the match. */
1183 if (i == 0
1184 /* A first scalar stmt mismatch signals a fatal mismatch. */
1185 && matches[0]
1186 /* ??? For COND_EXPRs we can swap the comparison operands
1187 as well as the arms under some constraints. */
1188 && nops == 2
1189 && oprnds_info[1]->first_dt == vect_internal_def
1190 && is_gimple_assign (stmt)
1191 && commutative_tree_code (gimple_assign_rhs_code (stmt))
e403d17e 1192 && ! two_operators
6983e6b5
RB
1193 /* Do so only if the number of not successful permutes was nor more
1194 than a cut-ff as re-trying the recursive match on
1195 possibly each level of the tree would expose exponential
1196 behavior. */
1197 && *npermutes < 4)
1198 {
78810bd3
RB
1199 /* Verify if we can safely swap or if we committed to a specific
1200 operand order already. */
1201 for (j = 0; j < group_size; ++j)
1202 if (!matches[j]
4cecd659
BC
1203 && (swap[j] != 0
1204 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
78810bd3
RB
1205 {
1206 if (dump_enabled_p ())
1207 {
1208 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1209 "Build SLP failed: cannot swap operands "
1210 "of shared stmt ");
1211 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1212 stmts[j], 0);
1213 }
1214 goto fail;
1215 }
1216
6983e6b5 1217 /* Swap mismatched definition stmts. */
b0b4483e
RB
1218 dump_printf_loc (MSG_NOTE, vect_location,
1219 "Re-trying with swapped operands of stmts ");
e72baed7 1220 for (j = 0; j < group_size; ++j)
6983e6b5
RB
1221 if (!matches[j])
1222 {
6b4db501
MM
1223 std::swap (oprnds_info[0]->def_stmts[j],
1224 oprnds_info[1]->def_stmts[j]);
b0b4483e 1225 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1226 }
b0b4483e 1227 dump_printf (MSG_NOTE, "\n");
74574669
RB
1228 /* And try again with scratch 'matches' ... */
1229 bool *tem = XALLOCAVEC (bool, group_size);
e403d17e
RB
1230 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1231 group_size, &this_max_nunits,
1232 &this_loads, tem, npermutes,
1233 &this_tree_size,
1234 max_tree_size)) != NULL)
6983e6b5 1235 {
60f2b864
RB
1236 /* ... so if successful we can apply the operand swapping
1237 to the GIMPLE IL. This is necessary because for example
1238 vect_get_slp_defs uses operand indexes and thus expects
1239 canonical operand order. This is also necessary even
1240 if we end up building the operand from scalars as
1241 we'll continue to process swapped operand two. */
1242 for (j = 0; j < group_size; ++j)
f47cda24 1243 {
e403d17e 1244 gimple *stmt = stmts[j];
f47cda24
RB
1245 gimple_set_plf (stmt, GF_PLF_1, false);
1246 }
1247 for (j = 0; j < group_size; ++j)
1248 {
e403d17e 1249 gimple *stmt = stmts[j];
f47cda24
RB
1250 if (!matches[j])
1251 {
1252 /* Avoid swapping operands twice. */
1253 if (gimple_plf (stmt, GF_PLF_1))
1254 continue;
1255 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1256 gimple_assign_rhs2_ptr (stmt));
1257 gimple_set_plf (stmt, GF_PLF_1, true);
1258 }
1259 }
1260 /* Verify we swap all duplicates or none. */
1261 if (flag_checking)
1262 for (j = 0; j < group_size; ++j)
60f2b864 1263 {
e403d17e 1264 gimple *stmt = stmts[j];
f47cda24 1265 gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
60f2b864
RB
1266 }
1267
85c69b0b
RB
1268 /* If we have all children of child built up from scalars then
1269 just throw that away and build it up this node from scalars. */
995b6fe0
RB
1270 if (!SLP_TREE_CHILDREN (child).is_empty ()
1271 /* ??? Rejecting patterns this way doesn't work. We'd have
1272 to do extra work to cancel the pattern so the uses see the
1273 scalar version. */
1274 && !is_pattern_stmt_p
1275 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
85c69b0b
RB
1276 {
1277 unsigned int j;
1278 slp_tree grandchild;
1279
1280 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
603cca93 1281 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
85c69b0b
RB
1282 break;
1283 if (!grandchild)
1284 {
1285 /* Roll back. */
e403d17e
RB
1286 this_loads.truncate (old_nloads);
1287 this_tree_size = old_tree_size;
85c69b0b
RB
1288 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1289 vect_free_slp_tree (grandchild);
1290 SLP_TREE_CHILDREN (child).truncate (0);
1291
1292 dump_printf_loc (MSG_NOTE, vect_location,
1293 "Building parent vector operands from "
1294 "scalars instead\n");
1295 oprnd_info->def_stmts = vNULL;
603cca93 1296 SLP_TREE_DEF_TYPE (child) = vect_external_def;
e403d17e 1297 children.safe_push (child);
85c69b0b
RB
1298 continue;
1299 }
1300 }
1301
6983e6b5 1302 oprnd_info->def_stmts = vNULL;
e403d17e 1303 children.safe_push (child);
6983e6b5
RB
1304 continue;
1305 }
1306
1307 ++*npermutes;
1308 }
1309
78810bd3 1310fail:
e403d17e
RB
1311 gcc_assert (child == NULL);
1312 FOR_EACH_VEC_ELT (children, j, child)
1313 vect_free_slp_tree (child);
6983e6b5 1314 vect_free_oprnd_info (oprnds_info);
e403d17e 1315 return NULL;
ebfd146a
IR
1316 }
1317
e403d17e
RB
1318 vect_free_oprnd_info (oprnds_info);
1319
1428105c
RB
1320 if (tree_size)
1321 *tree_size += this_tree_size;
e403d17e
RB
1322 *max_nunits = this_max_nunits;
1323 loads->safe_splice (this_loads);
1428105c 1324
e403d17e
RB
1325 node = vect_create_new_slp_node (stmts);
1326 SLP_TREE_TWO_OPERATORS (node) = two_operators;
1327 SLP_TREE_CHILDREN (node).splice (children);
1328 return node;
ebfd146a
IR
1329}
1330
78c60e3d 1331/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1332
1333static void
1a817418 1334vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
ebfd146a
IR
1335{
1336 int i;
355fe088 1337 gimple *stmt;
d755c7ef 1338 slp_tree child;
ebfd146a 1339
603cca93
RB
1340 dump_printf_loc (dump_kind, loc, "node%s\n",
1341 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1342 ? " (external)" : "");
9771b263 1343 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a 1344 {
c2a12ca0 1345 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
78c60e3d 1346 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
ebfd146a 1347 }
9771b263 1348 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
c2a12ca0 1349 vect_print_slp_tree (dump_kind, loc, child);
ebfd146a
IR
1350}
1351
1352
b8698a0f
L
1353/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1354 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1355 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1356 stmts in NODE are to be marked. */
1357
1358static void
1359vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1360{
1361 int i;
355fe088 1362 gimple *stmt;
d755c7ef 1363 slp_tree child;
ebfd146a 1364
603cca93 1365 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
1366 return;
1367
9771b263 1368 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1369 if (j < 0 || i == j)
1370 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1371
9771b263 1372 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1373 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1374}
1375
1376
a70d6342
IR
1377/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1378
1379static void
1380vect_mark_slp_stmts_relevant (slp_tree node)
1381{
1382 int i;
355fe088 1383 gimple *stmt;
a70d6342 1384 stmt_vec_info stmt_info;
d755c7ef 1385 slp_tree child;
a70d6342 1386
603cca93 1387 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
1388 return;
1389
9771b263 1390 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1391 {
1392 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 1393 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1394 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1395 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1396 }
1397
9771b263 1398 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1399 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1400}
1401
1402
b5aeb3bb
IR
1403/* Rearrange the statements of NODE according to PERMUTATION. */
1404
1405static void
1406vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1407 vec<unsigned> permutation)
b5aeb3bb 1408{
355fe088
TS
1409 gimple *stmt;
1410 vec<gimple *> tmp_stmts;
d755c7ef
RB
1411 unsigned int i;
1412 slp_tree child;
b5aeb3bb 1413
9771b263 1414 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1415 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1416
9771b263
DN
1417 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1418 tmp_stmts.create (group_size);
d755c7ef 1419 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1420
9771b263 1421 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
d755c7ef 1422 tmp_stmts[permutation[i]] = stmt;
b5aeb3bb 1423
9771b263 1424 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1425 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1426}
1427
1428
b266b968
RB
1429/* Attempt to reorder stmts in a reduction chain so that we don't
1430 require any load permutation. Return true if that was possible,
1431 otherwise return false. */
1432
1433static bool
1434vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1435{
1436 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1437 unsigned int i, j;
b266b968
RB
1438 unsigned int lidx;
1439 slp_tree node, load;
1440
1441 /* Compare all the permutation sequences to the first one. We know
1442 that at least one load is permuted. */
1443 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1444 if (!node->load_permutation.exists ())
1445 return false;
1446 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1447 {
1448 if (!load->load_permutation.exists ())
1449 return false;
1450 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1451 if (lidx != node->load_permutation[j])
1452 return false;
1453 }
1454
1455 /* Check that the loads in the first sequence are different and there
1456 are no gaps between them. */
7ba9e72d 1457 auto_sbitmap load_index (group_size);
b266b968
RB
1458 bitmap_clear (load_index);
1459 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1460 {
41eefe13 1461 if (lidx >= group_size)
7ba9e72d 1462 return false;
b266b968 1463 if (bitmap_bit_p (load_index, lidx))
7ba9e72d
TS
1464 return false;
1465
b266b968
RB
1466 bitmap_set_bit (load_index, lidx);
1467 }
1468 for (i = 0; i < group_size; i++)
1469 if (!bitmap_bit_p (load_index, i))
7ba9e72d 1470 return false;
b266b968
RB
1471
1472 /* This permutation is valid for reduction. Since the order of the
1473 statements in the nodes is not important unless they are memory
1474 accesses, we can rearrange the statements in all the nodes
1475 according to the order of the loads. */
1476 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1477 node->load_permutation);
1478
1479 /* We are done, no actual permutations need to be generated. */
c4e360f4 1480 unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
b266b968 1481 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
c4e360f4
RB
1482 {
1483 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1484 first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
1485 /* But we have to keep those permutations that are required because
1486 of handling of gaps. */
1487 if (unrolling_factor == 1
1488 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1489 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
1490 SLP_TREE_LOAD_PERMUTATION (node).release ();
cbd400b4
RB
1491 else
1492 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1493 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
c4e360f4
RB
1494 }
1495
b266b968
RB
1496 return true;
1497}
1498
01d8bf07
RB
1499/* Check if the required load permutations in the SLP instance
1500 SLP_INSTN are supported. */
ebfd146a
IR
1501
1502static bool
01d8bf07 1503vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1504{
01d8bf07
RB
1505 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1506 unsigned int i, j, k, next;
6983e6b5 1507 slp_tree node;
a5b50aa1 1508 gimple *stmt, *load, *next_load;
ebfd146a 1509
73fbfcad 1510 if (dump_enabled_p ())
ebfd146a 1511 {
78c60e3d 1512 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1513 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1514 if (node->load_permutation.exists ())
1515 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1516 dump_printf (MSG_NOTE, "%d ", next);
1517 else
bddc974e
TJ
1518 for (k = 0; k < group_size; ++k)
1519 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1520 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1521 }
1522
b5aeb3bb
IR
1523 /* In case of reduction every load permutation is allowed, since the order
1524 of the reduction statements is not important (as opposed to the case of
0d0293ac 1525 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1526 load nodes are of the same size and have the same permutation (and then
1527 rearrange all the nodes of the SLP instance according to this
1528 permutation). */
1529
1530 /* Check that all the load nodes are of the same size. */
01d8bf07 1531 /* ??? Can't we assert this? */
9771b263 1532 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1533 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1534 return false;
2200fc49 1535
b5aeb3bb 1536 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1537 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1538
b010117a 1539 /* Reduction (there are no data-refs in the root).
b266b968 1540 In reduction chain the order of the loads is not important. */
b010117a
IR
1541 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1542 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
c4e360f4 1543 vect_attempt_slp_rearrange_stmts (slp_instn);
b5aeb3bb 1544
6aa904c4
IR
1545 /* In basic block vectorization we allow any subchain of an interleaving
1546 chain.
1547 FORNOW: not supported in loop SLP because of realignment compications. */
01d8bf07 1548 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1549 {
240a94da
RB
1550 /* Check whether the loads in an instance form a subchain and thus
1551 no permutation is necessary. */
9771b263 1552 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1553 {
9626d143
RB
1554 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1555 continue;
240a94da 1556 bool subchain_p = true;
6aa904c4 1557 next_load = NULL;
9771b263 1558 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1559 {
5b5826c4
RB
1560 if (j != 0
1561 && (next_load != load
1562 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
240a94da
RB
1563 {
1564 subchain_p = false;
1565 break;
1566 }
6aa904c4
IR
1567 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1568 }
240a94da
RB
1569 if (subchain_p)
1570 SLP_TREE_LOAD_PERMUTATION (node).release ();
1571 else
1572 {
fe73a332
RB
1573 stmt_vec_info group_info
1574 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1575 group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
1576 unsigned nunits
1577 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
1578 unsigned k, maxk = 0;
1579 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1580 if (k > maxk)
1581 maxk = k;
1582 /* In BB vectorization we may not actually use a loaded vector
1583 accessing elements in excess of GROUP_SIZE. */
1584 if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
1585 {
1586 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1587 "BB vectorization with gaps at the end of "
1588 "a load is not supported\n");
1589 return false;
1590 }
1591
240a94da
RB
1592 /* Verify the permutation can be generated. */
1593 vec<tree> tem;
29afecdf 1594 unsigned n_perms;
240a94da 1595 if (!vect_transform_slp_perm_load (node, tem, NULL,
29afecdf 1596 1, slp_instn, true, &n_perms))
240a94da
RB
1597 {
1598 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1599 vect_location,
1600 "unsupported load permutation\n");
1601 return false;
1602 }
1603 }
6aa904c4 1604 }
01d8bf07 1605 return true;
6aa904c4
IR
1606 }
1607
9b999e8c 1608 /* For loop vectorization verify we can generate the permutation. */
29afecdf 1609 unsigned n_perms;
01d8bf07
RB
1610 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1611 if (node->load_permutation.exists ()
1612 && !vect_transform_slp_perm_load
1613 (node, vNULL, NULL,
29afecdf
RB
1614 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
1615 &n_perms))
01d8bf07 1616 return false;
9b999e8c 1617
01d8bf07 1618 return true;
ebfd146a
IR
1619}
1620
1621
e4a707c4 1622/* Find the last store in SLP INSTANCE. */
ff802fa1 1623
64900538 1624gimple *
2e8ab70c 1625vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1626{
355fe088 1627 gimple *last = NULL, *stmt;
e4a707c4 1628
2e8ab70c
RB
1629 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1630 {
1631 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1632 if (is_pattern_stmt_p (stmt_vinfo))
1633 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1634 else
1635 last = get_later_stmt (stmt, last);
1636 }
e4a707c4 1637
2e8ab70c 1638 return last;
e4a707c4
IR
1639}
1640
23847df4
RB
1641/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1642
1643static void
1a4b99c1 1644vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
23847df4 1645 stmt_vector_for_cost *prologue_cost_vec,
1a4b99c1 1646 stmt_vector_for_cost *body_cost_vec,
23847df4
RB
1647 unsigned ncopies_for_cost)
1648{
603cca93 1649 unsigned i, j;
23847df4 1650 slp_tree child;
8155f4d8 1651 gimple *stmt;
23847df4
RB
1652 stmt_vec_info stmt_info;
1653 tree lhs;
23847df4
RB
1654
1655 /* Recurse down the SLP tree. */
1656 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
603cca93 1657 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1a4b99c1
RB
1658 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1659 body_cost_vec, ncopies_for_cost);
23847df4
RB
1660
1661 /* Look at the first scalar stmt to determine the cost. */
1662 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1663 stmt_info = vinfo_for_stmt (stmt);
1664 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1665 {
2de001ee
RS
1666 vect_memory_access_type memory_access_type
1667 = (STMT_VINFO_STRIDED_P (stmt_info)
1668 ? VMAT_STRIDED_SLP
1669 : VMAT_CONTIGUOUS);
23847df4 1670 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
2de001ee
RS
1671 vect_model_store_cost (stmt_info, ncopies_for_cost,
1672 memory_access_type, vect_uninitialized_def,
23847df4
RB
1673 node, prologue_cost_vec, body_cost_vec);
1674 else
1675 {
23847df4 1676 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
52eab378
RB
1677 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1678 {
8155f4d8
RB
1679 /* If the load is permuted then the alignment is determined by
1680 the first group element not by the first scalar stmt DR. */
52eab378
RB
1681 stmt = GROUP_FIRST_ELEMENT (stmt_info);
1682 stmt_info = vinfo_for_stmt (stmt);
8155f4d8 1683 /* Record the cost for the permutation. */
29afecdf
RB
1684 unsigned n_perms;
1685 vect_transform_slp_perm_load (node, vNULL, NULL,
1686 ncopies_for_cost, instance, true,
1687 &n_perms);
1688 record_stmt_cost (body_cost_vec, n_perms, vec_perm,
8155f4d8 1689 stmt_info, 0, vect_body);
8155f4d8
RB
1690 unsigned nunits
1691 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
29afecdf
RB
1692 /* And adjust the number of loads performed. This handles
1693 redundancies as well as loads that are later dead. */
1694 auto_sbitmap perm (GROUP_SIZE (stmt_info));
1695 bitmap_clear (perm);
1696 for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
1697 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
1698 ncopies_for_cost = 0;
1699 bool load_seen = false;
1700 for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
1701 {
1702 if (i % nunits == 0)
1703 {
1704 if (load_seen)
1705 ncopies_for_cost++;
1706 load_seen = false;
1707 }
1708 if (bitmap_bit_p (perm, i))
1709 load_seen = true;
1710 }
1711 if (load_seen)
1712 ncopies_for_cost++;
1713 gcc_assert (ncopies_for_cost
1714 <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
1715 + nunits - 1) / nunits);
8155f4d8 1716 ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
52eab378 1717 }
8155f4d8 1718 /* Record the cost for the vector loads. */
2de001ee
RS
1719 vect_model_load_cost (stmt_info, ncopies_for_cost,
1720 memory_access_type, node, prologue_cost_vec,
1721 body_cost_vec);
89483f99 1722 return;
23847df4
RB
1723 }
1724 }
e7baeb39
RB
1725 else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
1726 {
1727 /* ncopies_for_cost is the number of IVs we generate. */
1728 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1729 stmt_info, 0, vect_body);
1730
1731 /* Prologue cost for the initial values and step vector. */
1732 record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
1733 CONSTANT_CLASS_P
1734 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1735 (stmt_info))
1736 ? vector_load : vec_construct,
1737 stmt_info, 0, vect_prologue);
1738 record_stmt_cost (prologue_cost_vec, 1,
1739 CONSTANT_CLASS_P
1740 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
1741 ? vector_load : vec_construct,
1742 stmt_info, 0, vect_prologue);
1743
1744 /* ??? No easy way to get at the actual number of vector stmts
1745 to be geneated and thus the derived IVs. */
1746 }
89483f99 1747 else
6876e5bc
RB
1748 {
1749 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1750 stmt_info, 0, vect_body);
89483f99
RB
1751 if (SLP_TREE_TWO_OPERATORS (node))
1752 {
1753 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1754 stmt_info, 0, vect_body);
1755 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1756 stmt_info, 0, vect_body);
1757 }
6876e5bc 1758 }
23847df4 1759
603cca93
RB
1760 /* Push SLP node def-type to stmts. */
1761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1762 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1763 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1764 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
1765
23847df4
RB
1766 /* Scan operands and account for prologue cost of constants/externals.
1767 ??? This over-estimates cost for multiple uses and should be
1768 re-engineered. */
603cca93 1769 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
23847df4
RB
1770 lhs = gimple_get_lhs (stmt);
1771 for (i = 0; i < gimple_num_ops (stmt); ++i)
1772 {
81c40241 1773 tree op = gimple_op (stmt, i);
355fe088 1774 gimple *def_stmt;
23847df4
RB
1775 enum vect_def_type dt;
1776 if (!op || op == lhs)
1777 continue;
81c40241 1778 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
2e8ab70c
RB
1779 {
1780 /* Without looking at the actual initializer a vector of
1781 constants can be implemented as load from the constant pool.
1782 ??? We need to pass down stmt_info for a vector type
1783 even if it points to the wrong stmt. */
1784 if (dt == vect_constant_def)
1785 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1786 stmt_info, 0, vect_prologue);
1787 else if (dt == vect_external_def)
1788 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1789 stmt_info, 0, vect_prologue);
1790 }
23847df4 1791 }
603cca93
RB
1792
1793 /* Restore stmt def-types. */
1794 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1795 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1796 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1797 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
23847df4
RB
1798}
1799
1800/* Compute the cost for the SLP instance INSTANCE. */
1801
1802static void
1a4b99c1 1803vect_analyze_slp_cost (slp_instance instance, void *data)
23847df4
RB
1804{
1805 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1806 unsigned ncopies_for_cost;
1807 stmt_info_for_cost *si;
1808 unsigned i;
1809
b939ea86
RB
1810 if (dump_enabled_p ())
1811 dump_printf_loc (MSG_NOTE, vect_location,
1812 "=== vect_analyze_slp_cost ===\n");
1813
23847df4
RB
1814 /* Calculate the number of vector stmts to create based on the unrolling
1815 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1816 GROUP_SIZE / NUNITS otherwise. */
1817 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1a4b99c1
RB
1818 slp_tree node = SLP_INSTANCE_TREE (instance);
1819 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1820 /* Adjust the group_size by the vectorization factor which is always one
1821 for basic-block vectorization. */
1822 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1823 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1824 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1825 /* For reductions look at a reduction operand in case the reduction
1826 operation is widening like DOT_PROD or SAD. */
1827 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1828 {
355fe088 1829 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1a4b99c1
RB
1830 switch (gimple_assign_rhs_code (stmt))
1831 {
1832 case DOT_PROD_EXPR:
1833 case SAD_EXPR:
1834 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1835 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1836 break;
1837 default:;
1838 }
1839 }
23847df4
RB
1840 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1841
1842 prologue_cost_vec.create (10);
1843 body_cost_vec.create (10);
1a4b99c1
RB
1844 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1845 &prologue_cost_vec, &body_cost_vec,
1846 ncopies_for_cost);
23847df4
RB
1847
1848 /* Record the prologue costs, which were delayed until we were
1a4b99c1 1849 sure that SLP was successful. */
23847df4
RB
1850 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1851 {
1852 struct _stmt_vec_info *stmt_info
1853 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1854 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1855 si->misalign, vect_prologue);
1856 }
1857
1a4b99c1
RB
1858 /* Record the instance's instructions in the target cost model. */
1859 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1860 {
1861 struct _stmt_vec_info *stmt_info
1862 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1863 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1864 si->misalign, vect_body);
1865 }
1866
23847df4 1867 prologue_cost_vec.release ();
1a4b99c1 1868 body_cost_vec.release ();
23847df4 1869}
e4a707c4 1870
1ba91a49
AL
1871/* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1872 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1873 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1874 containing the remainder.
1875 Return the first stmt in the second group. */
1876
1877static gimple *
1878vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
1879{
1880 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
1881 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1882 gcc_assert (group1_size > 0);
1883 int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
1884 gcc_assert (group2_size > 0);
1885 GROUP_SIZE (first_vinfo) = group1_size;
1886
1887 gimple *stmt = first_stmt;
1888 for (unsigned i = group1_size; i > 1; i--)
1889 {
1890 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1891 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1892 }
1893 /* STMT is now the last element of the first group. */
1894 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
1895 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;
1896
1897 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
1898 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
1899 {
1900 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
1901 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
1902 }
1903
1904 /* For the second group, the GROUP_GAP is that before the original group,
1905 plus skipping over the first vector. */
1906 GROUP_GAP (vinfo_for_stmt (group2)) =
1907 GROUP_GAP (first_vinfo) + group1_size;
1908
1909 /* GROUP_GAP of the first group now has to skip over the second group too. */
1910 GROUP_GAP (first_vinfo) += group2_size;
1911
1912 if (dump_enabled_p ())
1913 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1914 group1_size, group2_size);
1915
1916 return group2;
1917}
1918
0d0293ac 1919/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1920 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1921 Return FALSE if it's impossible to SLP any stmt in the loop. */
1922
1923static bool
310213d4 1924vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1925 gimple *stmt, unsigned max_tree_size)
ebfd146a
IR
1926{
1927 slp_instance new_instance;
d092494c 1928 slp_tree node;
e14c1050 1929 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
ebfd146a 1930 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1931 tree vectype, scalar_type = NULL_TREE;
355fe088 1932 gimple *next;
1ba91a49 1933 unsigned int i;
ebfd146a 1934 unsigned int max_nunits = 0;
9771b263 1935 vec<slp_tree> loads;
b5aeb3bb 1936 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
355fe088 1937 vec<gimple *> scalar_stmts;
b5aeb3bb 1938
b010117a 1939 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1940 {
b010117a
IR
1941 if (dr)
1942 {
1943 scalar_type = TREE_TYPE (DR_REF (dr));
1944 vectype = get_vectype_for_scalar_type (scalar_type);
1945 }
1946 else
1947 {
310213d4 1948 gcc_assert (is_a <loop_vec_info> (vinfo));
b010117a
IR
1949 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1950 }
1951
e14c1050 1952 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1953 }
1954 else
1955 {
310213d4 1956 gcc_assert (is_a <loop_vec_info> (vinfo));
b5aeb3bb 1957 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
310213d4 1958 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1959 }
b8698a0f 1960
ebfd146a
IR
1961 if (!vectype)
1962 {
73fbfcad 1963 if (dump_enabled_p ())
ebfd146a 1964 {
78c60e3d
SS
1965 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1966 "Build SLP failed: unsupported data-type ");
1967 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1968 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1969 }
b5aeb3bb 1970
ebfd146a
IR
1971 return false;
1972 }
ebfd146a 1973 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342 1974
0d0293ac 1975 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1976 scalar_stmts.create (group_size);
ebfd146a 1977 next = stmt;
b010117a 1978 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 1979 {
b5aeb3bb
IR
1980 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1981 while (next)
1982 {
f7e531cf
IR
1983 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1984 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1985 scalar_stmts.safe_push (
1986 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1987 else
9771b263 1988 scalar_stmts.safe_push (next);
e14c1050 1989 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 1990 }
14a61437
RB
1991 /* Mark the first element of the reduction chain as reduction to properly
1992 transform the node. In the reduction analysis phase only the last
1993 element of the chain is marked as reduction. */
1994 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1995 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
1996 }
1997 else
1998 {
1999 /* Collect reduction statements. */
310213d4 2000 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
9771b263
DN
2001 for (i = 0; reductions.iterate (i, &next); i++)
2002 scalar_stmts.safe_push (next);
ebfd146a
IR
2003 }
2004
9771b263 2005 loads.create (group_size);
ebfd146a
IR
2006
2007 /* Build the tree for the SLP instance. */
89d390e5
RB
2008 bool *matches = XALLOCAVEC (bool, group_size);
2009 unsigned npermutes = 0;
26d66f28 2010 bst_fail = new hash_set <vec <gimple *>, bst_traits> ();
e569db5f 2011 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
e403d17e 2012 &max_nunits, &loads, matches, &npermutes,
e569db5f 2013 NULL, max_tree_size);
26d66f28 2014 delete bst_fail;
e569db5f 2015 if (node != NULL)
ebfd146a 2016 {
4ef69dfc 2017 /* Calculate the unrolling factor based on the smallest type. */
e569db5f
VK
2018 unrolling_factor
2019 = least_common_multiple (max_nunits, group_size) / group_size;
b8698a0f 2020
e569db5f
VK
2021 if (unrolling_factor != 1
2022 && is_a <bb_vec_info> (vinfo))
2023 {
2024
2025 if (max_nunits > group_size)
4ef69dfc 2026 {
e645e942 2027 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e569db5f
VK
2028 "Build SLP failed: store group "
2029 "size not a multiple of the vector size "
2030 "in basic block SLP\n");
c7e62a26 2031 vect_free_slp_tree (node);
9771b263 2032 loads.release ();
4ef69dfc
IR
2033 return false;
2034 }
e569db5f
VK
2035 /* Fatal mismatch. */
2036 matches[group_size/max_nunits * max_nunits] = false;
2037 vect_free_slp_tree (node);
2038 loads.release ();
2039 }
2040 else
2041 {
4ef69dfc
IR
2042 /* Create a new SLP instance. */
2043 new_instance = XNEW (struct _slp_instance);
2044 SLP_INSTANCE_TREE (new_instance) = node;
2045 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 2046 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 2047 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
2048
2049 /* Compute the load permutation. */
2050 slp_tree load_node;
2051 bool loads_permuted = false;
abf9bfbc
RB
2052 FOR_EACH_VEC_ELT (loads, i, load_node)
2053 {
01d8bf07 2054 vec<unsigned> load_permutation;
abf9bfbc 2055 int j;
355fe088 2056 gimple *load, *first_stmt;
01d8bf07
RB
2057 bool this_load_permuted = false;
2058 load_permutation.create (group_size);
6983e6b5
RB
2059 first_stmt = GROUP_FIRST_ELEMENT
2060 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
2061 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
2062 {
e569db5f
VK
2063 int load_place = vect_get_place_in_interleaving_chain
2064 (load, first_stmt);
6983e6b5
RB
2065 gcc_assert (load_place != -1);
2066 if (load_place != j)
01d8bf07 2067 this_load_permuted = true;
abf9bfbc
RB
2068 load_permutation.safe_push (load_place);
2069 }
fe2bef71
RB
2070 if (!this_load_permuted
2071 /* The load requires permutation when unrolling exposes
2072 a gap either because the group is larger than the SLP
2073 group-size or because there is a gap between the groups. */
2074 && (unrolling_factor == 1
2075 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
2076 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
2077 {
2078 load_permutation.release ();
2079 continue;
2080 }
2081 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2082 loads_permuted = true;
abf9bfbc 2083 }
6aa904c4
IR
2084
2085 if (loads_permuted)
ebfd146a 2086 {
01d8bf07 2087 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 2088 {
73fbfcad 2089 if (dump_enabled_p ())
ebfd146a 2090 {
e645e942 2091 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
2092 "Build SLP failed: unsupported load "
2093 "permutation ");
e569db5f
VK
2094 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2095 TDF_SLIM, stmt, 0);
ebfd146a 2096 }
ebfd146a
IR
2097 vect_free_slp_instance (new_instance);
2098 return false;
2099 }
ebfd146a 2100 }
ebfd146a 2101
e569db5f 2102 /* If the loads and stores can be handled with load/store-lan
bb0f5ca7
AL
2103 instructions do not generate this SLP instance. */
2104 if (is_a <loop_vec_info> (vinfo)
2105 && loads_permuted
2106 && dr && vect_store_lanes_supported (vectype, group_size))
2107 {
2108 slp_tree load_node;
2109 FOR_EACH_VEC_ELT (loads, i, load_node)
2110 {
2111 gimple *first_stmt = GROUP_FIRST_ELEMENT
2112 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
2113 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
e569db5f
VK
2114 /* Use SLP for strided accesses (or if we
2115 can't load-lanes). */
bb0f5ca7
AL
2116 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2117 || ! vect_load_lanes_supported
2118 (STMT_VINFO_VECTYPE (stmt_vinfo),
2119 GROUP_SIZE (stmt_vinfo)))
2120 break;
2121 }
2122 if (i == loads.length ())
2123 {
2124 if (dump_enabled_p ())
2125 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2126 "Built SLP cancelled: can use "
2127 "load/store-lanes\n");
2128 vect_free_slp_instance (new_instance);
2129 return false;
2130 }
2131 }
2132
310213d4 2133 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 2134
73fbfcad 2135 if (dump_enabled_p ())
c2a12ca0
RB
2136 {
2137 dump_printf_loc (MSG_NOTE, vect_location,
2138 "Final SLP tree for instance:\n");
2139 vect_print_slp_tree (MSG_NOTE, vect_location, node);
2140 }
ebfd146a
IR
2141
2142 return true;
2143 }
e569db5f
VK
2144 }
2145 else
2146 {
ebfd146a
IR
2147 /* Failed to SLP. */
2148 /* Free the allocated memory. */
e403d17e 2149 scalar_stmts.release ();
9771b263 2150 loads.release ();
e569db5f 2151 }
b8698a0f 2152
1ba91a49 2153 /* For basic block SLP, try to break the group up into multiples of the
97a1a642 2154 vector size. */
1ba91a49
AL
2155 if (is_a <bb_vec_info> (vinfo)
2156 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2157 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
2158 {
2159 /* We consider breaking the group only on VF boundaries from the existing
2160 start. */
2161 for (i = 0; i < group_size; i++)
2162 if (!matches[i]) break;
2163
97a1a642 2164 if (i >= nunits && i < group_size)
1ba91a49
AL
2165 {
2166 /* Split into two groups at the first vector boundary before i. */
97a1a642
RB
2167 gcc_assert ((nunits & (nunits - 1)) == 0);
2168 unsigned group1_size = i & ~(nunits - 1);
1ba91a49
AL
2169
2170 gimple *rest = vect_split_slp_store_group (stmt, group1_size);
2171 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
2172 /* If the first non-match was in the middle of a vector,
2173 skip the rest of that vector. */
2174 if (group1_size < i)
2175 {
97a1a642 2176 i = group1_size + nunits;
1ba91a49 2177 if (i < group_size)
97a1a642 2178 rest = vect_split_slp_store_group (rest, nunits);
1ba91a49
AL
2179 }
2180 if (i < group_size)
2181 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2182 return res;
2183 }
2184 /* Even though the first vector did not all match, we might be able to SLP
2185 (some) of the remainder. FORNOW ignore this possibility. */
2186 }
2187
a70d6342 2188 return false;
ebfd146a
IR
2189}
2190
2191
ff802fa1 2192/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
2193 trees of packed scalar stmts if SLP is possible. */
2194
2195bool
310213d4 2196vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
2197{
2198 unsigned int i;
355fe088 2199 gimple *first_element;
ebfd146a 2200
73fbfcad 2201 if (dump_enabled_p ())
e645e942 2202 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
ebfd146a 2203
0d0293ac 2204 /* Find SLP sequences starting from groups of grouped stores. */
310213d4 2205 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
0630a4ec 2206 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
ebfd146a 2207
310213d4 2208 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 2209 {
310213d4
RB
2210 if (loop_vinfo->reduction_chains.length () > 0)
2211 {
2212 /* Find SLP sequences starting from reduction chains. */
2213 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
0630a4ec 2214 if (! vect_analyze_slp_instance (vinfo, first_element,
310213d4 2215 max_tree_size))
6b5e165b
RB
2216 {
2217 /* Dissolve reduction chain group. */
2218 gimple *next, *stmt = first_element;
2219 while (stmt)
2220 {
2221 stmt_vec_info vinfo = vinfo_for_stmt (stmt);
2222 next = GROUP_NEXT_ELEMENT (vinfo);
2223 GROUP_FIRST_ELEMENT (vinfo) = NULL;
2224 GROUP_NEXT_ELEMENT (vinfo) = NULL;
2225 stmt = next;
2226 }
2227 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
2228 = vect_internal_def;
2229 }
310213d4 2230 }
b010117a 2231
310213d4 2232 /* Find SLP sequences starting from groups of reductions. */
0630a4ec
RB
2233 if (loop_vinfo->reductions.length () > 1)
2234 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2235 max_tree_size);
310213d4 2236 }
b5aeb3bb 2237
ebfd146a
IR
2238 return true;
2239}
2240
2241
2242/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
2243 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2244 least one instance. */
ebfd146a 2245
437f4a00 2246bool
ebfd146a
IR
2247vect_make_slp_decision (loop_vec_info loop_vinfo)
2248{
2249 unsigned int i, unrolling_factor = 1;
9771b263 2250 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2251 slp_instance instance;
2252 int decided_to_slp = 0;
2253
73fbfcad 2254 if (dump_enabled_p ())
e645e942
TJ
2255 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2256 "\n");
ebfd146a 2257
9771b263 2258 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
2259 {
2260 /* FORNOW: SLP if you can. */
2261 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
2262 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
2263
ff802fa1 2264 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 2265 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 2266 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
2267 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2268 decided_to_slp++;
2269 }
2270
2271 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2272
73fbfcad 2273 if (decided_to_slp && dump_enabled_p ())
ccb3ad87 2274 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2275 "Decided to SLP %d instances. Unrolling factor %d\n",
78c60e3d 2276 decided_to_slp, unrolling_factor);
437f4a00
IR
2277
2278 return (decided_to_slp > 0);
ebfd146a
IR
2279}
2280
2281
2282/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 2283 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
2284
2285static void
642fce57 2286vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 2287{
355fe088 2288 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 2289 imm_use_iterator imm_iter;
355fe088 2290 gimple *use_stmt;
642fce57 2291 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 2292 slp_tree child;
f2c74cc4 2293 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
2294 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2295 int j;
2296
2297 /* Propagate hybrid down the SLP tree. */
2298 if (stype == hybrid)
2299 ;
2300 else if (HYBRID_SLP_STMT (stmt_vinfo))
2301 stype = hybrid;
2302 else
2303 {
2304 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2305 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2935d994
RB
2306 /* If we get a pattern stmt here we have to use the LHS of the
2307 original stmt for immediate uses. */
2308 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
2309 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
29764870 2310 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
e7baeb39
RB
2311 tree def;
2312 if (gimple_code (stmt) == GIMPLE_PHI)
2313 def = gimple_phi_result (stmt);
2314 else
2315 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2316 if (def)
2317 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
29764870
RB
2318 {
2319 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
2320 continue;
2321 use_vinfo = vinfo_for_stmt (use_stmt);
2322 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2323 && STMT_VINFO_RELATED_STMT (use_vinfo))
2324 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2325 if (!STMT_SLP_TYPE (use_vinfo)
2326 && (STMT_VINFO_RELEVANT (use_vinfo)
2327 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2328 && !(gimple_code (use_stmt) == GIMPLE_PHI
2329 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
2330 {
2331 if (dump_enabled_p ())
2332 {
2333 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2334 "def in non-SLP stmt: ");
2335 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2336 }
2337 stype = hybrid;
2338 }
29764870 2339 }
642fce57 2340 }
ebfd146a 2341
502f0263
RB
2342 if (stype == hybrid
2343 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
2344 {
2345 if (dump_enabled_p ())
2346 {
2347 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2348 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2349 }
2350 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2351 }
ebfd146a 2352
642fce57 2353 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
603cca93 2354 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
90dd6e3d 2355 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2356}
f2c74cc4 2357
642fce57 2358/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2359
642fce57
RB
2360static tree
2361vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2362{
2363 walk_stmt_info *wi = (walk_stmt_info *)data;
2364 struct loop *loopp = (struct loop *)wi->info;
2365
2366 if (wi->is_lhs)
2367 return NULL_TREE;
2368
2369 if (TREE_CODE (*tp) == SSA_NAME
2370 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2371 {
355fe088 2372 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
642fce57
RB
2373 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2374 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
b1af7da6
RB
2375 {
2376 if (dump_enabled_p ())
2377 {
2378 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2379 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2380 }
2381 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2382 }
642fce57
RB
2383 }
2384
2385 return NULL_TREE;
ebfd146a
IR
2386}
2387
642fce57
RB
2388static tree
2389vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2390 walk_stmt_info *)
2391{
9186a9d3 2392 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
642fce57
RB
2393 /* If the stmt is in a SLP instance then this isn't a reason
2394 to mark use definitions in other SLP instances as hybrid. */
9186a9d3
RB
2395 if (! STMT_SLP_TYPE (use_vinfo)
2396 && (STMT_VINFO_RELEVANT (use_vinfo)
2397 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2398 && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
2399 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2400 ;
2401 else
642fce57
RB
2402 *handled = true;
2403 return NULL_TREE;
2404}
ebfd146a
IR
2405
2406/* Find stmts that must be both vectorized and SLPed. */
2407
2408void
2409vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2410{
2411 unsigned int i;
9771b263 2412 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2413 slp_instance instance;
2414
73fbfcad 2415 if (dump_enabled_p ())
e645e942
TJ
2416 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2417 "\n");
ebfd146a 2418
642fce57
RB
2419 /* First walk all pattern stmt in the loop and mark defs of uses as
2420 hybrid because immediate uses in them are not recorded. */
2421 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2422 {
2423 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2424 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2425 gsi_next (&gsi))
2426 {
355fe088 2427 gimple *stmt = gsi_stmt (gsi);
642fce57
RB
2428 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2429 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2430 {
2431 walk_stmt_info wi;
2432 memset (&wi, 0, sizeof (wi));
2433 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2434 gimple_stmt_iterator gsi2
2435 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2436 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2437 vect_detect_hybrid_slp_1, &wi);
2438 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2439 vect_detect_hybrid_slp_2,
2440 vect_detect_hybrid_slp_1, &wi);
2441 }
2442 }
2443 }
2444
2445 /* Then walk the SLP instance trees marking stmts with uses in
2446 non-SLP stmts as hybrid, also propagating hybrid down the
2447 SLP tree, collecting the above info on-the-fly. */
9771b263 2448 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2449 {
2450 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2451 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2452 i, pure_slp);
2453 }
ebfd146a
IR
2454}
2455
a70d6342 2456
2c515559
RS
2457/* Initialize a bb_vec_info struct for the statements between
2458 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2459
2460_bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2461 gimple_stmt_iterator region_end_in)
2462 : vec_info (vec_info::bb, init_cost (NULL)),
2463 bb (gsi_bb (region_begin_in)),
2464 region_begin (region_begin_in),
2465 region_end (region_end_in)
a70d6342 2466{
a70d6342
IR
2467 gimple_stmt_iterator gsi;
2468
61d371eb
RB
2469 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2470 gsi_next (&gsi))
a70d6342 2471 {
355fe088 2472 gimple *stmt = gsi_stmt (gsi);
a70d6342 2473 gimple_set_uid (stmt, 0);
2c515559 2474 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
a70d6342
IR
2475 }
2476
2c515559 2477 bb->aux = this;
a70d6342
IR
2478}
2479
2480
2481/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2482 stmts in the basic block. */
2483
2c515559 2484_bb_vec_info::~_bb_vec_info ()
a70d6342 2485{
2c515559
RS
2486 for (gimple_stmt_iterator si = region_begin;
2487 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
a70d6342 2488 {
355fe088 2489 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2490 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2491
2492 if (stmt_info)
2493 /* Free stmt_vec_info. */
2494 free_stmt_vec_info (stmt);
61d371eb
RB
2495
2496 /* Reset region marker. */
2497 gimple_set_uid (stmt, -1);
a70d6342
IR
2498 }
2499
2c515559 2500 bb->aux = NULL;
a70d6342
IR
2501}
2502
2503
2504/* Analyze statements contained in SLP tree node after recursively analyzing
2505 the subtree. Return TRUE if the operations are supported. */
2506
2507static bool
891ad31c 2508vect_slp_analyze_node_operations (slp_tree node, slp_instance node_instance)
a70d6342
IR
2509{
2510 bool dummy;
603cca93 2511 int i, j;
355fe088 2512 gimple *stmt;
d755c7ef 2513 slp_tree child;
a70d6342 2514
603cca93 2515 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
a70d6342
IR
2516 return true;
2517
9771b263 2518 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
891ad31c 2519 if (!vect_slp_analyze_node_operations (child, node_instance))
d092494c 2520 return false;
a70d6342 2521
bd2f172f
RB
2522 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2523 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2524 gcc_assert (stmt_info);
2525 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2526
2527 /* For BB vectorization vector types are assigned here.
2528 Memory accesses already got their vector type assigned
2529 in vect_analyze_data_refs. */
2530 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2531 if (bb_vinfo
2532 && ! STMT_VINFO_DATA_REF (stmt_info))
a70d6342 2533 {
bd2f172f
RB
2534 gcc_assert (PURE_SLP_STMT (stmt_info));
2535
2536 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
2537 if (dump_enabled_p ())
2538 {
2539 dump_printf_loc (MSG_NOTE, vect_location,
2540 "get vectype for scalar type: ");
2541 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
2542 dump_printf (MSG_NOTE, "\n");
2543 }
2544
2545 tree vectype = get_vectype_for_scalar_type (scalar_type);
2546 if (!vectype)
2547 {
2548 if (dump_enabled_p ())
2549 {
2550 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2551 "not SLPed: unsupported data-type ");
2552 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2553 scalar_type);
2554 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2555 }
2556 return false;
2557 }
2558
2559 if (dump_enabled_p ())
2560 {
2561 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2562 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2563 dump_printf (MSG_NOTE, "\n");
2564 }
2565
2566 gimple *sstmt;
2567 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2568 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
a70d6342
IR
2569 }
2570
bd2f172f
RB
2571 /* Push SLP node def-type to stmt operands. */
2572 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2573 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2574 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2575 = SLP_TREE_DEF_TYPE (child);
891ad31c 2576 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
bd2f172f
RB
2577 /* Restore def-types. */
2578 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2579 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2580 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
2581 = vect_internal_def;
2582 if (! res)
2583 return false;
2584
2585 return true;
a70d6342
IR
2586}
2587
2588
ff802fa1 2589/* Analyze statements in SLP instances of the basic block. Return TRUE if the
a70d6342
IR
2590 operations are supported. */
2591
a12e42fc 2592bool
1a4b99c1 2593vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
a70d6342 2594{
a70d6342
IR
2595 slp_instance instance;
2596 int i;
2597
a12e42fc
RB
2598 if (dump_enabled_p ())
2599 dump_printf_loc (MSG_NOTE, vect_location,
2600 "=== vect_slp_analyze_operations ===\n");
2601
9771b263 2602 for (i = 0; slp_instances.iterate (i, &instance); )
a70d6342 2603 {
891ad31c
RB
2604 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance),
2605 instance))
a70d6342 2606 {
a12e42fc
RB
2607 dump_printf_loc (MSG_NOTE, vect_location,
2608 "removing SLP instance operations starting from: ");
2609 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2610 SLP_TREE_SCALAR_STMTS
2611 (SLP_INSTANCE_TREE (instance))[0], 0);
2612 vect_free_slp_instance (instance);
9771b263 2613 slp_instances.ordered_remove (i);
a70d6342
IR
2614 }
2615 else
1a4b99c1
RB
2616 {
2617 /* Compute the costs of the SLP instance. */
2618 vect_analyze_slp_cost (instance, data);
2619 i++;
2620 }
b8698a0f
L
2621 }
2622
9771b263 2623 if (!slp_instances.length ())
a70d6342
IR
2624 return false;
2625
2626 return true;
2627}
2628
6eddf228
RB
2629
2630/* Compute the scalar cost of the SLP node NODE and its children
2631 and return it. Do not account defs that are marked in LIFE and
2632 update LIFE according to uses of NODE. */
2633
2634static unsigned
292cba13 2635vect_bb_slp_scalar_cost (basic_block bb,
ff4c81cc 2636 slp_tree node, vec<bool, va_heap> *life)
6eddf228
RB
2637{
2638 unsigned scalar_cost = 0;
2639 unsigned i;
355fe088 2640 gimple *stmt;
6eddf228
RB
2641 slp_tree child;
2642
2643 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2644 {
2645 unsigned stmt_cost;
2646 ssa_op_iter op_iter;
2647 def_operand_p def_p;
2648 stmt_vec_info stmt_info;
2649
ff4c81cc 2650 if ((*life)[i])
6eddf228
RB
2651 continue;
2652
2653 /* If there is a non-vectorized use of the defs then the scalar
2654 stmt is kept live in which case we do not account it or any
2655 required defs in the SLP children in the scalar cost. This
2656 way we make the vectorization more costly when compared to
2657 the scalar cost. */
2658 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2659 {
2660 imm_use_iterator use_iter;
355fe088 2661 gimple *use_stmt;
6eddf228 2662 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5 2663 if (!is_gimple_debug (use_stmt)
61d371eb
RB
2664 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2665 use_stmt)
603cca93 2666 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
6eddf228 2667 {
ff4c81cc 2668 (*life)[i] = true;
6eddf228
RB
2669 BREAK_FROM_IMM_USE_STMT (use_iter);
2670 }
2671 }
ff4c81cc 2672 if ((*life)[i])
6eddf228
RB
2673 continue;
2674
b555a2e4
RB
2675 /* Count scalar stmts only once. */
2676 if (gimple_visited_p (stmt))
2677 continue;
2678 gimple_set_visited (stmt, true);
2679
6eddf228
RB
2680 stmt_info = vinfo_for_stmt (stmt);
2681 if (STMT_VINFO_DATA_REF (stmt_info))
2682 {
2683 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2684 stmt_cost = vect_get_stmt_cost (scalar_load);
2685 else
2686 stmt_cost = vect_get_stmt_cost (scalar_store);
2687 }
2688 else
2689 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2690
2691 scalar_cost += stmt_cost;
2692 }
2693
faa5399b 2694 auto_vec<bool, 20> subtree_life;
6eddf228 2695 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
faa5399b
RB
2696 {
2697 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2698 {
2699 /* Do not directly pass LIFE to the recursive call, copy it to
2700 confine changes in the callee to the current child/subtree. */
2701 subtree_life.safe_splice (*life);
2702 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life);
2703 subtree_life.truncate (0);
2704 }
2705 }
6eddf228
RB
2706
2707 return scalar_cost;
2708}
2709
69f11a13
IR
2710/* Check if vectorization of the basic block is profitable. */
2711
2712static bool
2713vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2714{
9771b263 2715 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2716 slp_instance instance;
1a4b99c1 2717 int i;
c3e7ee41 2718 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2719 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2720
2721 /* Calculate scalar cost. */
6eddf228 2722 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2723 {
00f96dc9 2724 auto_vec<bool, 20> life;
ff4c81cc 2725 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
292cba13
RB
2726 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2727 SLP_INSTANCE_TREE (instance),
ff4c81cc 2728 &life);
69f11a13
IR
2729 }
2730
b555a2e4
RB
2731 /* Unset visited flag. */
2732 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2733 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2734 gimple_set_visited (gsi_stmt (gsi), false);
2735
c3e7ee41 2736 /* Complete the target-specific cost calculation. */
92345349
BS
2737 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2738 &vec_inside_cost, &vec_epilogue_cost);
2739
2740 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2741
73fbfcad 2742 if (dump_enabled_p ())
69f11a13 2743 {
78c60e3d
SS
2744 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2745 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2746 vec_inside_cost);
2747 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2748 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2749 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2750 }
2751
a6524bba
RB
2752 /* Vectorization is profitable if its cost is more than the cost of scalar
2753 version. Note that we err on the vector side for equal cost because
2754 the cost estimate is otherwise quite pessimistic (constant uses are
2755 free on the scalar side but cost a load on the vector side for
2756 example). */
2757 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2758 return false;
2759
2760 return true;
2761}
2762
a5b50aa1
RB
2763/* Check if the basic block can be vectorized. Returns a bb_vec_info
2764 if so and sets fatal to true if failure is independent of
2765 current_vector_size. */
a70d6342 2766
8e19f5a1 2767static bb_vec_info
61d371eb
RB
2768vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2769 gimple_stmt_iterator region_end,
a5b50aa1
RB
2770 vec<data_reference_p> datarefs, int n_stmts,
2771 bool &fatal)
a70d6342
IR
2772{
2773 bb_vec_info bb_vinfo;
a70d6342 2774 slp_instance instance;
8e19f5a1 2775 int i;
777e1f09 2776 int min_vf = 2;
e4a707c4 2777
a5b50aa1
RB
2778 /* The first group of checks is independent of the vector size. */
2779 fatal = true;
2780
61d371eb
RB
2781 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2782 {
2783 if (dump_enabled_p ())
2784 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2785 "not vectorized: too many instructions in "
2786 "basic block.\n");
2787 free_data_refs (datarefs);
2788 return NULL;
2789 }
2790
2c515559 2791 bb_vinfo = new _bb_vec_info (region_begin, region_end);
a70d6342
IR
2792 if (!bb_vinfo)
2793 return NULL;
2794
61d371eb 2795 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
428db0ba
RB
2796
2797 /* Analyze the data references. */
2798
2799 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2800 {
73fbfcad 2801 if (dump_enabled_p ())
78c60e3d
SS
2802 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2803 "not vectorized: unhandled data-ref in basic "
2804 "block.\n");
b8698a0f 2805
2c515559 2806 delete bb_vinfo;
a70d6342
IR
2807 return NULL;
2808 }
2809
fcac74a1 2810 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2811 {
73fbfcad 2812 if (dump_enabled_p ())
78c60e3d
SS
2813 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2814 "not vectorized: not enough data-refs in "
2815 "basic block.\n");
a70d6342 2816
2c515559 2817 delete bb_vinfo;
a70d6342
IR
2818 return NULL;
2819 }
2820
310213d4 2821 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2822 {
2823 if (dump_enabled_p ())
2824 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2825 "not vectorized: unhandled data access in "
2826 "basic block.\n");
2827
2c515559 2828 delete bb_vinfo;
5abe1e05
RB
2829 return NULL;
2830 }
2831
a5b50aa1
RB
2832 /* If there are no grouped stores in the region there is no need
2833 to continue with pattern recog as vect_analyze_slp will fail
2834 anyway. */
2835 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2836 {
73fbfcad 2837 if (dump_enabled_p ())
a5b50aa1
RB
2838 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2839 "not vectorized: no grouped stores in "
2840 "basic block.\n");
b8698a0f 2841
2c515559 2842 delete bb_vinfo;
a70d6342
IR
2843 return NULL;
2844 }
b8698a0f 2845
a5b50aa1
RB
2846 /* While the rest of the analysis below depends on it in some way. */
2847 fatal = false;
2848
2849 vect_pattern_recog (bb_vinfo);
2850
a70d6342
IR
2851 /* Check the SLP opportunities in the basic block, analyze and build SLP
2852 trees. */
310213d4 2853 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2854 {
73fbfcad 2855 if (dump_enabled_p ())
effb52da
RB
2856 {
2857 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2858 "Failed to SLP the basic block.\n");
2859 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2860 "not vectorized: failed to find SLP opportunities "
2861 "in basic block.\n");
2862 }
a70d6342 2863
2c515559 2864 delete bb_vinfo;
a70d6342
IR
2865 return NULL;
2866 }
b8698a0f 2867
62c8a2cf
RS
2868 vect_record_base_alignments (bb_vinfo);
2869
c2a12ca0
RB
2870 /* Analyze and verify the alignment of data references and the
2871 dependence in the SLP instances. */
a5b50aa1
RB
2872 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2873 {
c2a12ca0
RB
2874 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2875 || ! vect_slp_analyze_instance_dependence (instance))
a5b50aa1
RB
2876 {
2877 dump_printf_loc (MSG_NOTE, vect_location,
2878 "removing SLP instance operations starting from: ");
2879 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2880 SLP_TREE_SCALAR_STMTS
2881 (SLP_INSTANCE_TREE (instance))[0], 0);
2882 vect_free_slp_instance (instance);
2883 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2884 continue;
2885 }
c2a12ca0
RB
2886
2887 /* Mark all the statements that we want to vectorize as pure SLP and
2888 relevant. */
2889 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2890 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
2891
a5b50aa1
RB
2892 i++;
2893 }
a5b50aa1
RB
2894 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2895 {
2c515559 2896 delete bb_vinfo;
a5b50aa1
RB
2897 return NULL;
2898 }
2899
1a4b99c1
RB
2900 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2901 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
a70d6342 2902 {
73fbfcad 2903 if (dump_enabled_p ())
e645e942 2904 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2905 "not vectorized: bad operation in basic block.\n");
a70d6342 2906
2c515559 2907 delete bb_vinfo;
a70d6342
IR
2908 return NULL;
2909 }
2910
69f11a13 2911 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2912 if (!unlimited_cost_model (NULL)
69f11a13
IR
2913 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2914 {
73fbfcad 2915 if (dump_enabled_p ())
78c60e3d
SS
2916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2917 "not vectorized: vectorization is not "
2918 "profitable.\n");
69f11a13 2919
2c515559 2920 delete bb_vinfo;
69f11a13
IR
2921 return NULL;
2922 }
2923
73fbfcad 2924 if (dump_enabled_p ())
78c60e3d
SS
2925 dump_printf_loc (MSG_NOTE, vect_location,
2926 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2927
2928 return bb_vinfo;
2929}
2930
2931
428db0ba
RB
2932/* Main entry for the BB vectorizer. Analyze and transform BB, returns
2933 true if anything in the basic-block was vectorized. */
2934
2935bool
2936vect_slp_bb (basic_block bb)
8e19f5a1
IR
2937{
2938 bb_vec_info bb_vinfo;
8e19f5a1
IR
2939 gimple_stmt_iterator gsi;
2940 unsigned int vector_sizes;
61d371eb 2941 bool any_vectorized = false;
8e19f5a1 2942
73fbfcad 2943 if (dump_enabled_p ())
78c60e3d 2944 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
8e19f5a1 2945
8e19f5a1
IR
2946 /* Autodetect first vector size we try. */
2947 current_vector_size = 0;
2948 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2949
61d371eb
RB
2950 gsi = gsi_start_bb (bb);
2951
8e19f5a1
IR
2952 while (1)
2953 {
61d371eb
RB
2954 if (gsi_end_p (gsi))
2955 break;
2956
2957 gimple_stmt_iterator region_begin = gsi;
2958 vec<data_reference_p> datarefs = vNULL;
2959 int insns = 0;
2960
2961 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 2962 {
61d371eb
RB
2963 gimple *stmt = gsi_stmt (gsi);
2964 if (is_gimple_debug (stmt))
2965 continue;
2966 insns++;
2967
2968 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2969 vect_location = gimple_location (stmt);
2970
2971 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2972 break;
2973 }
2974
2975 /* Skip leading unhandled stmts. */
2976 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2977 {
2978 gsi_next (&gsi);
2979 continue;
2980 }
428db0ba 2981
61d371eb
RB
2982 gimple_stmt_iterator region_end = gsi;
2983
2984 bool vectorized = false;
a5b50aa1 2985 bool fatal = false;
61d371eb 2986 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
a5b50aa1 2987 datarefs, insns, fatal);
61d371eb
RB
2988 if (bb_vinfo
2989 && dbg_cnt (vect_slp))
2990 {
428db0ba 2991 if (dump_enabled_p ())
61d371eb 2992 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba
RB
2993
2994 vect_schedule_slp (bb_vinfo);
2995
2996 if (dump_enabled_p ())
2997 dump_printf_loc (MSG_NOTE, vect_location,
61d371eb 2998 "basic block part vectorized\n");
428db0ba 2999
61d371eb 3000 vectorized = true;
428db0ba 3001 }
2c515559 3002 delete bb_vinfo;
8e19f5a1 3003
61d371eb 3004 any_vectorized |= vectorized;
8e19f5a1
IR
3005
3006 vector_sizes &= ~current_vector_size;
61d371eb
RB
3007 if (vectorized
3008 || vector_sizes == 0
a5b50aa1
RB
3009 || current_vector_size == 0
3010 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3011 vector sizes will fail do not bother iterating. */
3012 || fatal)
61d371eb
RB
3013 {
3014 if (gsi_end_p (region_end))
3015 break;
8e19f5a1 3016
61d371eb
RB
3017 /* Skip the unhandled stmt. */
3018 gsi_next (&gsi);
3019
3020 /* And reset vector sizes. */
3021 current_vector_size = 0;
3022 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
3023 }
3024 else
3025 {
3026 /* Try the next biggest vector size. */
3027 current_vector_size = 1 << floor_log2 (vector_sizes);
3028 if (dump_enabled_p ())
3029 dump_printf_loc (MSG_NOTE, vect_location,
3030 "***** Re-trying analysis with "
3031 "vector size %d\n", current_vector_size);
3032
3033 /* Start over. */
3034 gsi = region_begin;
3035 }
8e19f5a1 3036 }
61d371eb
RB
3037
3038 return any_vectorized;
8e19f5a1
IR
3039}
3040
3041
e4af0bc4
IE
3042/* Return 1 if vector type of boolean constant which is OPNUM
3043 operand in statement STMT is a boolean vector. */
3044
3045static bool
3046vect_mask_constant_operand_p (gimple *stmt, int opnum)
3047{
3048 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3049 enum tree_code code = gimple_expr_code (stmt);
3050 tree op, vectype;
3051 gimple *def_stmt;
3052 enum vect_def_type dt;
3053
3054 /* For comparison and COND_EXPR type is chosen depending
3055 on the other comparison operand. */
3056 if (TREE_CODE_CLASS (code) == tcc_comparison)
3057 {
3058 if (opnum)
3059 op = gimple_assign_rhs1 (stmt);
3060 else
3061 op = gimple_assign_rhs2 (stmt);
3062
3063 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3064 &dt, &vectype))
3065 gcc_unreachable ();
3066
3067 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3068 }
3069
3070 if (code == COND_EXPR)
3071 {
3072 tree cond = gimple_assign_rhs1 (stmt);
3073
3074 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3075 op = cond;
3076 else if (opnum)
e4af0bc4
IE
3077 op = TREE_OPERAND (cond, 1);
3078 else
3079 op = TREE_OPERAND (cond, 0);
3080
3081 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3082 &dt, &vectype))
3083 gcc_unreachable ();
3084
3085 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3086 }
3087
3088 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3089}
3090
3091
b8698a0f
L
3092/* For constant and loop invariant defs of SLP_NODE this function returns
3093 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
3094 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3095 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
3096 REDUC_INDEX is the index of the reduction operand in the statements, unless
3097 it is -1. */
ebfd146a
IR
3098
3099static void
9dc3f7de 3100vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 3101 vec<tree> *vec_oprnds,
306b0c92 3102 unsigned int op_num, unsigned int number_of_vectors)
ebfd146a 3103{
355fe088
TS
3104 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3105 gimple *stmt = stmts[0];
ebfd146a 3106 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d2a12ae7 3107 unsigned nunits;
ebfd146a 3108 tree vec_cst;
d2a12ae7 3109 unsigned j, number_of_places_left_in_vector;
ebfd146a 3110 tree vector_type;
9dc3f7de 3111 tree vop;
9771b263 3112 int group_size = stmts.length ();
ebfd146a 3113 unsigned int vec_num, i;
d2a12ae7 3114 unsigned number_of_copies = 1;
9771b263
DN
3115 vec<tree> voprnds;
3116 voprnds.create (number_of_vectors);
ebfd146a 3117 bool constant_p, is_store;
b5aeb3bb 3118 tree neutral_op = NULL;
bac430c9 3119 enum tree_code code = gimple_expr_code (stmt);
13396b6e 3120 gimple_seq ctor_seq = NULL;
b5aeb3bb 3121
42fd8198 3122 /* Check if vector type is a boolean vector. */
2568d8a1 3123 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
e4af0bc4 3124 && vect_mask_constant_operand_p (stmt, op_num))
42fd8198
IE
3125 vector_type
3126 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3127 else
3128 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325
RB
3129 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3130
ebfd146a
IR
3131 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3132 {
3133 is_store = true;
3134 op = gimple_assign_rhs1 (stmt);
3135 }
3136 else
9dc3f7de
IR
3137 is_store = false;
3138
3139 gcc_assert (op);
ebfd146a 3140
ebfd146a 3141 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3142 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3143
3144 For example, we have two scalar operands, s1 and s2 (e.g., group of
3145 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3146 of this type can be packed in a vector). The output vector will contain
3147 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3148 will be 2).
3149
b8698a0f 3150 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3151 containing the operands.
3152
3153 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3154 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3155 {s5, s6, s7, s8}. */
b8698a0f 3156
14a61437 3157 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3158
3159 number_of_places_left_in_vector = nunits;
62cf7335 3160 constant_p = true;
794e3180
RS
3161 auto_vec<tree, 32> elts (nunits);
3162 elts.quick_grow (nunits);
90dd6e3d 3163 bool place_after_defs = false;
ebfd146a
IR
3164 for (j = 0; j < number_of_copies; j++)
3165 {
9771b263 3166 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
3167 {
3168 if (is_store)
3169 op = gimple_assign_rhs1 (stmt);
bac430c9 3170 else
f7e531cf 3171 {
bac430c9 3172 switch (code)
f7e531cf 3173 {
bac430c9 3174 case COND_EXPR:
a989bcc3
IE
3175 {
3176 tree cond = gimple_assign_rhs1 (stmt);
3177 if (TREE_CODE (cond) == SSA_NAME)
3178 op = gimple_op (stmt, op_num + 1);
3179 else if (op_num == 0 || op_num == 1)
bac430c9 3180 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
3181 else
3182 {
3183 if (op_num == 2)
3184 op = gimple_assign_rhs2 (stmt);
3185 else
3186 op = gimple_assign_rhs3 (stmt);
3187 }
3188 }
bac430c9
IR
3189 break;
3190
3191 case CALL_EXPR:
3192 op = gimple_call_arg (stmt, op_num);
3193 break;
3194
b84b294a
JJ
3195 case LSHIFT_EXPR:
3196 case RSHIFT_EXPR:
3197 case LROTATE_EXPR:
3198 case RROTATE_EXPR:
3199 op = gimple_op (stmt, op_num + 1);
3200 /* Unlike the other binary operators, shifts/rotates have
3201 the shift count being int, instead of the same type as
3202 the lhs, so make sure the scalar is the right type if
3203 we are dealing with vectors of
3204 long long/long/short/char. */
793d9a16 3205 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3206 op = fold_convert (TREE_TYPE (vector_type), op);
3207 break;
3208
bac430c9
IR
3209 default:
3210 op = gimple_op (stmt, op_num + 1);
b84b294a 3211 break;
f7e531cf
IR
3212 }
3213 }
b8698a0f 3214
ebfd146a 3215 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 3216 number_of_places_left_in_vector--;
90dd6e3d 3217 tree orig_op = op;
13396b6e 3218 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 3219 {
793d9a16 3220 if (CONSTANT_CLASS_P (op))
13396b6e 3221 {
42fd8198
IE
3222 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3223 {
3224 /* Can't use VIEW_CONVERT_EXPR for booleans because
3225 of possibly different sizes of scalar value and
3226 vector element. */
3227 if (integer_zerop (op))
3228 op = build_int_cst (TREE_TYPE (vector_type), 0);
3229 else if (integer_onep (op))
158beb4a 3230 op = build_all_ones_cst (TREE_TYPE (vector_type));
42fd8198
IE
3231 else
3232 gcc_unreachable ();
3233 }
3234 else
3235 op = fold_unary (VIEW_CONVERT_EXPR,
3236 TREE_TYPE (vector_type), op);
13396b6e
JJ
3237 gcc_assert (op && CONSTANT_CLASS_P (op));
3238 }
3239 else
3240 {
b731b390 3241 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 3242 gimple *init_stmt;
262a363f
JJ
3243 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
3244 {
158beb4a
JJ
3245 tree true_val
3246 = build_all_ones_cst (TREE_TYPE (vector_type));
3247 tree false_val
3248 = build_zero_cst (TREE_TYPE (vector_type));
7c285ab9 3249 gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
158beb4a
JJ
3250 init_stmt = gimple_build_assign (new_temp, COND_EXPR,
3251 op, true_val,
3252 false_val);
262a363f 3253 }
262a363f
JJ
3254 else
3255 {
3256 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
3257 op);
3258 init_stmt
3259 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
3260 op);
3261 }
13396b6e
JJ
3262 gimple_seq_add_stmt (&ctor_seq, init_stmt);
3263 op = new_temp;
3264 }
50eeef09 3265 }
d2a12ae7 3266 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
3267 if (!CONSTANT_CLASS_P (op))
3268 constant_p = false;
90dd6e3d
RB
3269 if (TREE_CODE (orig_op) == SSA_NAME
3270 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
3271 && STMT_VINFO_BB_VINFO (stmt_vinfo)
3272 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
3273 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3274 place_after_defs = true;
ebfd146a
IR
3275
3276 if (number_of_places_left_in_vector == 0)
3277 {
ebfd146a 3278 if (constant_p)
d2a12ae7 3279 vec_cst = build_vector (vector_type, elts);
ebfd146a 3280 else
d2a12ae7 3281 {
9771b263 3282 vec<constructor_elt, va_gc> *v;
d2a12ae7 3283 unsigned k;
9771b263 3284 vec_alloc (v, nunits);
d2a12ae7
RG
3285 for (k = 0; k < nunits; ++k)
3286 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
3287 vec_cst = build_constructor (vector_type, v);
3288 }
90dd6e3d
RB
3289 tree init;
3290 gimple_stmt_iterator gsi;
3291 if (place_after_defs)
3292 {
3293 gsi = gsi_for_stmt
3294 (vect_find_last_scalar_stmt_in_slp (slp_node));
3295 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
3296 }
3297 else
3298 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
13396b6e
JJ
3299 if (ctor_seq != NULL)
3300 {
90dd6e3d 3301 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
13396b6e
JJ
3302 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
3303 GSI_SAME_STMT);
3304 ctor_seq = NULL;
3305 }
90dd6e3d
RB
3306 voprnds.quick_push (init);
3307 place_after_defs = false;
62cf7335
RB
3308 number_of_places_left_in_vector = nunits;
3309 constant_p = true;
ebfd146a
IR
3310 }
3311 }
3312 }
3313
b8698a0f 3314 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 3315 them. */
9771b263 3316 vec_num = voprnds.length ();
d2a12ae7 3317 for (j = vec_num; j != 0; j--)
ebfd146a 3318 {
9771b263
DN
3319 vop = voprnds[j - 1];
3320 vec_oprnds->quick_push (vop);
ebfd146a
IR
3321 }
3322
9771b263 3323 voprnds.release ();
ebfd146a
IR
3324
3325 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
3326 group of stmts, NUMBER_OF_VECTORS to be created is greater than
3327 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 3328 to replicate the vectors. */
9771b263 3329 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 3330 {
b5aeb3bb
IR
3331 tree neutral_vec = NULL;
3332
3333 if (neutral_op)
3334 {
3335 if (!neutral_vec)
b9acc9f1 3336 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 3337
9771b263 3338 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
3339 }
3340 else
3341 {
9771b263
DN
3342 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
3343 vec_oprnds->quick_push (vop);
b5aeb3bb 3344 }
ebfd146a
IR
3345 }
3346}
3347
3348
3349/* Get vectorized definitions from SLP_NODE that contains corresponding
3350 vectorized def-stmts. */
3351
3352static void
9771b263 3353vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
3354{
3355 tree vec_oprnd;
355fe088 3356 gimple *vec_def_stmt;
ebfd146a
IR
3357 unsigned int i;
3358
9771b263 3359 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 3360
9771b263 3361 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
3362 {
3363 gcc_assert (vec_def_stmt);
e7baeb39
RB
3364 if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
3365 vec_oprnd = gimple_phi_result (vec_def_stmt);
3366 else
3367 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 3368 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
3369 }
3370}
3371
3372
b8698a0f
L
3373/* Get vectorized definitions for SLP_NODE.
3374 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
3375 call vect_get_constant_vectors() to create vector stmts.
3376 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
3377 must be stored in the corresponding child of SLP_NODE, and we call
3378 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 3379
ebfd146a 3380void
9771b263 3381vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
306b0c92 3382 vec<vec<tree> > *vec_oprnds)
ebfd146a 3383{
355fe088 3384 gimple *first_stmt;
d092494c 3385 int number_of_vects = 0, i;
77eefb71 3386 unsigned int child_index = 0;
b8698a0f 3387 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 3388 slp_tree child = NULL;
37b5ec8f 3389 vec<tree> vec_defs;
e44978dc 3390 tree oprnd;
77eefb71 3391 bool vectorized_defs;
ebfd146a 3392
9771b263
DN
3393 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3394 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 3395 {
d092494c
IR
3396 /* For each operand we check if it has vectorized definitions in a child
3397 node or we need to create them (for invariants and constants). We
3398 check if the LHS of the first stmt of the next child matches OPRND.
3399 If it does, we found the correct child. Otherwise, we call
77eefb71
RB
3400 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
3401 to check this child node for the next operand. */
3402 vectorized_defs = false;
3403 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 3404 {
01d8bf07 3405 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 3406
e44978dc 3407 /* We have to check both pattern and original def, if available. */
603cca93 3408 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
e44978dc 3409 {
355fe088
TS
3410 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
3411 gimple *related
90dd6e3d 3412 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
e7baeb39 3413 tree first_def_op;
90dd6e3d 3414
e7baeb39
RB
3415 if (gimple_code (first_def) == GIMPLE_PHI)
3416 first_def_op = gimple_phi_result (first_def);
3417 else
3418 first_def_op = gimple_get_lhs (first_def);
3419 if (operand_equal_p (oprnd, first_def_op, 0)
90dd6e3d
RB
3420 || (related
3421 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
3422 {
3423 /* The number of vector defs is determined by the number of
3424 vector statements in the node from which we get those
3425 statements. */
3426 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3427 vectorized_defs = true;
77eefb71 3428 child_index++;
90dd6e3d 3429 }
e44978dc 3430 }
77eefb71
RB
3431 else
3432 child_index++;
d092494c 3433 }
ebfd146a 3434
77eefb71
RB
3435 if (!vectorized_defs)
3436 {
3437 if (i == 0)
3438 {
3439 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3440 /* Number of vector stmts was calculated according to LHS in
3441 vect_schedule_slp_instance (), fix it by replacing LHS with
3442 RHS, if necessary. See vect_get_smallest_scalar_type () for
3443 details. */
3444 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3445 &rhs_size_unit);
3446 if (rhs_size_unit != lhs_size_unit)
3447 {
3448 number_of_vects *= rhs_size_unit;
3449 number_of_vects /= lhs_size_unit;
3450 }
3451 }
d092494c 3452 }
b5aeb3bb 3453
d092494c 3454 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
3455 vec_defs = vNULL;
3456 vec_defs.create (number_of_vects);
ebfd146a 3457
d092494c
IR
3458 /* For reduction defs we call vect_get_constant_vectors (), since we are
3459 looking for initial loop invariant values. */
306b0c92 3460 if (vectorized_defs)
d092494c 3461 /* The defs are already vectorized. */
37b5ec8f 3462 vect_get_slp_vect_defs (child, &vec_defs);
d092494c 3463 else
e7baeb39 3464 /* Build vectors from scalar defs. */
37b5ec8f 3465 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
306b0c92 3466 number_of_vects);
ebfd146a 3467
37b5ec8f 3468 vec_oprnds->quick_push (vec_defs);
d092494c 3469 }
ebfd146a
IR
3470}
3471
ebfd146a
IR
3472/* Generate vector permute statements from a list of loads in DR_CHAIN.
3473 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3474 permute statements for the SLP node NODE of the SLP instance
3475 SLP_NODE_INSTANCE. */
3476
ebfd146a 3477bool
01d8bf07 3478vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
ebfd146a 3479 gimple_stmt_iterator *gsi, int vf,
29afecdf
RB
3480 slp_instance slp_node_instance, bool analyze_only,
3481 unsigned *n_perms)
ebfd146a 3482{
355fe088 3483 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3484 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3485 tree mask_element_type = NULL_TREE, mask_type;
2ce27200 3486 int nunits, vec_index = 0;
2635892a 3487 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a 3488 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
61fdfd8c 3489 int mask_element;
ef4bddc2 3490 machine_mode mode;
ebfd146a 3491
91ff1504
RB
3492 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3493 return false;
3494
3495 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3496
22e4dee7
RH
3497 mode = TYPE_MODE (vectype);
3498
2635892a
RH
3499 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3500 same size as the vector element being permuted. */
96f9265a 3501 mask_element_type = lang_hooks.types.type_for_mode
304b9962 3502 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
ebfd146a 3503 mask_type = get_vectype_for_scalar_type (mask_element_type);
ebfd146a 3504 nunits = TYPE_VECTOR_SUBPARTS (vectype);
908a1a16
RS
3505 auto_vec_perm_indices mask (nunits);
3506 mask.quick_grow (nunits);
ebfd146a 3507
61fdfd8c
RB
3508 /* Initialize the vect stmts of NODE to properly insert the generated
3509 stmts later. */
3510 if (! analyze_only)
3511 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3512 i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
3513 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a 3514
b8698a0f
L
3515 /* Generate permutation masks for every NODE. Number of masks for each NODE
3516 is equal to GROUP_SIZE.
3517 E.g., we have a group of three nodes with three loads from the same
3518 location in each node, and the vector size is 4. I.e., we have a
3519 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3520 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3521 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3522 ...
3523
2635892a 3524 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3525 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3526 operation, and the mask element values can't be outside that range.
3527 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3528 For the first two permutations we need the first and the second input
ebfd146a 3529 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3530 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3531 {c2,a3,b3,c3}. */
3532
2ce27200
RB
3533 int vect_stmts_counter = 0;
3534 int index = 0;
3535 int first_vec_index = -1;
3536 int second_vec_index = -1;
be377c80 3537 bool noop_p = true;
29afecdf 3538 *n_perms = 0;
ebfd146a 3539
61fdfd8c 3540 for (int j = 0; j < vf; j++)
2ce27200
RB
3541 {
3542 for (int k = 0; k < group_size; k++)
3543 {
3544 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
3545 + j * STMT_VINFO_GROUP_SIZE (stmt_info));
3546 vec_index = i / nunits;
3547 mask_element = i % nunits;
3548 if (vec_index == first_vec_index
3549 || first_vec_index == -1)
3550 {
3551 first_vec_index = vec_index;
3552 }
3553 else if (vec_index == second_vec_index
3554 || second_vec_index == -1)
3555 {
3556 second_vec_index = vec_index;
3557 mask_element += nunits;
3558 }
3559 else
3560 {
3561 if (dump_enabled_p ())
3562 {
3563 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3564 "permutation requires at "
3565 "least three vectors ");
3566 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
3567 stmt, 0);
2ce27200
RB
3568 }
3569 return false;
3570 }
ebfd146a 3571
2ce27200
RB
3572 gcc_assert (mask_element >= 0
3573 && mask_element < 2 * nunits);
be377c80
RB
3574 if (mask_element != index)
3575 noop_p = false;
2ce27200
RB
3576 mask[index++] = mask_element;
3577
3578 if (index == nunits)
3579 {
be377c80 3580 if (! noop_p
908a1a16 3581 && ! can_vec_perm_p (mode, false, &mask))
2ce27200
RB
3582 {
3583 if (dump_enabled_p ())
22e4dee7 3584 {
2ce27200
RB
3585 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3586 vect_location,
3587 "unsupported vect permute { ");
3588 for (i = 0; i < nunits; ++i)
3589 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3590 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7 3591 }
2ce27200
RB
3592 return false;
3593 }
22e4dee7 3594
29afecdf
RB
3595 if (! noop_p)
3596 ++*n_perms;
3597
2ce27200
RB
3598 if (!analyze_only)
3599 {
be377c80
RB
3600 tree mask_vec = NULL_TREE;
3601
3602 if (! noop_p)
3603 {
794e3180 3604 auto_vec<tree, 32> mask_elts (nunits);
be377c80 3605 for (int l = 0; l < nunits; ++l)
794e3180
RS
3606 mask_elts.quick_push (build_int_cst (mask_element_type,
3607 mask[l]));
be377c80
RB
3608 mask_vec = build_vector (mask_type, mask_elts);
3609 }
2ce27200
RB
3610
3611 if (second_vec_index == -1)
3612 second_vec_index = first_vec_index;
61fdfd8c
RB
3613
3614 /* Generate the permute statement if necessary. */
3615 tree first_vec = dr_chain[first_vec_index];
3616 tree second_vec = dr_chain[second_vec_index];
3617 gimple *perm_stmt;
3618 if (! noop_p)
3619 {
3620 tree perm_dest
3621 = vect_create_destination_var (gimple_assign_lhs (stmt),
3622 vectype);
3623 perm_dest = make_ssa_name (perm_dest);
3624 perm_stmt = gimple_build_assign (perm_dest,
3625 VEC_PERM_EXPR,
3626 first_vec, second_vec,
3627 mask_vec);
3628 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
3629 }
3630 else
3631 /* If mask was NULL_TREE generate the requested
3632 identity transform. */
3633 perm_stmt = SSA_NAME_DEF_STMT (first_vec);
3634
3635 /* Store the vector statement in NODE. */
3636 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
2ce27200 3637 }
ebfd146a 3638
2ce27200
RB
3639 index = 0;
3640 first_vec_index = -1;
3641 second_vec_index = -1;
be377c80 3642 noop_p = true;
2ce27200
RB
3643 }
3644 }
b8698a0f 3645 }
ebfd146a 3646
ebfd146a
IR
3647 return true;
3648}
3649
3650
3651
3652/* Vectorize SLP instance tree in postorder. */
3653
3654static bool
3655vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 3656 unsigned int vectorization_factor)
ebfd146a 3657{
355fe088 3658 gimple *stmt;
0d0293ac 3659 bool grouped_store, is_store;
ebfd146a
IR
3660 gimple_stmt_iterator si;
3661 stmt_vec_info stmt_info;
3662 unsigned int vec_stmts_size, nunits, group_size;
3663 tree vectype;
603cca93 3664 int i, j;
d755c7ef 3665 slp_tree child;
ebfd146a 3666
603cca93 3667 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
ebfd146a
IR
3668 return false;
3669
9771b263 3670 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3671 vect_schedule_slp_instance (child, instance, vectorization_factor);
b8698a0f 3672
603cca93
RB
3673 /* Push SLP node def-type to stmts. */
3674 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3675 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3676 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3677 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
3678
9771b263 3679 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3680 stmt_info = vinfo_for_stmt (stmt);
3681
3682 /* VECTYPE is the type of the destination. */
b690cc0f 3683 vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
3684 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3685 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3686
3687 /* For each SLP instance calculate number of vector stmts to be created
ff802fa1 3688 for the scalar stmts in each node of the SLP tree. Number of vector
ebfd146a
IR
3689 elements in one vector iteration is the number of scalar elements in
3690 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
14a61437
RB
3691 size.
3692 Unless this is a SLP reduction in which case the number of vector
3693 stmts is equal to the number of vector stmts of the children. */
3694 if (GROUP_FIRST_ELEMENT (stmt_info)
3695 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3696 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3697 else
3698 vec_stmts_size = (vectorization_factor * group_size) / nunits;
ebfd146a 3699
9771b263 3700 if (!SLP_TREE_VEC_STMTS (node).exists ())
ebfd146a 3701 {
9771b263 3702 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
ebfd146a
IR
3703 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3704 }
3705
73fbfcad 3706 if (dump_enabled_p ())
ebfd146a 3707 {
78c60e3d
SS
3708 dump_printf_loc (MSG_NOTE,vect_location,
3709 "------>vectorizing SLP node starting from: ");
3710 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
b8698a0f 3711 }
ebfd146a 3712
2e8ab70c
RB
3713 /* Vectorized stmts go before the last scalar stmt which is where
3714 all uses are ready. */
3715 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
e4a707c4 3716
b010117a
IR
3717 /* Mark the first element of the reduction chain as reduction to properly
3718 transform the node. In the analysis phase only the last element of the
3719 chain is marked as reduction. */
0d0293ac 3720 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
b010117a
IR
3721 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3722 {
3723 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3724 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3725 }
3726
6876e5bc
RB
3727 /* Handle two-operation SLP nodes by vectorizing the group with
3728 both operations and then performing a merge. */
3729 if (SLP_TREE_TWO_OPERATORS (node))
3730 {
3731 enum tree_code code0 = gimple_assign_rhs_code (stmt);
567a3691 3732 enum tree_code ocode = ERROR_MARK;
355fe088 3733 gimple *ostmt;
908a1a16 3734 auto_vec_perm_indices mask (group_size);
6876e5bc
RB
3735 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3736 if (gimple_assign_rhs_code (ostmt) != code0)
3737 {
908a1a16 3738 mask.quick_push (1);
6876e5bc
RB
3739 ocode = gimple_assign_rhs_code (ostmt);
3740 }
3741 else
908a1a16 3742 mask.quick_push (0);
567a3691 3743 if (ocode != ERROR_MARK)
6876e5bc 3744 {
355fe088
TS
3745 vec<gimple *> v0;
3746 vec<gimple *> v1;
6876e5bc
RB
3747 unsigned j;
3748 tree tmask = NULL_TREE;
3749 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3750 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3751 SLP_TREE_VEC_STMTS (node).truncate (0);
3752 gimple_assign_set_rhs_code (stmt, ocode);
3753 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3754 gimple_assign_set_rhs_code (stmt, code0);
3755 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3756 SLP_TREE_VEC_STMTS (node).truncate (0);
3757 tree meltype = build_nonstandard_integer_type
b397965c 3758 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
6876e5bc
RB
3759 tree mvectype = get_same_sized_vectype (meltype, vectype);
3760 unsigned k = 0, l;
3761 for (j = 0; j < v0.length (); ++j)
3762 {
794e3180
RS
3763 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype);
3764 auto_vec<tree, 32> melts (nunits);
3765 for (l = 0; l < nunits; ++l)
6876e5bc 3766 {
1ece8d4c 3767 if (k >= group_size)
6876e5bc 3768 k = 0;
794e3180
RS
3769 tree t = build_int_cst (meltype, mask[k++] * nunits + l);
3770 melts.quick_push (t);
6876e5bc
RB
3771 }
3772 tmask = build_vector (mvectype, melts);
3773
3774 /* ??? Not all targets support a VEC_PERM_EXPR with a
3775 constant mask that would translate to a vec_merge RTX
3776 (with their vec_perm_const_ok). We can either not
3777 vectorize in that case or let veclower do its job.
3778 Unfortunately that isn't too great and at least for
3779 plus/minus we'd eventually like to match targets
3780 vector addsub instructions. */
355fe088 3781 gimple *vstmt;
6876e5bc
RB
3782 vstmt = gimple_build_assign (make_ssa_name (vectype),
3783 VEC_PERM_EXPR,
3784 gimple_assign_lhs (v0[j]),
3785 gimple_assign_lhs (v1[j]), tmask);
3786 vect_finish_stmt_generation (stmt, vstmt, &si);
3787 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3788 }
3789 v0.release ();
3790 v1.release ();
3791 return false;
3792 }
3793 }
0d0293ac 3794 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
603cca93
RB
3795
3796 /* Restore stmt def-types. */
3797 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3798 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3799 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
3800 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
3801
b5aeb3bb 3802 return is_store;
ebfd146a
IR
3803}
3804
dd34c087
JJ
3805/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3806 For loop vectorization this is done in vectorizable_call, but for SLP
3807 it needs to be deferred until end of vect_schedule_slp, because multiple
3808 SLP instances may refer to the same scalar stmt. */
3809
3810static void
3811vect_remove_slp_scalar_calls (slp_tree node)
3812{
355fe088 3813 gimple *stmt, *new_stmt;
dd34c087
JJ
3814 gimple_stmt_iterator gsi;
3815 int i;
d755c7ef 3816 slp_tree child;
dd34c087
JJ
3817 tree lhs;
3818 stmt_vec_info stmt_info;
3819
603cca93 3820 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
dd34c087
JJ
3821 return;
3822
9771b263 3823 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3824 vect_remove_slp_scalar_calls (child);
dd34c087 3825
9771b263 3826 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
dd34c087
JJ
3827 {
3828 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3829 continue;
3830 stmt_info = vinfo_for_stmt (stmt);
3831 if (stmt_info == NULL
3832 || is_pattern_stmt_p (stmt_info)
3833 || !PURE_SLP_STMT (stmt_info))
3834 continue;
3835 lhs = gimple_call_lhs (stmt);
3836 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3837 set_vinfo_for_stmt (new_stmt, stmt_info);
3838 set_vinfo_for_stmt (stmt, NULL);
3839 STMT_VINFO_STMT (stmt_info) = new_stmt;
3840 gsi = gsi_for_stmt (stmt);
3841 gsi_replace (&gsi, new_stmt, false);
3842 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3843 }
3844}
ebfd146a 3845
ff802fa1
IR
3846/* Generate vector code for all SLP instances in the loop/basic block. */
3847
ebfd146a 3848bool
310213d4 3849vect_schedule_slp (vec_info *vinfo)
ebfd146a 3850{
9771b263 3851 vec<slp_instance> slp_instances;
ebfd146a 3852 slp_instance instance;
01d8bf07 3853 unsigned int i, vf;
ebfd146a
IR
3854 bool is_store = false;
3855
310213d4
RB
3856 slp_instances = vinfo->slp_instances;
3857 if (is_a <loop_vec_info> (vinfo))
3858 vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
a70d6342 3859 else
310213d4 3860 vf = 1;
a70d6342 3861
9771b263 3862 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
3863 {
3864 /* Schedule the tree of INSTANCE. */
3865 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 3866 instance, vf);
73fbfcad 3867 if (dump_enabled_p ())
78c60e3d 3868 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3869 "vectorizing stmts using SLP.\n");
ebfd146a
IR
3870 }
3871
9771b263 3872 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
3873 {
3874 slp_tree root = SLP_INSTANCE_TREE (instance);
355fe088 3875 gimple *store;
b5aeb3bb
IR
3876 unsigned int j;
3877 gimple_stmt_iterator gsi;
3878
c40eced0
RB
3879 /* Remove scalar call stmts. Do not do this for basic-block
3880 vectorization as not all uses may be vectorized.
3881 ??? Why should this be necessary? DCE should be able to
3882 remove the stmts itself.
3883 ??? For BB vectorization we can as well remove scalar
3884 stmts starting from the SLP tree root if they have no
3885 uses. */
310213d4 3886 if (is_a <loop_vec_info> (vinfo))
c40eced0 3887 vect_remove_slp_scalar_calls (root);
dd34c087 3888
9771b263 3889 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
b5aeb3bb
IR
3890 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3891 {
3892 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3893 break;
3894
a024e70e
IR
3895 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3896 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
b5aeb3bb
IR
3897 /* Free the attached stmt_vec_info and remove the stmt. */
3898 gsi = gsi_for_stmt (store);
3d3f2249 3899 unlink_stmt_vdef (store);
b5aeb3bb 3900 gsi_remove (&gsi, true);
3d3f2249 3901 release_defs (store);
b5aeb3bb
IR
3902 free_stmt_vec_info (store);
3903 }
3904 }
3905
ebfd146a
IR
3906 return is_store;
3907}