/* SLP - Basic Block Vectorization
   Copyright (C) 2007-2017 Free Software Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>
   and Ira Rosen <irar@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "tree-pass.h"
#include "ssa.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "params.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "langhooks.h"
#include "gimple-walk.h"
#include "dbgcnt.h"


/* Recursively free the memory allocated for the SLP tree rooted at NODE.  */

static void
vect_free_slp_tree (slp_tree node)
{
  int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_free_slp_tree (child);

  gimple *stmt;
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    /* After transform some stmts are removed and thus their vinfo is gone.  */
    if (vinfo_for_stmt (stmt))
      {
        gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0);
        STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--;
      }

  SLP_TREE_CHILDREN (node).release ();
  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_VEC_STMTS (node).release ();
  SLP_TREE_LOAD_PERMUTATION (node).release ();

  free (node);
}


/* Free the memory allocated for the SLP instance.  */

void
vect_free_slp_instance (slp_instance instance)
{
  vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
  SLP_INSTANCE_LOADS (instance).release ();
  free (instance);
}


/* Create an SLP node for SCALAR_STMTS.  */

static slp_tree
vect_create_new_slp_node (vec<gimple *> scalar_stmts)
{
  slp_tree node;
  gimple *stmt = scalar_stmts[0];
  unsigned int nops;

  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  node = XNEW (struct _slp_tree);
  SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
  SLP_TREE_VEC_STMTS (node).create (0);
  SLP_TREE_CHILDREN (node).create (nops);
  SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
  SLP_TREE_TWO_OPERATORS (node) = false;
  SLP_TREE_DEF_TYPE (node) = vect_internal_def;

  unsigned i;
  FOR_EACH_VEC_ELT (scalar_stmts, i, stmt)
    STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++;

  return node;
}


/* This structure is used in creation of an SLP tree.  Each instance
   corresponds to the same operand in a group of scalar stmts in an SLP
   node.  */
typedef struct _slp_oprnd_info
{
  /* Def-stmts for the operands.  */
  vec<gimple *> def_stmts;
  /* Information about the first statement, its vector def-type, type, the
     operand itself in case it's constant, and an indication if it's a pattern
     stmt.  */
  tree first_op_type;
  enum vect_def_type first_dt;
  bool first_pattern;
  bool second_pattern;
} *slp_oprnd_info;


/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
   operand.  */
static vec<slp_oprnd_info>
vect_create_oprnd_info (int nops, int group_size)
{
  int i;
  slp_oprnd_info oprnd_info;
  vec<slp_oprnd_info> oprnds_info;

  oprnds_info.create (nops);
  for (i = 0; i < nops; i++)
    {
      oprnd_info = XNEW (struct _slp_oprnd_info);
      oprnd_info->def_stmts.create (group_size);
      oprnd_info->first_dt = vect_uninitialized_def;
      oprnd_info->first_op_type = NULL_TREE;
      oprnd_info->first_pattern = false;
      oprnd_info->second_pattern = false;
      oprnds_info.quick_push (oprnd_info);
    }

  return oprnds_info;
}


/* Free operands info.  */

static void
vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
{
  int i;
  slp_oprnd_info oprnd_info;

  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      oprnd_info->def_stmts.release ();
      XDELETE (oprnd_info);
    }

  oprnds_info.release ();
}


/* Find the place of the data-ref in STMT in the interleaving chain that starts
   from FIRST_STMT.  Return -1 if the data-ref is not a part of the chain.  */
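/* For example, in a dense chain (every GROUP_GAP equal to 1) the value
   returned is simply STMT's index within the chain; a gap between two
   chain members increases the returned place by the number of skipped
   elements.  */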

static int
vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
{
  gimple *next_stmt = first_stmt;
  int result = 0;

  if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    return -1;

  do
    {
      if (next_stmt == stmt)
        return result;
      next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
      if (next_stmt)
        result += GROUP_GAP (vinfo_for_stmt (next_stmt));
    }
  while (next_stmt);

  return -1;
}


/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
   they are of a valid type and that they match the defs of the first stmt of
   the SLP group (stored in OPRNDS_INFO).  This function tries to match stmts
   by swapping operands of STMT when possible.  Non-zero *SWAP indicates swap
   is required for cond_expr stmts.  Specifically, *SWAP is 1 if STMT is cond
   and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond
   and code of comparison needs to be inverted.  If there is any operand swap
   in this function, *SWAP is set to a non-zero value.
   If there was a fatal error return -1; if the error could be corrected by
   swapping operands of the father node of this one, return 1; if everything
   is ok return 0.  */

static int
vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
                             gimple *stmt, unsigned stmt_num,
                             vec<slp_oprnd_info> *oprnds_info)
{
  tree oprnd;
  unsigned int i, number_of_oprnds;
  gimple *def_stmt;
  enum vect_def_type dt = vect_uninitialized_def;
  bool pattern = false;
  slp_oprnd_info oprnd_info;
  int first_op_idx = 1;
  bool commutative = false;
  bool first_op_cond = false;
  bool first = stmt_num == 0;
  bool second = stmt_num == 1;

  if (is_gimple_call (stmt))
    {
      number_of_oprnds = gimple_call_num_args (stmt);
      first_op_idx = 3;
    }
  else if (is_gimple_assign (stmt))
    {
      enum tree_code code = gimple_assign_rhs_code (stmt);
      number_of_oprnds = gimple_num_ops (stmt) - 1;
      /* Swap can only be done for cond_expr if asked to, otherwise we
         could result in different comparison code to the first stmt.  */
      if (gimple_assign_rhs_code (stmt) == COND_EXPR
          && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
        {
          first_op_cond = true;
          number_of_oprnds++;
        }
      else
        commutative = commutative_tree_code (code);
    }
  else
    return -1;

  bool swapped = (*swap != 0);
  gcc_assert (!swapped || first_op_cond);
  for (i = 0; i < number_of_oprnds; i++)
    {
again:
      if (first_op_cond)
        {
          /* Map indicating how operands of cond_expr should be swapped.  */
          int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
          int *map = maps[*swap];
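          /* For example, for a COND_EXPR a < b ? x : y, *swap == 1 uses
             map {1, 0, 2, 3} and visits the comparison operands as b, a,
             while *swap == 2 uses map {0, 1, 3, 2} and visits the arms
             as y, x.  */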

          if (i < 2)
            oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]);
          else
            oprnd = gimple_op (stmt, map[i]);
        }
      else
        oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));

      oprnd_info = (*oprnds_info)[i];

      if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: can't analyze def for ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }

      /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
         from the pattern.  Check that all the stmts of the node are in the
         pattern.  */
      if (def_stmt && gimple_bb (def_stmt)
          && vect_stmt_in_region_p (vinfo, def_stmt)
          && vinfo_for_stmt (def_stmt)
          && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
          && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
        {
          pattern = true;
          if (!first && !oprnd_info->first_pattern
              /* Allow different pattern state for the defs of the
                 first stmt in reduction chains.  */
              && (oprnd_info->first_dt != vect_reduction_def
                  || (!second && !oprnd_info->second_pattern)))
            {
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: some of the stmts"
                                   " are in a pattern, and others are not ");
                  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
                  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
                }

              return 1;
            }

          def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
          dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));

          if (dt == vect_unknown_def_type)
            {
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Unsupported pattern.\n");
              return -1;
            }

          switch (gimple_code (def_stmt))
            {
            case GIMPLE_PHI:
            case GIMPLE_ASSIGN:
              break;

            default:
              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "unsupported defining stmt:\n");
              return -1;
            }
        }

      if (second)
        oprnd_info->second_pattern = pattern;

      if (first)
        {
          oprnd_info->first_dt = dt;
          oprnd_info->first_pattern = pattern;
          oprnd_info->first_op_type = TREE_TYPE (oprnd);
        }
      else
        {
          /* Not first stmt of the group, check that the def-stmt/s match
             the def-stmt/s of the first stmt.  Allow different definition
             types for reduction chains: the first stmt must be a
             vect_reduction_def (a phi node), and the rest
             vect_internal_def.  */
          if (((oprnd_info->first_dt != dt
                && !(oprnd_info->first_dt == vect_reduction_def
                     && dt == vect_internal_def)
                && !((oprnd_info->first_dt == vect_external_def
                      || oprnd_info->first_dt == vect_constant_def)
                     && (dt == vect_external_def
                         || dt == vect_constant_def)))
               || !types_compatible_p (oprnd_info->first_op_type,
                                       TREE_TYPE (oprnd))))
            {
              /* Try swapping operands if we got a mismatch.  */
              if (i == 0
                  && !swapped
                  && commutative)
                {
                  swapped = true;
                  goto again;
                }

              if (dump_enabled_p ())
                dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                 "Build SLP failed: different types\n");

              return 1;
            }
        }

      /* Check the types of the definitions.  */
      switch (dt)
        {
        case vect_constant_def:
        case vect_external_def:
          break;

        case vect_reduction_def:
        case vect_induction_def:
        case vect_internal_def:
          oprnd_info->def_stmts.quick_push (def_stmt);
          break;

        default:
          /* FORNOW: Not supported.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: illegal type of def ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }

          return -1;
        }
    }

  /* Swap operands.  */
  if (swapped)
    {
      /* If there are already uses of this stmt in a SLP instance then
         we've committed to the operand order and can't swap it.  */
      if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: cannot swap operands of "
                               "shared stmt ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          return -1;
        }

      if (first_op_cond)
        {
          tree cond = gimple_assign_rhs1 (stmt);
          enum tree_code code = TREE_CODE (cond);

          /* Swap.  */
          if (*swap == 1)
            {
              swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
                                 &TREE_OPERAND (cond, 1));
              TREE_SET_CODE (cond, swap_tree_comparison (code));
            }
          /* Invert.  */
          else
            {
              swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt),
                                 gimple_assign_rhs3_ptr (stmt));
              bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0));
              code = invert_tree_comparison (TREE_CODE (cond), honor_nans);
              gcc_assert (code != ERROR_MARK);
              TREE_SET_CODE (cond, code);
            }
        }
      else
        swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                           gimple_assign_rhs2_ptr (stmt));
      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "swapped operands to match def types in ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }
    }

  *swap = swapped;
  return 0;
}


/* Verify that the scalar stmts STMTS are isomorphic, do not require data
   permutation and are of supported types of operation.  Return true if
   so; otherwise return false and indicate in *MATCHES which stmts are
   not isomorphic to the first one.  If MATCHES[0] is false then this
   indicates the comparison could not be carried out or the stmts will
   never be vectorized by SLP.

   Note COND_EXPR is possibly isomorphic to another one after swapping its
   operands.  Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to
   the first stmt by swapping the two operands of comparison; set SWAP[i]
   to 2 if stmt I is isomorphic to the first stmt by inverting the code
   of comparison.  Take A1 >= B1 ? X1 : Y1 as an example: it can be swapped
   to (B1 <= A1 ? X1 : Y1), or be inverted to (A1 < B1) ? Y1 : X1.  */

static bool
vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap,
                       vec<gimple *> stmts, unsigned int group_size,
                       unsigned nops, unsigned int *max_nunits,
                       bool *matches, bool *two_operators)
{
  unsigned int i;
  gimple *first_stmt = stmts[0], *stmt = stmts[0];
  enum tree_code first_stmt_code = ERROR_MARK;
  enum tree_code alt_stmt_code = ERROR_MARK;
  enum tree_code rhs_code = ERROR_MARK;
  enum tree_code first_cond_code = ERROR_MARK;
  tree lhs;
  bool need_same_oprnds = false;
  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
  optab optab;
  int icode;
  machine_mode optab_op2_mode;
  machine_mode vec_mode;
  HOST_WIDE_INT dummy;
  gimple *first_load = NULL, *prev_first_load = NULL;

  /* For every stmt in NODE find its def stmt/s.  */
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      swap[i] = 0;
      matches[i] = false;

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
          dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
        }

      /* Fail to vectorize statements marked as unvectorizable.  */
      if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unvectorizable statement ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      lhs = gimple_get_lhs (stmt);
      if (lhs == NULL_TREE)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: not GIMPLE_ASSIGN nor "
                               "GIMPLE_CALL ");
              dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
      vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                               "Build SLP failed: unsupported data-type ");
              dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                 scalar_type);
              dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
            }
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* If populating the vector type requires unrolling then fail
         before adjusting *max_nunits for basic-block vectorization.  */
      if (is_a <bb_vec_info> (vinfo)
          && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
        {
          dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                           "Build SLP failed: unrolling required "
                           "in basic block SLP\n");
          /* Fatal mismatch.  */
          matches[0] = false;
          return false;
        }

      /* In case of multiple types we need to detect the smallest type.  */
      if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
        *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);

      if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
        {
          rhs_code = CALL_EXPR;
          if (gimple_call_internal_p (call_stmt)
              || gimple_call_tail_p (call_stmt)
              || gimple_call_noreturn_p (call_stmt)
              || !gimple_call_nothrow_p (call_stmt)
              || gimple_call_chain (call_stmt))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: unsupported call type ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    call_stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }
        }
      else
        rhs_code = gimple_assign_rhs_code (stmt);

      /* Check the operation.  */
      if (i == 0)
        {
          first_stmt_code = rhs_code;

          /* Shift arguments should be equal in all the packed stmts for a
             vector shift with scalar shift operand.  */
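          /* For example, { a0 << s, a1 << s } can use a vector/scalar
             shift, but { a0 << s0, a1 << s1 } with s0 != s1 needs a
             vector/vector shift; when the target only provides a
             vector/scalar shift, need_same_oprnds below enforces that
             all stmts share the same shift operand.  */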
          if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
              || rhs_code == LROTATE_EXPR
              || rhs_code == RROTATE_EXPR)
            {
              vec_mode = TYPE_MODE (vectype);

              /* First see if we have a vector/vector shift.  */
              optab = optab_for_tree_code (rhs_code, vectype,
                                           optab_vector);

              if (!optab
                  || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
                {
                  /* No vector/vector shift, try for a vector/scalar shift.  */
                  optab = optab_for_tree_code (rhs_code, vectype,
                                               optab_scalar);

                  if (!optab)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: no optab.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  icode = (int) optab_handler (optab, vec_mode);
                  if (icode == CODE_FOR_nothing)
                    {
                      if (dump_enabled_p ())
                        dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                         "Build SLP failed: "
                                         "op not supported by target.\n");
                      /* Fatal mismatch.  */
                      matches[0] = false;
                      return false;
                    }
                  optab_op2_mode = insn_data[icode].operand[2].mode;
                  if (!VECTOR_MODE_P (optab_op2_mode))
                    {
                      need_same_oprnds = true;
                      first_op1 = gimple_assign_rhs2 (stmt);
                    }
                }
            }
          else if (rhs_code == WIDEN_LSHIFT_EXPR)
            {
              need_same_oprnds = true;
              first_op1 = gimple_assign_rhs2 (stmt);
            }
        }
      else
        {
          if (first_stmt_code != rhs_code
              && alt_stmt_code == ERROR_MARK)
            alt_stmt_code = rhs_code;
          if (first_stmt_code != rhs_code
              && (first_stmt_code != IMAGPART_EXPR
                  || rhs_code != REALPART_EXPR)
              && (first_stmt_code != REALPART_EXPR
                  || rhs_code != IMAGPART_EXPR)
              /* Handle mismatches in plus/minus by computing both
                 and merging the results.  */
              && !((first_stmt_code == PLUS_EXPR
                    || first_stmt_code == MINUS_EXPR)
                   && (alt_stmt_code == PLUS_EXPR
                       || alt_stmt_code == MINUS_EXPR)
                   && rhs_code == alt_stmt_code)
              && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
                   && (first_stmt_code == ARRAY_REF
                       || first_stmt_code == BIT_FIELD_REF
                       || first_stmt_code == INDIRECT_REF
                       || first_stmt_code == COMPONENT_REF
                       || first_stmt_code == MEM_REF)))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different operation "
                                   "in stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "original stmt ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    first_stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (need_same_oprnds
              && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: different shift "
                                   "arguments in ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Mismatch.  */
              continue;
            }

          if (rhs_code == CALL_EXPR)
            {
              gimple *first_stmt = stmts[0];
              if (gimple_call_num_args (stmt) != nops
                  || !operand_equal_p (gimple_call_fn (first_stmt),
                                       gimple_call_fn (stmt), 0)
                  || gimple_call_fntype (first_stmt)
                     != gimple_call_fntype (stmt))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different calls in ");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      /* Grouped store or load.  */
      if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
        {
          if (REFERENCE_CLASS_P (lhs))
            {
              /* Store.  */
              ;
            }
          else
            {
              /* Load.  */
              first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
              if (prev_first_load)
                {
                  /* Check that there are no loads from different interleaving
                     chains in the same node.  */
                  if (prev_first_load != first_load)
                    {
                      if (dump_enabled_p ())
                        {
                          dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                           vect_location,
                                           "Build SLP failed: different "
                                           "interleaving chains in one node ");
                          dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                            stmt, 0);
                        }
                      /* Mismatch.  */
                      continue;
                    }
                }
              else
                prev_first_load = first_load;
            }
        } /* Grouped access.  */
      else
        {
          if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
            {
              /* Not grouped load.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: not grouped load ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }

              /* FORNOW: Not grouped loads are not supported.  */
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          /* Not memory operation.  */
          if (TREE_CODE_CLASS (rhs_code) != tcc_binary
              && TREE_CODE_CLASS (rhs_code) != tcc_unary
              && TREE_CODE_CLASS (rhs_code) != tcc_expression
              && TREE_CODE_CLASS (rhs_code) != tcc_comparison
              && rhs_code != CALL_EXPR)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "Build SLP failed: operation");
                  dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
                }
              /* Fatal mismatch.  */
              matches[0] = false;
              return false;
            }

          if (rhs_code == COND_EXPR)
            {
              tree cond_expr = gimple_assign_rhs1 (stmt);
              enum tree_code cond_code = TREE_CODE (cond_expr);
              enum tree_code swap_code = ERROR_MARK;
              enum tree_code invert_code = ERROR_MARK;

              if (i == 0)
                first_cond_code = TREE_CODE (cond_expr);
              else if (TREE_CODE_CLASS (cond_code) == tcc_comparison)
                {
                  bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0));
                  swap_code = swap_tree_comparison (cond_code);
                  invert_code = invert_tree_comparison (cond_code, honor_nans);
                }

              if (first_cond_code == cond_code)
                ;
              /* Isomorphic can be achieved by swapping.  */
              else if (first_cond_code == swap_code)
                swap[i] = 1;
              /* Isomorphic can be achieved by inverting.  */
              else if (first_cond_code == invert_code)
                swap[i] = 2;
              else
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                       "Build SLP failed: different"
                                       " operation");
                      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                        stmt, 0);
                    }
                  /* Mismatch.  */
                  continue;
                }
            }
        }

      matches[i] = true;
    }

  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      return false;

  /* If we allowed a two-operation SLP node verify the target can cope
     with the permute we are going to use.  */
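  /* For example, a group mixing plus and minus such as
     { a0 + b0, a1 - b1, a2 + b2, a3 - b3 } is vectorized as one vector
     add and one vector subtract; lane i of the merging permute below
     selects element i of the first result for stmts matching the first
     operation and element i + nunits of the second result for stmts
     using the alternate operation.  */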
  if (alt_stmt_code != ERROR_MARK
      && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
    {
      unsigned char *sel
        = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
      for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
        {
          sel[i] = i;
          if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
            sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
        }
      if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
        {
          for (i = 0; i < group_size; ++i)
            if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
              {
                matches[i] = false;
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: different operation "
                                     "in stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[i], 0);
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "original stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      first_stmt, 0);
                  }
              }
          return false;
        }
      *two_operators = true;
    }

  return true;
}

/* Recursively build an SLP tree starting from NODE.
   Fail (and return NULL) if def-stmts are not isomorphic, require data
   permutation or are of unsupported types of operation.  Otherwise,
   return the built SLP tree.  */
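/* For example, starting from the store group a[0] = b[0] + c[0];
   a[1] = b[1] + c[1]; the node built for the two stores gets one child
   holding the two additions, whose own children are the grouped loads
   from b[] and c[]; loads (and PHIs) terminate the recursion below.  */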

static slp_tree
vect_build_slp_tree (vec_info *vinfo,
                     vec<gimple *> stmts, unsigned int group_size,
                     unsigned int *max_nunits,
                     vec<slp_tree> *loads,
                     bool *matches, unsigned *npermutes, unsigned *tree_size,
                     unsigned max_tree_size)
{
  unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits;
  gimple *stmt;
  slp_tree node;

  matches[0] = false;

  stmt = stmts[0];
  if (is_gimple_call (stmt))
    nops = gimple_call_num_args (stmt);
  else if (is_gimple_assign (stmt))
    {
      nops = gimple_num_ops (stmt) - 1;
      if (gimple_assign_rhs_code (stmt) == COND_EXPR)
        nops++;
    }
  else if (gimple_code (stmt) == GIMPLE_PHI)
    nops = 0;
  else
    return NULL;

  /* If the SLP node is a PHI (induction or reduction), terminate
     the recursion.  */
  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt));
      /* Induction from different IVs is not supported.  */
      if (def_type == vect_induction_def)
        {
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            if (stmt != stmts[0])
              return NULL;
        }
      else
        {
          /* Else def types have to match.  */
          FOR_EACH_VEC_ELT (stmts, i, stmt)
            {
              /* But for reduction chains only check on the first stmt.  */
              if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
                  && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt)
                continue;
              if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type)
                return NULL;
            }
        }
      node = vect_create_new_slp_node (stmts);
      return node;
    }


  bool two_operators = false;
  unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
  if (!vect_build_slp_tree_1 (vinfo, swap,
                              stmts, group_size, nops,
                              &this_max_nunits, matches, &two_operators))
    return NULL;

  /* If the SLP node is a load, terminate the recursion.  */
  if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
      && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
    {
      *max_nunits = this_max_nunits;
      node = vect_create_new_slp_node (stmts);
      loads->safe_push (node);
      return node;
    }

  /* Get at the operands, verifying they are compatible.  */
  vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
  slp_oprnd_info oprnd_info;
  FOR_EACH_VEC_ELT (stmts, i, stmt)
    {
      int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
                                             stmt, i, &oprnds_info);
      if (res != 0)
        matches[(res == -1) ? 0 : i] = false;
      if (!matches[0])
        break;
    }
  for (i = 0; i < group_size; ++i)
    if (!matches[i])
      {
        vect_free_oprnd_info (oprnds_info);
        return NULL;
      }

  auto_vec<slp_tree, 4> children;
  auto_vec<slp_tree> this_loads;

  stmt = stmts[0];

  /* Create SLP_TREE nodes for the definition node/s.  */
  FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
    {
      slp_tree child;
      unsigned old_nloads = this_loads.length ();
      unsigned old_tree_size = this_tree_size;
      unsigned int j;

      if (oprnd_info->first_dt != vect_internal_def
          && oprnd_info->first_dt != vect_reduction_def
          && oprnd_info->first_dt != vect_induction_def)
        continue;

      if (++this_tree_size > max_tree_size)
        {
          FOR_EACH_VEC_ELT (children, j, child)
            vect_free_slp_tree (child);
          vect_free_oprnd_info (oprnds_info);
          return NULL;
        }

      if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                        group_size, &this_max_nunits,
                                        &this_loads, matches, npermutes,
                                        &this_tree_size,
                                        max_tree_size)) != NULL)
        {
          /* If we have all children of child built up from scalars then just
             throw that away and build it up this node from scalars.  */
          if (!SLP_TREE_CHILDREN (child).is_empty ()
              /* ??? Rejecting patterns this way doesn't work.  We'd have to
                 do extra work to cancel the pattern so the uses see the
                 scalar version.  */
              && !is_pattern_stmt_p
                    (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
            {
              slp_tree grandchild;

              FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                  break;
              if (!grandchild)
                {
                  /* Roll back.  */
                  this_loads.truncate (old_nloads);
                  this_tree_size = old_tree_size;
                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    vect_free_slp_tree (grandchild);
                  SLP_TREE_CHILDREN (child).truncate (0);

                  dump_printf_loc (MSG_NOTE, vect_location,
                                   "Building parent vector operands from "
                                   "scalars instead\n");
                  oprnd_info->def_stmts = vNULL;
                  SLP_TREE_DEF_TYPE (child) = vect_external_def;
                  children.safe_push (child);
                  continue;
                }
            }

          oprnd_info->def_stmts = vNULL;
          children.safe_push (child);
          continue;
        }

      /* If the SLP build failed fatally and we analyze a basic-block
         simply treat nodes we fail to build as externally defined
         (and thus build vectors from the scalar defs).
         The cost model will reject outright expensive cases.
         ??? This doesn't treat cases where permutation ultimately
         fails (or we don't try permutation below).  Ideally we'd
         even compute a permutation that will end up with the maximum
         SLP tree size...  */
      if (is_a <bb_vec_info> (vinfo)
          && !matches[0]
          /* ??? Rejecting patterns this way doesn't work.  We'd have to
             do extra work to cancel the pattern so the uses see the
             scalar version.  */
          && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
        {
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Building vector operands from scalars\n");
          child = vect_create_new_slp_node (oprnd_info->def_stmts);
          SLP_TREE_DEF_TYPE (child) = vect_external_def;
          children.safe_push (child);
          oprnd_info->def_stmts = vNULL;
          continue;
        }

      /* If the SLP build for operand zero failed and operand zero
         and one can be commutated try that for the scalar stmts
         that failed the match.  */
      if (i == 0
          /* A first scalar stmt mismatch signals a fatal mismatch.  */
          && matches[0]
          /* ??? For COND_EXPRs we can swap the comparison operands
             as well as the arms under some constraints.  */
          && nops == 2
          && oprnds_info[1]->first_dt == vect_internal_def
          && is_gimple_assign (stmt)
          && commutative_tree_code (gimple_assign_rhs_code (stmt))
          && ! two_operators
          /* Do so only if the number of unsuccessful permutes was no more
             than a cut-off as re-trying the recursive match on
             possibly each level of the tree would expose exponential
             behavior.  */
          && *npermutes < 4)
        {
          /* Verify if we can safely swap or if we committed to a specific
             operand order already.  */
          for (j = 0; j < group_size; ++j)
            if (!matches[j]
                && (swap[j] != 0
                    || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j]))))
              {
                if (dump_enabled_p ())
                  {
                    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                     "Build SLP failed: cannot swap operands "
                                     "of shared stmt ");
                    dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                      stmts[j], 0);
                  }
                goto fail;
              }

          /* Swap mismatched definition stmts.  */
          dump_printf_loc (MSG_NOTE, vect_location,
                           "Re-trying with swapped operands of stmts ");
          for (j = 0; j < group_size; ++j)
            if (!matches[j])
              {
                std::swap (oprnds_info[0]->def_stmts[j],
                           oprnds_info[1]->def_stmts[j]);
                dump_printf (MSG_NOTE, "%d ", j);
              }
          dump_printf (MSG_NOTE, "\n");
          /* And try again with scratch 'matches' ...  */
          bool *tem = XALLOCAVEC (bool, group_size);
          if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
                                            group_size, &this_max_nunits,
                                            &this_loads, tem, npermutes,
                                            &this_tree_size,
                                            max_tree_size)) != NULL)
            {
              /* ... so if successful we can apply the operand swapping
                 to the GIMPLE IL.  This is necessary because for example
                 vect_get_slp_defs uses operand indexes and thus expects
                 canonical operand order.  This is also necessary even
                 if we end up building the operand from scalars as
                 we'll continue to process swapped operand two.  */
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  gimple_set_plf (stmt, GF_PLF_1, false);
                }
              for (j = 0; j < group_size; ++j)
                {
                  gimple *stmt = stmts[j];
                  if (!matches[j])
                    {
                      /* Avoid swapping operands twice.  */
                      if (gimple_plf (stmt, GF_PLF_1))
                        continue;
                      swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
                                         gimple_assign_rhs2_ptr (stmt));
                      gimple_set_plf (stmt, GF_PLF_1, true);
                    }
                }
              /* Verify we swap all duplicates or none.  */
              if (flag_checking)
                for (j = 0; j < group_size; ++j)
                  {
                    gimple *stmt = stmts[j];
                    gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
                  }

              /* If we have all children of child built up from scalars then
                 just throw that away and build it up this node from
                 scalars.  */
              if (!SLP_TREE_CHILDREN (child).is_empty ()
                  /* ??? Rejecting patterns this way doesn't work.  We'd have
                     to do extra work to cancel the pattern so the uses see the
                     scalar version.  */
                  && !is_pattern_stmt_p
                        (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
                {
                  unsigned int j;
                  slp_tree grandchild;

                  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
                    if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
                      break;
                  if (!grandchild)
                    {
                      /* Roll back.  */
                      this_loads.truncate (old_nloads);
                      this_tree_size = old_tree_size;
                      FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j,
                                        grandchild)
                        vect_free_slp_tree (grandchild);
                      SLP_TREE_CHILDREN (child).truncate (0);

                      dump_printf_loc (MSG_NOTE, vect_location,
                                       "Building parent vector operands from "
                                       "scalars instead\n");
                      oprnd_info->def_stmts = vNULL;
                      SLP_TREE_DEF_TYPE (child) = vect_external_def;
                      children.safe_push (child);
                      continue;
                    }
                }

              oprnd_info->def_stmts = vNULL;
              children.safe_push (child);
              continue;
            }

          ++*npermutes;
        }

fail:
      gcc_assert (child == NULL);
      FOR_EACH_VEC_ELT (children, j, child)
        vect_free_slp_tree (child);
      vect_free_oprnd_info (oprnds_info);
      return NULL;
    }

  vect_free_oprnd_info (oprnds_info);

  if (tree_size)
    *tree_size += this_tree_size;
  *max_nunits = this_max_nunits;
  loads->safe_splice (this_loads);

  node = vect_create_new_slp_node (stmts);
  SLP_TREE_TWO_OPERATORS (node) = two_operators;
  SLP_TREE_CHILDREN (node).splice (children);
  return node;
}

/* Dump a slp tree NODE using flags specified in DUMP_KIND.  */

static void
vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node)
{
  int i;
  gimple *stmt;
  slp_tree child;

  dump_printf_loc (dump_kind, loc, "node%s\n",
                   SLP_TREE_DEF_TYPE (node) != vect_internal_def
                   ? " (external)" : "");
  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
      dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
    }
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_print_slp_tree (dump_kind, loc, child);
}


/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
   If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
   J).  Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
   stmts in NODE are to be marked.  */

static void
vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
{
  int i;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    if (j < 0 || i == j)
      STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts (child, mark, j);
}


/* Mark the statements of the tree rooted at NODE as relevant (vect_used).  */

static void
vect_mark_slp_stmts_relevant (slp_tree node)
{
  int i;
  gimple *stmt;
  stmt_vec_info stmt_info;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      stmt_info = vinfo_for_stmt (stmt);
      gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
                  || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
      STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_mark_slp_stmts_relevant (child);
}


/* Rearrange the statements of NODE according to PERMUTATION.  */

static void
vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
                          vec<unsigned> permutation)
{
  gimple *stmt;
  vec<gimple *> tmp_stmts;
  unsigned int i;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_slp_rearrange_stmts (child, group_size, permutation);

  gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
  tmp_stmts.create (group_size);
  tmp_stmts.quick_grow_cleared (group_size);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    tmp_stmts[permutation[i]] = stmt;

  SLP_TREE_SCALAR_STMTS (node).release ();
  SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
}


/* Attempt to reorder stmts in a reduction chain so that we don't
   require any load permutation.  Return true if that was possible,
   otherwise return false.  */
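/* For example, if a reduction over a group of size two has its loads
   recorded with load permutation { 1, 0 }, rearranging the scalar
   stmts of every node by that permutation makes the loads contiguous,
   and the permutation can be dropped entirely.  */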

static bool
vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j;
  unsigned int lidx;
  slp_tree node, load;

  /* Compare all the permutation sequences to the first one.  We know
     that at least one load is permuted.  */
  node = SLP_INSTANCE_LOADS (slp_instn)[0];
  if (!node->load_permutation.exists ())
    return false;
  for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
    {
      if (!load->load_permutation.exists ())
        return false;
      FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
        if (lidx != node->load_permutation[j])
          return false;
    }

  /* Check that the loads in the first sequence are different and there
     are no gaps between them.  */
  auto_sbitmap load_index (group_size);
  bitmap_clear (load_index);
  FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
    {
      if (lidx >= group_size)
        return false;
      if (bitmap_bit_p (load_index, lidx))
        return false;

      bitmap_set_bit (load_index, lidx);
    }
  for (i = 0; i < group_size; i++)
    if (!bitmap_bit_p (load_index, i))
      return false;

  /* This permutation is valid for reduction.  Since the order of the
     statements in the nodes is not important unless they are memory
     accesses, we can rearrange the statements in all the nodes
     according to the order of the loads.  */
  vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
                            node->load_permutation);

  /* We are done, no actual permutations need to be generated.  */
  unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    {
      gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
      /* But we have to keep those permutations that are required because
         of handling of gaps.  */
      if (unrolling_factor == 1
          || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
              && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
        SLP_TREE_LOAD_PERMUTATION (node).release ();
      else
        for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
          SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
    }

  return true;
}

/* Check if the required load permutations in the SLP instance
   SLP_INSTN are supported.  */

static bool
vect_supported_load_permutation_p (slp_instance slp_instn)
{
  unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
  unsigned int i, j, k, next;
  slp_tree node;
  gimple *stmt, *load, *next_load;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        if (node->load_permutation.exists ())
          FOR_EACH_VEC_ELT (node->load_permutation, j, next)
            dump_printf (MSG_NOTE, "%d ", next);
        else
          for (k = 0; k < group_size; ++k)
            dump_printf (MSG_NOTE, "%d ", k);
      dump_printf (MSG_NOTE, "\n");
    }

  /* In case of reduction every load permutation is allowed, since the order
     of the reduction statements is not important (as opposed to the case of
     grouped stores).  The only condition we need to check is that all the
     load nodes are of the same size and have the same permutation (and then
     rearrange all the nodes of the SLP instance according to this
     permutation).  */

  /* Check that all the load nodes are of the same size.  */
  /* ??? Can't we assert this? */
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
      return false;

  node = SLP_INSTANCE_TREE (slp_instn);
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];

  /* Reduction (there are no data-refs in the root).
     In reduction chain the order of the loads is not important.  */
  if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
      && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    vect_attempt_slp_rearrange_stmts (slp_instn);

  /* In basic block vectorization we allow any subchain of an interleaving
     chain.
     FORNOW: not supported in loop SLP because of realignment complications.  */
  if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
    {
      /* Check whether the loads in an instance form a subchain and thus
         no permutation is necessary.  */
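      /* For example, a node loading { a[2], a[3] } out of an
         interleaving chain a[0] ... a[3] is such a subchain: its
         elements are consecutive, each with GROUP_GAP 1, so the
         permutation can simply be dropped.  */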
      FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
        {
          if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
            continue;
          bool subchain_p = true;
          next_load = NULL;
          FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
            {
              if (j != 0
                  && (next_load != load
                      || GROUP_GAP (vinfo_for_stmt (load)) != 1))
                {
                  subchain_p = false;
                  break;
                }
              next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
            }
          if (subchain_p)
            SLP_TREE_LOAD_PERMUTATION (node).release ();
          else
            {
              stmt_vec_info group_info
                = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
              group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
              unsigned nunits
                = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
              unsigned k, maxk = 0;
              FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
                if (k > maxk)
                  maxk = k;
              /* In BB vectorization we may not actually use a loaded vector
                 accessing elements in excess of GROUP_SIZE.  */
              if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "BB vectorization with gaps at the end of "
                                   "a load is not supported\n");
                  return false;
                }

              /* Verify the permutation can be generated.  */
              vec<tree> tem;
              unsigned n_perms;
              if (!vect_transform_slp_perm_load (node, tem, NULL,
                                                 1, slp_instn, true, &n_perms))
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                   vect_location,
                                   "unsupported load permutation\n");
                  return false;
                }
            }
        }
      return true;
    }

  /* For loop vectorization verify we can generate the permutation.  */
  unsigned n_perms;
  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
    if (node->load_permutation.exists ()
        && !vect_transform_slp_perm_load
             (node, vNULL, NULL,
              SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true,
              &n_perms))
      return false;

  return true;
}


/* Find the last scalar stmt in NODE.  */

gimple *
vect_find_last_scalar_stmt_in_slp (slp_tree node)
{
  gimple *last = NULL, *stmt;

  for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
    {
      stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
      if (is_pattern_stmt_p (stmt_vinfo))
        last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
      else
        last = get_later_stmt (stmt, last);
    }

  return last;
}

23847df4
RB
1567/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1568
1569static void
1a4b99c1 1570vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
23847df4 1571 stmt_vector_for_cost *prologue_cost_vec,
1a4b99c1 1572 stmt_vector_for_cost *body_cost_vec,
23847df4
RB
1573 unsigned ncopies_for_cost)
1574{
603cca93 1575 unsigned i, j;
23847df4 1576 slp_tree child;
8155f4d8 1577 gimple *stmt;
23847df4
RB
1578 stmt_vec_info stmt_info;
1579 tree lhs;
23847df4
RB
1580
1581 /* Recurse down the SLP tree. */
1582 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
603cca93 1583 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1a4b99c1
RB
1584 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1585 body_cost_vec, ncopies_for_cost);
23847df4
RB
1586
1587 /* Look at the first scalar stmt to determine the cost. */
1588 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1589 stmt_info = vinfo_for_stmt (stmt);
1590 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1591 {
2de001ee
RS
1592 vect_memory_access_type memory_access_type
1593 = (STMT_VINFO_STRIDED_P (stmt_info)
1594 ? VMAT_STRIDED_SLP
1595 : VMAT_CONTIGUOUS);
23847df4 1596 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
2de001ee
RS
1597 vect_model_store_cost (stmt_info, ncopies_for_cost,
1598 memory_access_type, vect_uninitialized_def,
23847df4
RB
1599 node, prologue_cost_vec, body_cost_vec);
1600 else
1601 {
23847df4 1602 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
52eab378
RB
1603 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1604 {
8155f4d8
RB
1605 /* If the load is permuted then the alignment is determined by
1606 the first group element not by the first scalar stmt DR. */
52eab378
RB
1607 stmt = GROUP_FIRST_ELEMENT (stmt_info);
1608 stmt_info = vinfo_for_stmt (stmt);
8155f4d8 1609 /* Record the cost for the permutation. */
29afecdf
RB
1610 unsigned n_perms;
1611 vect_transform_slp_perm_load (node, vNULL, NULL,
1612 ncopies_for_cost, instance, true,
1613 &n_perms);
1614 record_stmt_cost (body_cost_vec, n_perms, vec_perm,
8155f4d8 1615 stmt_info, 0, vect_body);
8155f4d8
RB
1616 unsigned nunits
1617 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
29afecdf
RB
1618 /* And adjust the number of loads performed. This handles
1619 redundancies as well as loads that are later dead. */
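	      /* Illustrative example (hypothetical numbers): with
		 GROUP_SIZE 8, NUNITS 4 and a load permutation using only
		 elements 0 and 1, only bits 0 and 1 are set in PERM
		 below, so just the first vector (elements 0-3) has to be
		 loaded and ncopies_for_cost becomes 1 instead of the
		 8 / 4 = 2 loads a full group would need.  */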
	      auto_sbitmap perm (GROUP_SIZE (stmt_info));
	      bitmap_clear (perm);
	      for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
		bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
	      ncopies_for_cost = 0;
	      bool load_seen = false;
	      for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
		{
		  if (i % nunits == 0)
		    {
		      if (load_seen)
			ncopies_for_cost++;
		      load_seen = false;
		    }
		  if (bitmap_bit_p (perm, i))
		    load_seen = true;
		}
	      if (load_seen)
		ncopies_for_cost++;
	      gcc_assert (ncopies_for_cost
			  <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
			      + nunits - 1) / nunits);
	      ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
	    }
	  /* Record the cost for the vector loads.  */
	  vect_model_load_cost (stmt_info, ncopies_for_cost,
				memory_access_type, node, prologue_cost_vec,
				body_cost_vec);
	  return;
	}
    }
  else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
    {
      /* ncopies_for_cost is the number of IVs we generate.  */
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);

      /* Prologue cost for the initial values and step vector.  */
      record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
			     (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);
      record_stmt_cost (prologue_cost_vec, 1,
			CONSTANT_CLASS_P
			  (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
			? vector_load : vec_construct,
			stmt_info, 0, vect_prologue);

      /* ??? No easy way to get at the actual number of vector stmts
	 to be generated and thus the derived IVs.  */
    }
  else
    {
      record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			stmt_info, 0, vect_body);
      if (SLP_TREE_TWO_OPERATORS (node))
	{
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
			    stmt_info, 0, vect_body);
	  record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
			    stmt_info, 0, vect_body);
	}
    }

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  /* Scan operands and account for prologue cost of constants/externals.
     ??? This over-estimates cost for multiple uses and should be
     re-engineered.  */
  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); ++i)
    {
      tree op = gimple_op (stmt, i);
      gimple *def_stmt;
      enum vect_def_type dt;
      if (!op || op == lhs)
	continue;
      if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
	{
	  /* Without looking at the actual initializer a vector of
	     constants can be implemented as load from the constant pool.
	     ??? We need to pass down stmt_info for a vector type
	     even if it points to the wrong stmt.  */
	  if (dt == vect_constant_def)
	    record_stmt_cost (prologue_cost_vec, 1, vector_load,
			      stmt_info, 0, vect_prologue);
	  else if (dt == vect_external_def)
	    record_stmt_cost (prologue_cost_vec, 1, vec_construct,
			      stmt_info, 0, vect_prologue);
	}
    }

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
}

/* Compute the cost for the SLP instance INSTANCE.  */

static void
vect_analyze_slp_cost (slp_instance instance, void *data)
{
  stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
  unsigned ncopies_for_cost;
  stmt_info_for_cost *si;
  unsigned i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_analyze_slp_cost ===\n");

  /* Calculate the number of vector stmts to create based on the unrolling
     factor (the number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
     GROUP_SIZE / NUNITS otherwise).  */
  unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
  slp_tree node = SLP_INSTANCE_TREE (instance);
  stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
  /* Adjust the group_size by the vectorization factor, which is always one
     for basic-block vectorization.  */
  if (STMT_VINFO_LOOP_VINFO (stmt_info))
    group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
  unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
  /* For reductions look at a reduction operand in case the reduction
     operation is widening like DOT_PROD or SAD.  */
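  /* For instance (hypothetical types): a DOT_PROD_EXPR accumulating
     chars into an int has a V4SI result but V16QI inputs; taking
     NUNITS from the rhs1 type (16 instead of 4) reflects how many
     scalars one vector stmt actually covers.  */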
  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    {
      gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
      switch (gimple_assign_rhs_code (stmt))
	{
	case DOT_PROD_EXPR:
	case SAD_EXPR:
	  nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
			(TREE_TYPE (gimple_assign_rhs1 (stmt))));
	  break;
	default:;
	}
    }
  ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
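  /* E.g. (illustrative numbers): NUNITS 4 and GROUP_SIZE 6 give
     least_common_multiple (4, 6) = 12, so 12 / 4 = 3 vector stmts are
     costed, covering two copies of the group.  */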

  prologue_cost_vec.create (10);
  body_cost_vec.create (10);
  vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
			   &prologue_cost_vec, &body_cost_vec,
			   ncopies_for_cost);

  /* Record the prologue costs, which were delayed until we were
     sure that SLP was successful.  */
  FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_prologue);
    }

  /* Record the instance's instructions in the target cost model.  */
  FOR_EACH_VEC_ELT (body_cost_vec, i, si)
    {
      struct _stmt_vec_info *stmt_info
	= si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
      (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
			    si->misalign, vect_body);
    }

  prologue_cost_vec.release ();
  body_cost_vec.release ();
}

/* Split a group of stores, currently beginning at FIRST_STMT, into two
   groups: one (still beginning at FIRST_STMT) containing the first
   GROUP1_SIZE stmts (which are consecutive stores), and a second one
   containing the remainder.
   Return the first stmt in the second group.  */

static gimple *
vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size)
{
  stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt);
  gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
  gcc_assert (group1_size > 0);
  int group2_size = GROUP_SIZE (first_vinfo) - group1_size;
  gcc_assert (group2_size > 0);
  GROUP_SIZE (first_vinfo) = group1_size;

  gimple *stmt = first_stmt;
  for (unsigned i = group1_size; i > 1; i--)
    {
      stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }
  /* STMT is now the last element of the first group.  */
  gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt));
  GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0;

  GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size;
  for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)))
    {
      GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2;
      gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1);
    }

  /* For the second group, the GROUP_GAP is that before the original group,
     plus skipping over the first group.  */
  GROUP_GAP (vinfo_for_stmt (group2))
    = GROUP_GAP (first_vinfo) + group1_size;

  /* GROUP_GAP of the first group now has to skip over the second group too.  */
  GROUP_GAP (first_vinfo) += group2_size;
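  /* A hypothetical example: splitting a gap-free group of 8 stores with
     GROUP1_SIZE 4 gives two groups of 4.  The second group's gap becomes
     0 + 4 (its elements start after the first group's), and the first
     group's gap also grows to 4, so that each group skips over the other
     group's elements when stepping to the next iteration's elements.  */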

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
		     group1_size, group2_size);

  return group2;
}

/* Analyze an SLP instance starting from a group of grouped stores.  Call
   vect_build_slp_tree to build a tree of packed stmts if possible.
   Return FALSE if it's impossible to SLP any stmt in the loop.  */

static bool
vect_analyze_slp_instance (vec_info *vinfo,
			   gimple *stmt, unsigned max_tree_size)
{
  slp_instance new_instance;
  slp_tree node;
  unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
  unsigned int unrolling_factor = 1, nunits;
  tree vectype, scalar_type = NULL_TREE;
  gimple *next;
  unsigned int i;
  unsigned int max_nunits = 0;
  vec<slp_tree> loads;
  struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
  vec<gimple *> scalar_stmts;

  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      if (dr)
	{
	  scalar_type = TREE_TYPE (DR_REF (dr));
	  vectype = get_vectype_for_scalar_type (scalar_type);
	}
      else
	{
	  gcc_assert (is_a <loop_vec_info> (vinfo));
	  vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
	}

      group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
    }
  else
    {
      gcc_assert (is_a <loop_vec_info> (vinfo));
      vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
      group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
    }

  if (!vectype)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Build SLP failed: unsupported data-type ");
	  dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
	  dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	}

      return false;
    }
  nunits = TYPE_VECTOR_SUBPARTS (vectype);

  /* Create a node (a root of the SLP tree) for the packed grouped stores.  */
  scalar_stmts.create (group_size);
  next = stmt;
  if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
    {
      /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS.  */
      while (next)
	{
	  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
	      && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
	    scalar_stmts.safe_push (
		STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
	  else
	    scalar_stmts.safe_push (next);
	  next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
	}
      /* Mark the first element of the reduction chain as reduction to properly
	 transform the node.  In the reduction analysis phase only the last
	 element of the chain is marked as reduction.  */
      if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
	STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
    }
  else
    {
      /* Collect reduction statements.  */
      vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
      for (i = 0; reductions.iterate (i, &next); i++)
	scalar_stmts.safe_push (next);
    }

  loads.create (group_size);

  /* Build the tree for the SLP instance.  */
  bool *matches = XALLOCAVEC (bool, group_size);
  unsigned npermutes = 0;
  node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
			      &max_nunits, &loads, matches, &npermutes,
			      NULL, max_tree_size);
  if (node != NULL)
    {
      /* Calculate the unrolling factor based on the smallest type.  */
      unrolling_factor
	= least_common_multiple (max_nunits, group_size) / group_size;

      if (unrolling_factor != 1
	  && is_a <bb_vec_info> (vinfo))
	{
	  if (max_nunits > group_size)
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "Build SLP failed: store group "
			       "size not a multiple of the vector size "
			       "in basic block SLP\n");
	      vect_free_slp_tree (node);
	      loads.release ();
	      return false;
	    }
	  /* Fatal mismatch.  */
	  matches[group_size / max_nunits * max_nunits] = false;
	  vect_free_slp_tree (node);
	  loads.release ();
	}
      else
	{
	  /* Create a new SLP instance.  */
	  new_instance = XNEW (struct _slp_instance);
	  SLP_INSTANCE_TREE (new_instance) = node;
	  SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
	  SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
	  SLP_INSTANCE_LOADS (new_instance) = loads;

	  /* Compute the load permutation.  */
	  slp_tree load_node;
	  bool loads_permuted = false;
	  FOR_EACH_VEC_ELT (loads, i, load_node)
	    {
	      vec<unsigned> load_permutation;
	      int j;
	      gimple *load, *first_stmt;
	      bool this_load_permuted = false;
	      load_permutation.create (group_size);
	      first_stmt = GROUP_FIRST_ELEMENT
		  (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
	      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
		{
		  int load_place = vect_get_place_in_interleaving_chain
		      (load, first_stmt);
		  gcc_assert (load_place != -1);
		  if (load_place != j)
		    this_load_permuted = true;
		  load_permutation.safe_push (load_place);
		}
	      if (!this_load_permuted
		  /* The load requires permutation when unrolling exposes
		     a gap either because the group is larger than the SLP
		     group-size or because there is a gap between the groups.  */
		  && (unrolling_factor == 1
		      || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
			  && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
		{
		  load_permutation.release ();
		  continue;
		}
	      SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
	      loads_permuted = true;
	    }

	  if (loads_permuted)
	    {
	      if (!vect_supported_load_permutation_p (new_instance))
		{
		  if (dump_enabled_p ())
		    {
		      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				       "Build SLP failed: unsupported load "
				       "permutation ");
		      dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
					TDF_SLIM, stmt, 0);
		    }
		  vect_free_slp_instance (new_instance);
		  return false;
		}
	    }

	  /* If the loads and stores can be handled with load/store-lanes
	     instructions, do not generate this SLP instance.  */
	  if (is_a <loop_vec_info> (vinfo)
	      && loads_permuted
	      && dr && vect_store_lanes_supported (vectype, group_size))
	    {
	      slp_tree load_node;
	      FOR_EACH_VEC_ELT (loads, i, load_node)
		{
		  gimple *first_stmt = GROUP_FIRST_ELEMENT
		      (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
		  stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt);
		  /* Use SLP for strided accesses (or if we
		     can't load-lanes).  */
		  if (STMT_VINFO_STRIDED_P (stmt_vinfo)
		      || ! vect_load_lanes_supported
			    (STMT_VINFO_VECTYPE (stmt_vinfo),
			     GROUP_SIZE (stmt_vinfo)))
		    break;
		}
	      if (i == loads.length ())
		{
		  if (dump_enabled_p ())
		    dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
				     "Built SLP cancelled: can use "
				     "load/store-lanes\n");
		  vect_free_slp_instance (new_instance);
		  return false;
		}
	    }

	  vinfo->slp_instances.safe_push (new_instance);

	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location,
			       "Final SLP tree for instance:\n");
	      vect_print_slp_tree (MSG_NOTE, vect_location, node);
	    }

	  return true;
	}
    }
  else
    {
      /* Failed to SLP.  */
      /* Free the allocated memory.  */
      scalar_stmts.release ();
      loads.release ();
    }

  /* For basic block SLP, try to break the group up into multiples of the
     vector size.  */
  if (is_a <bb_vec_info> (vinfo)
      && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
      && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
    {
      /* We consider breaking the group only on VF boundaries from the existing
	 start.  */
      for (i = 0; i < group_size; i++)
	if (!matches[i])
	  break;

      if (i >= nunits && i < group_size)
	{
	  /* Split into two groups at the first vector boundary before i.  */
	  gcc_assert ((nunits & (nunits - 1)) == 0);
	  unsigned group1_size = i & ~(nunits - 1);
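	  /* E.g. (hypothetical numbers): with NUNITS 4, GROUP_SIZE 7 and
	     the first mismatch at i = 6, GROUP1_SIZE becomes 6 & ~3 = 4.
	     The group is split into 4 + 3; the mismatch at 6 lies in the
	     middle of the second vector, so after re-analyzing the first
	     group the rest of that vector is skipped below.  */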

	  gimple *rest = vect_split_slp_store_group (stmt, group1_size);
	  bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size);
	  /* If the first non-match was in the middle of a vector,
	     skip the rest of that vector.  */
	  if (group1_size < i)
	    {
	      i = group1_size + nunits;
	      if (i < group_size)
		rest = vect_split_slp_store_group (rest, nunits);
	    }
	  if (i < group_size)
	    res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
	  return res;
	}
      /* Even though the first vector did not all match, we might be able to SLP
	 (some) of the remainder.  FORNOW ignore this possibility.  */
    }

  return false;
}


/* Check if there are stmts in the loop that can be vectorized using SLP.
   Build SLP trees of packed scalar stmts if SLP is possible.  */

bool
vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
{
  unsigned int i;
  gimple *first_element;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");

  /* Find SLP sequences starting from groups of grouped stores.  */
  FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
    vect_analyze_slp_instance (vinfo, first_element, max_tree_size);

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      if (loop_vinfo->reduction_chains.length () > 0)
	{
	  /* Find SLP sequences starting from reduction chains.  */
	  FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
	    if (! vect_analyze_slp_instance (vinfo, first_element,
					     max_tree_size))
	      {
		/* Dissolve reduction chain group.  */
		gimple *next, *stmt = first_element;
		while (stmt)
		  {
		    stmt_vec_info vinfo = vinfo_for_stmt (stmt);
		    next = GROUP_NEXT_ELEMENT (vinfo);
		    GROUP_FIRST_ELEMENT (vinfo) = NULL;
		    GROUP_NEXT_ELEMENT (vinfo) = NULL;
		    stmt = next;
		  }
		STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element))
		  = vect_internal_def;
	      }
	}

      /* Find SLP sequences starting from groups of reductions.  */
      if (loop_vinfo->reductions.length () > 1)
	vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
				   max_tree_size);
    }

  return true;
}


/* For each possible SLP instance decide whether to SLP it and calculate the
   overall unrolling factor needed to SLP the loop.  Return TRUE if decided
   to SLP at least one instance.  */

bool
vect_make_slp_decision (loop_vec_info loop_vinfo)
{
  unsigned int i, unrolling_factor = 1;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;
  int decided_to_slp = 0;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
		     "\n");

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* FORNOW: SLP if you can.  */
      if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
	unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);

      /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts.  Later we
	 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
	 loop-based vectorization.  Such stmts will be marked as HYBRID.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      decided_to_slp++;
    }

  LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;

  if (decided_to_slp && dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Decided to SLP %d instances.  Unrolling factor %d\n",
		     decided_to_slp, unrolling_factor);

  return (decided_to_slp > 0);
}


/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
   can't be SLPed) in the tree rooted at NODE.  Mark such stmts as HYBRID.  */

static void
vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
  imm_use_iterator imm_iter;
  gimple *use_stmt;
  stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
  slp_tree child;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  int j;

  /* Propagate hybrid down the SLP tree.  */
  if (stype == hybrid)
    ;
  else if (HYBRID_SLP_STMT (stmt_vinfo))
    stype = hybrid;
  else
    {
      /* Check if a pure SLP stmt has uses in non-SLP stmts.  */
      gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
      /* If we get a pattern stmt here we have to use the LHS of the
	 original stmt for immediate uses.  */
      if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo)
	  && STMT_VINFO_RELATED_STMT (stmt_vinfo))
	stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      tree def;
      if (gimple_code (stmt) == GIMPLE_PHI)
	def = gimple_phi_result (stmt);
      else
	def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
      if (def)
	FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
	  {
	    if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
	      continue;
	    use_vinfo = vinfo_for_stmt (use_stmt);
	    if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
		&& STMT_VINFO_RELATED_STMT (use_vinfo))
	      use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
	    if (!STMT_SLP_TYPE (use_vinfo)
		&& (STMT_VINFO_RELEVANT (use_vinfo)
		    || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
		&& !(gimple_code (use_stmt) == GIMPLE_PHI
		     && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
	      {
		if (dump_enabled_p ())
		  {
		    dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
				     "def in non-SLP stmt: ");
		    dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
		  }
		stype = hybrid;
	      }
	  }
    }

  if (stype == hybrid
      && !HYBRID_SLP_STMT (stmt_vinfo))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
      STMT_SLP_TYPE (stmt_vinfo) = hybrid;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
      vect_detect_hybrid_slp_stmts (child, i, stype);
}

/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses.  */

static tree
vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
{
  walk_stmt_info *wi = (walk_stmt_info *)data;
  struct loop *loopp = (struct loop *)wi->info;

  if (wi->is_lhs)
    return NULL_TREE;

  if (TREE_CODE (*tp) == SSA_NAME
      && !SSA_NAME_IS_DEFAULT_DEF (*tp))
    {
      gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
      if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
	  && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
	      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
	    }
	  STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
	}
    }

  return NULL_TREE;
}

static tree
vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
			  walk_stmt_info *)
{
  stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi));
  /* If the stmt is in a SLP instance then this isn't a reason
     to mark use definitions in other SLP instances as hybrid.  */
  if (! STMT_SLP_TYPE (use_vinfo)
      && (STMT_VINFO_RELEVANT (use_vinfo)
	  || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
      && ! (gimple_code (gsi_stmt (*gsi)) == GIMPLE_PHI
	    && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
    ;
  else
    *handled = true;
  return NULL_TREE;
}

/* Find stmts that must be both vectorized and SLPed.  */

void
vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
{
  unsigned int i;
  vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  slp_instance instance;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
		     "\n");

  /* First walk all pattern stmts in the loop and mark defs of uses as
     hybrid because immediate uses in them are not recorded.  */
  for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
    {
      basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
	   gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
	  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
	    {
	      walk_stmt_info wi;
	      memset (&wi, 0, sizeof (wi));
	      wi.info = LOOP_VINFO_LOOP (loop_vinfo);
	      gimple_stmt_iterator gsi2
		= gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
	      walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
				vect_detect_hybrid_slp_1, &wi);
	      walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
			       vect_detect_hybrid_slp_2,
			       vect_detect_hybrid_slp_1, &wi);
	    }
	}
    }

  /* Then walk the SLP instance trees marking stmts with uses in
     non-SLP stmts as hybrid, also propagating hybrid down the
     SLP tree, collecting the above info on-the-fly.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
	vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
				      i, pure_slp);
    }
}


/* Create and initialize a new bb_vec_info struct for BB, as well as
   stmt_vec_info structs for all the stmts in it.  */

static bb_vec_info
new_bb_vec_info (gimple_stmt_iterator region_begin,
		 gimple_stmt_iterator region_end)
{
  basic_block bb = gsi_bb (region_begin);
  bb_vec_info res = NULL;
  gimple_stmt_iterator gsi;

  res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
  res->kind = vec_info::bb;
  BB_VINFO_BB (res) = bb;
  res->region_begin = region_begin;
  res->region_end = region_end;

  for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      gimple_set_uid (stmt, 0);
      set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
    }

  BB_VINFO_GROUPED_STORES (res).create (10);
  BB_VINFO_SLP_INSTANCES (res).create (2);
  BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);

  bb->aux = res;
  return res;
}


/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
   stmts in the basic block.  */

static void
destroy_bb_vec_info (bb_vec_info bb_vinfo)
{
  slp_instance instance;
  unsigned i;

  if (!bb_vinfo)
    return;

  vect_destroy_datarefs (bb_vinfo);
  free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
  FOR_EACH_VEC_ELT (BB_VINFO_SLP_INSTANCES (bb_vinfo), i, instance)
    vect_free_slp_instance (instance);
  BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
  destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));

  for (gimple_stmt_iterator si = bb_vinfo->region_begin;
       gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
    {
      gimple *stmt = gsi_stmt (si);
      stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

      if (stmt_info)
	/* Free stmt_vec_info.  */
	free_stmt_vec_info (stmt);

      /* Reset region marker.  */
      gimple_set_uid (stmt, -1);
    }

  BB_VINFO_BB (bb_vinfo)->aux = NULL;
  free (bb_vinfo);
}


/* Analyze statements contained in SLP tree node after recursively analyzing
   the subtree.  Return TRUE if the operations are supported.  */

static bool
vect_slp_analyze_node_operations (slp_tree node, slp_instance node_instance)
{
  bool dummy;
  int i, j;
  gimple *stmt;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return true;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (!vect_slp_analyze_node_operations (child, node_instance))
      return false;

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  gcc_assert (stmt_info);
  gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);

  /* For BB vectorization vector types are assigned here.
     Memory accesses already got their vector type assigned
     in vect_analyze_data_refs.  */
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
  if (bb_vinfo
      && ! STMT_VINFO_DATA_REF (stmt_info))
    {
      gcc_assert (PURE_SLP_STMT (stmt_info));

      tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "get vectype for scalar type: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
	  dump_printf (MSG_NOTE, "\n");
	}

      tree vectype = get_vectype_for_scalar_type (scalar_type);
      if (!vectype)
	{
	  if (dump_enabled_p ())
	    {
	      dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			       "not SLPed: unsupported data-type ");
	      dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
				 scalar_type);
	      dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
	    }
	  return false;
	}

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
	  dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
	  dump_printf (MSG_NOTE, "\n");
	}

      gimple *sstmt;
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
	STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
    }

  /* Push SLP node def-type to stmt operands.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
	= SLP_TREE_DEF_TYPE (child);
  bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance);
  /* Restore def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))
	= vect_internal_def;
  if (! res)
    return false;

  return true;
}


/* Analyze statements in SLP instances of the basic block.  Return TRUE if the
   operations are supported.  */

bool
vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
{
  slp_instance instance;
  int i;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "=== vect_slp_analyze_operations ===\n");

  for (i = 0; slp_instances.iterate (i, &instance); )
    {
      if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance),
					     instance))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
			    SLP_TREE_SCALAR_STMTS
			      (SLP_INSTANCE_TREE (instance))[0], 0);
	  vect_free_slp_instance (instance);
	  slp_instances.ordered_remove (i);
	}
      else
	{
	  /* Compute the costs of the SLP instance.  */
	  vect_analyze_slp_cost (instance, data);
	  i++;
	}
    }

  if (!slp_instances.length ())
    return false;

  return true;
}


/* Compute the scalar cost of the SLP node NODE and its children
   and return it.  Do not account defs that are marked in LIFE and
   update LIFE according to uses of NODE.  */

static unsigned
vect_bb_slp_scalar_cost (basic_block bb,
			 slp_tree node, vec<bool, va_heap> *life)
{
  unsigned scalar_cost = 0;
  unsigned i;
  gimple *stmt;
  slp_tree child;

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      unsigned stmt_cost;
      ssa_op_iter op_iter;
      def_operand_p def_p;
      stmt_vec_info stmt_info;

      if ((*life)[i])
	continue;

      /* If there is a non-vectorized use of the defs then the scalar
	 stmt is kept live in which case we do not account it or any
	 required defs in the SLP children in the scalar cost.  This
	 way we make the vectorization more costly when compared to
	 the scalar cost.  */
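      /* A hypothetical example: if one lane of an SLP'ed addition also
	 feeds a scalar use outside the SLP region, that lane's scalar
	 stmt stays live after vectorization, so crediting its removal
	 would bias the comparison in favor of the vector code.  */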
      FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
	{
	  imm_use_iterator use_iter;
	  gimple *use_stmt;
	  FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
	    if (!is_gimple_debug (use_stmt)
		&& (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
					     use_stmt)
		    || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
	      {
		(*life)[i] = true;
		BREAK_FROM_IMM_USE_STMT (use_iter);
	      }
	}
      if ((*life)[i])
	continue;

      /* Count scalar stmts only once.  */
      if (gimple_visited_p (stmt))
	continue;
      gimple_set_visited (stmt, true);

      stmt_info = vinfo_for_stmt (stmt);
      if (STMT_VINFO_DATA_REF (stmt_info))
	{
	  if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
	    stmt_cost = vect_get_stmt_cost (scalar_load);
	  else
	    stmt_cost = vect_get_stmt_cost (scalar_store);
	}
      else
	stmt_cost = vect_get_stmt_cost (scalar_stmt);

      scalar_cost += stmt_cost;
    }

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
      scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);

  return scalar_cost;
}

/* Check if vectorization of the basic block is profitable.  */

static bool
vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
{
  vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
  slp_instance instance;
  int i;
  unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
  unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;

  /* Calculate scalar cost.  */
  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      auto_vec<bool, 20> life;
      life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
      scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
					      SLP_INSTANCE_TREE (instance),
					      &life);
    }

  /* Unset visited flag.  */
  for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
       gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
    gimple_set_visited (gsi_stmt (gsi), false);

  /* Complete the target-specific cost calculation.  */
  finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
	       &vec_inside_cost, &vec_epilogue_cost);

  vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
      dump_printf (MSG_NOTE, "  Vector inside of basic block cost: %d\n",
		   vec_inside_cost);
      dump_printf (MSG_NOTE, "  Vector prologue cost: %d\n", vec_prologue_cost);
      dump_printf (MSG_NOTE, "  Vector epilogue cost: %d\n", vec_epilogue_cost);
      dump_printf (MSG_NOTE, "  Scalar cost of basic block: %d\n", scalar_cost);
    }

  /* Vectorization is profitable if its cost is not more than the cost of
     the scalar version.  Note that we err on the vector side for equal
     cost because the cost estimate is otherwise quite pessimistic
     (constant uses are free on the scalar side but cost a load on the
     vector side for example).  */
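  /* For example (made-up numbers): vec_outside_cost 2, vec_inside_cost 4
     and scalar_cost 6 give 6 > 6 below, which is false, so the
     equal-cost case is still vectorized.  */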
  if (vec_outside_cost + vec_inside_cost > scalar_cost)
    return false;

  return true;
}

/* Check if the basic block can be vectorized.  Returns a bb_vec_info
   if so and sets fatal to true if failure is independent of
   current_vector_size.  */

static bb_vec_info
vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
		       gimple_stmt_iterator region_end,
		       vec<data_reference_p> datarefs, int n_stmts,
		       bool &fatal)
{
  bb_vec_info bb_vinfo;
  slp_instance instance;
  int i;
  int min_vf = 2;

  /* The first group of checks is independent of the vector size.  */
  fatal = true;

  if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: too many instructions in "
			 "basic block.\n");
      free_data_refs (datarefs);
      return NULL;
    }

  bb_vinfo = new_bb_vec_info (region_begin, region_end);
  if (!bb_vinfo)
    return NULL;

  BB_VINFO_DATAREFS (bb_vinfo) = datarefs;

  /* Analyze the data references.  */

  if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data-ref in basic "
			 "block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: not enough data-refs in "
			 "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_analyze_data_ref_accesses (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: unhandled data access in "
			 "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* If there are no grouped stores in the region there is no need
     to continue with pattern recog as vect_analyze_slp will fail
     anyway.  */
  if (bb_vinfo->grouped_stores.is_empty ())
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: no grouped stores in "
			 "basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* While the rest of the analysis below depends on it in some way.  */
  fatal = false;

  vect_pattern_recog (bb_vinfo);

  /* Check the SLP opportunities in the basic block, analyze and build SLP
     trees.  */
  if (!vect_analyze_slp (bb_vinfo, n_stmts))
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "Failed to SLP the basic block.\n");
	  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			   "not vectorized: failed to find SLP opportunities "
			   "in basic block.\n");
	}

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Analyze and verify the alignment of data references and the
     dependence in the SLP instances.  */
  for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
    {
      if (! vect_slp_analyze_and_verify_instance_alignment (instance)
	  || ! vect_slp_analyze_instance_dependence (instance))
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "removing SLP instance operations starting from: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
			    SLP_TREE_SCALAR_STMTS
			      (SLP_INSTANCE_TREE (instance))[0], 0);
	  vect_free_slp_instance (instance);
	  BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
	  continue;
	}

      /* Mark all the statements that we want to vectorize as pure SLP and
	 relevant.  */
      vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
      vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));

      i++;
    }
  if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
    {
      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
				    BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: bad operation in basic block.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  /* Cost model: check if the vectorization is worthwhile.  */
  if (!unlimited_cost_model (NULL)
      && !vect_bb_vectorization_profitable_p (bb_vinfo))
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
			 "not vectorized: vectorization is not "
			 "profitable.\n");

      destroy_bb_vec_info (bb_vinfo);
      return NULL;
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "Basic block will be vectorized using SLP\n");

  return bb_vinfo;
}


/* Main entry for the BB vectorizer.  Analyze and transform BB, returns
   true if anything in the basic-block was vectorized.  */

bool
vect_slp_bb (basic_block bb)
{
  bb_vec_info bb_vinfo;
  gimple_stmt_iterator gsi;
  unsigned int vector_sizes;
  bool any_vectorized = false;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");

  /* Autodetect first vector size we try.  */
  current_vector_size = 0;
  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();

  gsi = gsi_start_bb (bb);

  while (1)
    {
      if (gsi_end_p (gsi))
	break;

      gimple_stmt_iterator region_begin = gsi;
      vec<data_reference_p> datarefs = vNULL;
      int insns = 0;

      for (; !gsi_end_p (gsi); gsi_next (&gsi))
	{
	  gimple *stmt = gsi_stmt (gsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  insns++;

	  if (gimple_location (stmt) != UNKNOWN_LOCATION)
	    vect_location = gimple_location (stmt);

	  if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
	    break;
	}

      /* Skip leading unhandled stmts.  */
      if (gsi_stmt (region_begin) == gsi_stmt (gsi))
	{
	  gsi_next (&gsi);
	  continue;
	}

      gimple_stmt_iterator region_end = gsi;

      bool vectorized = false;
      bool fatal = false;
      bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
					datarefs, insns, fatal);
      if (bb_vinfo
	  && dbg_cnt (vect_slp))
	{
	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");

	  vect_schedule_slp (bb_vinfo);

	  if (dump_enabled_p ())
	    dump_printf_loc (MSG_NOTE, vect_location,
			     "basic block part vectorized\n");

	  destroy_bb_vec_info (bb_vinfo);

	  vectorized = true;
	}
      else
	destroy_bb_vec_info (bb_vinfo);

      any_vectorized |= vectorized;

      vector_sizes &= ~current_vector_size;
      if (vectorized
	  || vector_sizes == 0
	  || current_vector_size == 0
	  /* If vect_slp_analyze_bb_1 signaled that analysis for all
	     vector sizes will fail do not bother iterating.  */
	  || fatal)
	{
	  if (gsi_end_p (region_end))
	    break;

	  /* Skip the unhandled stmt.  */
	  gsi_next (&gsi);

	  /* And reset vector sizes.  */
	  current_vector_size = 0;
	  vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
	}
      else
	{
	  /* Try the next biggest vector size.  */
	  current_vector_size = 1 << floor_log2 (vector_sizes);
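	  /* E.g. (hypothetical target): if autovectorize_vector_sizes
	     returned the mask 16 | 8 and the autodetected 16-byte size
	     failed, VECTOR_SIZES is now 8 and floor_log2 selects 8 as
	     the largest remaining size to retry with.  */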
2970 if (dump_enabled_p ())
2971 dump_printf_loc (MSG_NOTE, vect_location,
2972 "***** Re-trying analysis with "
2973 "vector size %d\n", current_vector_size);
2974
2975 /* Start over. */
2976 gsi = region_begin;
2977 }
8e19f5a1 2978 }
61d371eb
RB
2979
2980 return any_vectorized;
8e19f5a1
IR
2981}
2982
2983
e4af0bc4
IE
2984/* Return 1 if vector type of boolean constant which is OPNUM
2985 operand in statement STMT is a boolean vector. */
2986
2987static bool
2988vect_mask_constant_operand_p (gimple *stmt, int opnum)
2989{
2990 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
2991 enum tree_code code = gimple_expr_code (stmt);
2992 tree op, vectype;
2993 gimple *def_stmt;
2994 enum vect_def_type dt;
2995
2996 /* For comparison and COND_EXPR type is chosen depending
2997 on the other comparison operand. */
2998 if (TREE_CODE_CLASS (code) == tcc_comparison)
2999 {
3000 if (opnum)
3001 op = gimple_assign_rhs1 (stmt);
3002 else
3003 op = gimple_assign_rhs2 (stmt);
3004
3005 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3006 &dt, &vectype))
3007 gcc_unreachable ();
3008
3009 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3010 }
3011
3012 if (code == COND_EXPR)
3013 {
3014 tree cond = gimple_assign_rhs1 (stmt);
3015
3016 if (TREE_CODE (cond) == SSA_NAME)
7b1b0cc1
RB
3017 op = cond;
3018 else if (opnum)
e4af0bc4
IE
3019 op = TREE_OPERAND (cond, 1);
3020 else
3021 op = TREE_OPERAND (cond, 0);
3022
3023 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt,
3024 &dt, &vectype))
3025 gcc_unreachable ();
3026
3027 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3028 }
3029
3030 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3031}
3032
3033
b8698a0f
L
3034/* For constant and loop invariant defs of SLP_NODE this function returns
3035 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
3036 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
3037 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
3038 REDUC_INDEX is the index of the reduction operand in the statements, unless
3039 it is -1. */
ebfd146a
IR
3040
3041static void
9dc3f7de 3042vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 3043 vec<tree> *vec_oprnds,
306b0c92 3044 unsigned int op_num, unsigned int number_of_vectors)
ebfd146a 3045{
355fe088
TS
3046 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3047 gimple *stmt = stmts[0];
ebfd146a 3048 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d2a12ae7 3049 unsigned nunits;
ebfd146a 3050 tree vec_cst;
d2a12ae7
RG
3051 tree *elts;
3052 unsigned j, number_of_places_left_in_vector;
ebfd146a 3053 tree vector_type;
9dc3f7de 3054 tree vop;
9771b263 3055 int group_size = stmts.length ();
ebfd146a 3056 unsigned int vec_num, i;
d2a12ae7 3057 unsigned number_of_copies = 1;
9771b263
DN
3058 vec<tree> voprnds;
3059 voprnds.create (number_of_vectors);
ebfd146a 3060 bool constant_p, is_store;
b5aeb3bb 3061 tree neutral_op = NULL;
bac430c9 3062 enum tree_code code = gimple_expr_code (stmt);
13396b6e 3063 gimple_seq ctor_seq = NULL;
b5aeb3bb 3064
42fd8198 3065 /* Check if vector type is a boolean vector. */
2568d8a1 3066 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
e4af0bc4 3067 && vect_mask_constant_operand_p (stmt, op_num))
42fd8198
IE
3068 vector_type
3069 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3070 else
3071 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325
RB
3072 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3073
ebfd146a
IR
3074 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3075 {
3076 is_store = true;
3077 op = gimple_assign_rhs1 (stmt);
3078 }
3079 else
9dc3f7de
IR
3080 is_store = false;
3081
3082 gcc_assert (op);
ebfd146a 3083
ebfd146a 3084 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 3085 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
3086
3087 For example, we have two scalar operands, s1 and s2 (e.g., group of
3088 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
3089 of this type can be packed in a vector). The output vector will contain
3090 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
3091 will be 2).
3092
b8698a0f 3093 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
3094 containing the operands.
3095
3096 For example, NUNITS is four as before, and the group size is 8
f7e531cf 3097 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 3098 {s5, s6, s7, s8}. */
b8698a0f 3099
14a61437 3100 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
3101
3102 number_of_places_left_in_vector = nunits;
62cf7335 3103 constant_p = true;
d2a12ae7 3104 elts = XALLOCAVEC (tree, nunits);
90dd6e3d 3105 bool place_after_defs = false;
ebfd146a
IR
3106 for (j = 0; j < number_of_copies; j++)
3107 {
9771b263 3108 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
3109 {
3110 if (is_store)
3111 op = gimple_assign_rhs1 (stmt);
bac430c9 3112 else
f7e531cf 3113 {
bac430c9 3114 switch (code)
f7e531cf 3115 {
bac430c9 3116 case COND_EXPR:
a989bcc3
IE
3117 {
3118 tree cond = gimple_assign_rhs1 (stmt);
3119 if (TREE_CODE (cond) == SSA_NAME)
3120 op = gimple_op (stmt, op_num + 1);
3121 else if (op_num == 0 || op_num == 1)
bac430c9 3122 op = TREE_OPERAND (cond, op_num);
a989bcc3
IE
3123 else
3124 {
3125 if (op_num == 2)
3126 op = gimple_assign_rhs2 (stmt);
3127 else
3128 op = gimple_assign_rhs3 (stmt);
3129 }
3130 }
bac430c9
IR
3131 break;
3132
3133 case CALL_EXPR:
3134 op = gimple_call_arg (stmt, op_num);
3135 break;
3136
b84b294a
JJ
3137 case LSHIFT_EXPR:
3138 case RSHIFT_EXPR:
3139 case LROTATE_EXPR:
3140 case RROTATE_EXPR:
3141 op = gimple_op (stmt, op_num + 1);
3142 /* Unlike the other binary operators, shifts/rotates have
3143 the shift count being int, instead of the same type as
3144 the lhs, so make sure the scalar is the right type if
3145 we are dealing with vectors of
3146 long long/long/short/char. */
793d9a16 3147 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
3148 op = fold_convert (TREE_TYPE (vector_type), op);
3149 break;
3150
bac430c9
IR
3151 default:
3152 op = gimple_op (stmt, op_num + 1);
b84b294a 3153 break;
f7e531cf
IR
3154 }
3155 }

          /* Create 'vect_ = {op0,op1,...,opn}'.  */
          number_of_places_left_in_vector--;
          tree orig_op = op;
          if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
            {
              if (CONSTANT_CLASS_P (op))
                {
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      /* Can't use VIEW_CONVERT_EXPR for booleans because
                         of possibly different sizes of scalar value and
                         vector element.  */
                      if (integer_zerop (op))
                        op = build_int_cst (TREE_TYPE (vector_type), 0);
                      else if (integer_onep (op))
                        op = build_all_ones_cst (TREE_TYPE (vector_type));
                      else
                        gcc_unreachable ();
                    }
                  else
                    op = fold_unary (VIEW_CONVERT_EXPR,
                                     TREE_TYPE (vector_type), op);
                  gcc_assert (op && CONSTANT_CLASS_P (op));
                }
              else
                {
                  tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
                  gimple *init_stmt;
                  if (VECTOR_BOOLEAN_TYPE_P (vector_type))
                    {
                      tree true_val
                        = build_all_ones_cst (TREE_TYPE (vector_type));
                      tree false_val
                        = build_zero_cst (TREE_TYPE (vector_type));
                      gcc_assert (INTEGRAL_TYPE_P (TREE_TYPE (op)));
                      init_stmt = gimple_build_assign (new_temp, COND_EXPR,
                                                       op, true_val,
                                                       false_val);
                    }
                  else
                    {
                      op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type),
                                   op);
                      init_stmt
                        = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR,
                                               op);
                    }
                  gimple_seq_add_stmt (&ctor_seq, init_stmt);
                  op = new_temp;
                }
            }
          elts[number_of_places_left_in_vector] = op;
          if (!CONSTANT_CLASS_P (op))
            constant_p = false;
          if (TREE_CODE (orig_op) == SSA_NAME
              && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
              && STMT_VINFO_BB_VINFO (stmt_vinfo)
              && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
                  == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
            place_after_defs = true;
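
          /* Illustrative note on the conversions above: for a
             VECTOR_BOOLEAN_TYPE_P type with, say, 32-bit elements, a
             scalar constant 1 cannot be VIEW_CONVERTed (the sizes
             differ), so it is rebuilt as the all-ones mask element via
             build_all_ones_cst, while a non-constant scalar B is turned
             into 'B ? all-ones : 0' by the emitted COND_EXPR.  */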

          if (number_of_places_left_in_vector == 0)
            {
              if (constant_p)
                vec_cst = build_vector (vector_type, elts);
              else
                {
                  vec<constructor_elt, va_gc> *v;
                  unsigned k;
                  vec_alloc (v, nunits);
                  for (k = 0; k < nunits; ++k)
                    CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
                  vec_cst = build_constructor (vector_type, v);
                }
              tree init;
              gimple_stmt_iterator gsi;
              if (place_after_defs)
                {
                  gsi = gsi_for_stmt
                          (vect_find_last_scalar_stmt_in_slp (slp_node));
                  init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
                }
              else
                init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
              if (ctor_seq != NULL)
                {
                  gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
                  gsi_insert_seq_before_without_update (&gsi, ctor_seq,
                                                        GSI_SAME_STMT);
                  ctor_seq = NULL;
                }
              voprnds.quick_push (init);
              place_after_defs = false;
              number_of_places_left_in_vector = nunits;
              constant_p = true;
            }
        }
    }

  /* Since the vectors are created in the reverse order, we should invert
     them.  */
  vec_num = voprnds.length ();
  for (j = vec_num; j != 0; j--)
    {
      vop = voprnds[j - 1];
      vec_oprnds->quick_push (vop);
    }

  voprnds.release ();

  /* If VF is greater than the unrolling factor needed for the SLP group
     of stmts, NUMBER_OF_VECTORS to be created is greater than the number
     of vectors built above, and hence we have to replicate the vectors
     (or push vectors of the neutral operand, when one is given) until
     NUMBER_OF_VECTORS is reached.  */
  while (number_of_vectors > vec_oprnds->length ())
    {
      tree neutral_vec = NULL;

      if (neutral_op)
        {
          if (!neutral_vec)
            neutral_vec = build_vector_from_val (vector_type, neutral_op);

          vec_oprnds->quick_push (neutral_vec);
        }
      else
        {
          for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
            vec_oprnds->quick_push (vop);
        }
    }
}
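
/* Illustrative note on the replication loop above: for an SLP reduction
   over PLUS_EXPR the neutral operand is zero, so the extra initial
   vectors are zero splats built by build_vector_from_val; when no
   neutral operand is given, the already created vectors are simply
   repeated in order.  */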


/* Get vectorized definitions from SLP_NODE that contains corresponding
   vectorized def-stmts.  */

static void
vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
{
  tree vec_oprnd;
  gimple *vec_def_stmt;
  unsigned int i;

  gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());

  FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
    {
      gcc_assert (vec_def_stmt);
      if (gimple_code (vec_def_stmt) == GIMPLE_PHI)
        vec_oprnd = gimple_phi_result (vec_def_stmt);
      else
        vec_oprnd = gimple_get_lhs (vec_def_stmt);
      vec_oprnds->quick_push (vec_oprnd);
    }
}


/* Get vectorized definitions for SLP_NODE.
   If the scalar definitions are loop invariants or constants, collect them
   and call vect_get_constant_vectors() to create vector stmts.
   Otherwise, the def-stmts must be already vectorized and the vectorized
   stmts must be stored in the corresponding child of SLP_NODE, and we call
   vect_get_slp_vect_defs () to retrieve them.  */

void
vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
                   vec<vec<tree> > *vec_oprnds)
{
  gimple *first_stmt;
  int number_of_vects = 0, i;
  HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
  slp_tree child = NULL;
  vec<tree> vec_defs;
  tree oprnd;
  bool first_iteration = true;

  first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
  FOR_EACH_VEC_ELT (ops, i, oprnd)
    {
      bool vectorized_defs = false;

      if (oprnd == NULL)
        {
          vec_defs = vNULL;
          vec_defs.create (0);
          vec_oprnds->quick_push (vec_defs);
          continue;
        }

      /* For each operand we check if it has vectorized definitions in a
         child node or we need to create them (for invariants and
         constants).  We check if the LHS of the first stmt of the next
         child matches OPRND.  If it does, we found the correct child.
         Otherwise, we call vect_get_constant_vectors ().  */
      for (unsigned int child_index = 0;
           child_index < SLP_TREE_CHILDREN (slp_node).length (); child_index++)
        {
          child = SLP_TREE_CHILDREN (slp_node)[child_index];

          /* We have to check both pattern and original def, if available.  */
          if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
            {
              gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
              gimple *related
                = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
              tree first_def_op;

              if (gimple_code (first_def) == GIMPLE_PHI)
                first_def_op = gimple_phi_result (first_def);
              else
                first_def_op = gimple_get_lhs (first_def);
              if (operand_equal_p (oprnd, first_def_op, 0)
                  || (related
                      && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
                {
                  /* The number of vector defs is determined by the number of
                     vector statements in the node from which we get those
                     statements.  */
                  number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
                  vectorized_defs = true;
                  break;
                }
            }
        }

      if (!vectorized_defs && first_iteration)
        {
          number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
          /* Number of vector stmts was calculated according to LHS in
             vect_schedule_slp_instance (), fix it by replacing LHS with
             RHS, if necessary.  See vect_get_smallest_scalar_type () for
             details.  */
          vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
                                         &rhs_size_unit);
          if (rhs_size_unit != lhs_size_unit)
            {
              number_of_vects *= rhs_size_unit;
              number_of_vects /= lhs_size_unit;
            }
        }

      /* Allocate memory for vectorized defs.  */
      vec_defs = vNULL;
      vec_defs.create (number_of_vects);

      /* For reduction defs we call vect_get_constant_vectors (), since we
         are looking for initial loop invariant values.  */
      if (vectorized_defs)
        /* The defs are already vectorized.  */
        vect_get_slp_vect_defs (child, &vec_defs);
      else
        /* Build vectors from scalar defs.  */
        vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
                                   number_of_vects);

      vec_oprnds->quick_push (vec_defs);

      first_iteration = false;
    }
}
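
/* Illustrative note on the size fixup above: for a conversion whose LHS
   type is int (4 bytes) while the smallest scalar type on the RHS side
   is short (2 bytes), NUMBER_OF_VECTS is scaled by 2/4, since vectors
   of the narrower type hold twice as many elements and thus half as
   many of them cover the same number of scalars.  */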

/* Generate vector permute statements from a list of loads in DR_CHAIN.
   If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
   permute statements for the SLP node NODE of the SLP instance
   SLP_NODE_INSTANCE.  */

bool
vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
                              gimple_stmt_iterator *gsi, int vf,
                              slp_instance slp_node_instance, bool analyze_only,
                              unsigned *n_perms)
{
  gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree mask_element_type = NULL_TREE, mask_type;
  int nunits, vec_index = 0;
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
  int mask_element;
  unsigned char *mask;
  machine_mode mode;

  if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
    return false;

  stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));

  mode = TYPE_MODE (vectype);

  /* The generic VEC_PERM_EXPR code always uses an integral type of the
     same size as the vector element being permuted.  */
  mask_element_type = lang_hooks.types.type_for_mode
    (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
  mask_type = get_vectype_for_scalar_type (mask_element_type);
  nunits = TYPE_VECTOR_SUBPARTS (vectype);
  mask = XALLOCAVEC (unsigned char, nunits);

  /* Initialize the vect stmts of NODE to properly insert the generated
     stmts later.  */
  if (! analyze_only)
    for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
         i < SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
      SLP_TREE_VEC_STMTS (node).quick_push (NULL);

  /* Generate permutation masks for every NODE.  Number of masks for each
     NODE is equal to GROUP_SIZE.
     E.g., we have a group of three nodes with three loads from the same
     location in each node, and the vector size is 4.  I.e., we have an
     a0b0c0a1b1c1... sequence and we need to create the following vectors:
     for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
     for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
     ...

     The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
     The last mask is illegal since we assume two operands for a permute
     operation, and the mask element values can't be outside that range.
     Hence, the last mask must be converted into {2,5,5,5}.
     For the first two permutations we need the first and the second input
     vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
     we need the second and the third vectors: {b1,c1,a2,b2} and
     {c2,a3,b3,c3}.  */
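
  /* A worked instance of the index arithmetic below, using the example
     above (illustrative): the node for the a's has load permutation
     {0,0,0} and the interleaving group size is 3, so the flat indices
     perm[k] + j * 3 run 0,0,0, 3,3,3, 6,6,6, 9,9,9; i / nunits selects
     the input vector and i % nunits the lane, which yields exactly the
     masks {0,0,0,3} {3,3,6,6} {6,9,9,9}, the last one being rewritten
     relative to its two actual inputs as {2,5,5,5}.  */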

  int vect_stmts_counter = 0;
  int index = 0;
  int first_vec_index = -1;
  int second_vec_index = -1;
  bool noop_p = true;
  *n_perms = 0;

  for (int j = 0; j < vf; j++)
    {
      for (int k = 0; k < group_size; k++)
        {
          int i = (SLP_TREE_LOAD_PERMUTATION (node)[k]
                   + j * STMT_VINFO_GROUP_SIZE (stmt_info));
          vec_index = i / nunits;
          mask_element = i % nunits;
          if (vec_index == first_vec_index
              || first_vec_index == -1)
            {
              first_vec_index = vec_index;
            }
          else if (vec_index == second_vec_index
                   || second_vec_index == -1)
            {
              second_vec_index = vec_index;
              mask_element += nunits;
            }
          else
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
                                   "permutation requires at "
                                   "least three vectors ");
                  dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
                                    stmt, 0);
                }
              return false;
            }

          gcc_assert (mask_element >= 0
                      && mask_element < 2 * nunits);
          if (mask_element != index)
            noop_p = false;
          mask[index++] = mask_element;

          if (index == nunits)
            {
              if (! noop_p
                  && ! can_vec_perm_p (mode, false, mask))
                {
                  if (dump_enabled_p ())
                    {
                      dump_printf_loc (MSG_MISSED_OPTIMIZATION,
                                       vect_location,
                                       "unsupported vect permute { ");
                      for (i = 0; i < nunits; ++i)
                        dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
                      dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
                    }
                  return false;
                }

              if (! noop_p)
                ++*n_perms;

              if (!analyze_only)
                {
                  tree mask_vec = NULL_TREE;

                  if (! noop_p)
                    {
                      tree *mask_elts = XALLOCAVEC (tree, nunits);
                      for (int l = 0; l < nunits; ++l)
                        mask_elts[l] = build_int_cst (mask_element_type,
                                                      mask[l]);
                      mask_vec = build_vector (mask_type, mask_elts);
                    }

                  if (second_vec_index == -1)
                    second_vec_index = first_vec_index;

                  /* Generate the permute statement if necessary.  */
                  tree first_vec = dr_chain[first_vec_index];
                  tree second_vec = dr_chain[second_vec_index];
                  gimple *perm_stmt;
                  if (! noop_p)
                    {
                      tree perm_dest
                        = vect_create_destination_var (gimple_assign_lhs (stmt),
                                                       vectype);
                      perm_dest = make_ssa_name (perm_dest);
                      perm_stmt = gimple_build_assign (perm_dest,
                                                       VEC_PERM_EXPR,
                                                       first_vec, second_vec,
                                                       mask_vec);
                      vect_finish_stmt_generation (stmt, perm_stmt, gsi);
                    }
                  else
                    /* If mask was NULL_TREE generate the requested
                       identity transform.  */
                    perm_stmt = SSA_NAME_DEF_STMT (first_vec);

                  /* Store the vector statement in NODE.  */
                  SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt;
                }

              index = 0;
              first_vec_index = -1;
              second_vec_index = -1;
              noop_p = true;
            }
        }
    }

  return true;
}
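
/* Illustrative note on the noop path above: a load permutation such as
   {0,1,2,3} with nunits == 4 produces mask elements equal to their index,
   so noop_p stays true, no VEC_PERM_EXPR is emitted, *N_PERMS is not
   incremented, and the def stmt of the input vector is recorded as the
   vector stmt of the node.  */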


/* Vectorize SLP instance tree in postorder.  */

static bool
vect_schedule_slp_instance (slp_tree node, slp_instance instance,
                            unsigned int vectorization_factor)
{
  gimple *stmt;
  bool grouped_store, is_store;
  gimple_stmt_iterator si;
  stmt_vec_info stmt_info;
  unsigned int vec_stmts_size, nunits, group_size;
  tree vectype;
  int i, j;
  slp_tree child;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return false;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_schedule_slp_instance (child, instance, vectorization_factor);

  /* Push SLP node def-type to stmts.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);

  stmt = SLP_TREE_SCALAR_STMTS (node)[0];
  stmt_info = vinfo_for_stmt (stmt);

  /* VECTYPE is the type of the destination.  */
  vectype = STMT_VINFO_VECTYPE (stmt_info);
  nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
  group_size = SLP_INSTANCE_GROUP_SIZE (instance);

  /* For each SLP instance calculate the number of vector stmts to be
     created for the scalar stmts in each node of the SLP tree.  The number
     of vector elements in one vector iteration is the number of scalar
     elements in one scalar iteration (GROUP_SIZE) multiplied by VF divided
     by the number of elements in a vector (NUNITS).
     Unless this is an SLP reduction, in which case the number of vector
     stmts is equal to the number of vector stmts of the children.  */
  if (GROUP_FIRST_ELEMENT (stmt_info)
      && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
    vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
  else
    vec_stmts_size = (vectorization_factor * group_size) / nunits;
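
  /* A worked instance of the formula above (illustrative): with
     VECTORIZATION_FACTOR == 4, GROUP_SIZE == 2 and NUNITS == 4 the node
     needs (4 * 2) / 4 == 2 vector stmts to cover all unrolled scalars.  */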

  if (!SLP_TREE_VEC_STMTS (node).exists ())
    {
      SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
      SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location,
                       "------>vectorizing SLP node starting from: ");
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  /* Vectorized stmts go before the last scalar stmt which is where
     all uses are ready.  */
  si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));

  /* Mark the first element of the reduction chain as reduction to properly
     transform the node.  In the analysis phase only the last element of the
     chain is marked as reduction.  */
  if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
      && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
    {
      STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
      STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
    }

  /* Handle two-operation SLP nodes by vectorizing the group with
     both operations and then performing a merge.  */
  if (SLP_TREE_TWO_OPERATORS (node))
    {
      enum tree_code code0 = gimple_assign_rhs_code (stmt);
      enum tree_code ocode = ERROR_MARK;
      gimple *ostmt;
      unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
        if (gimple_assign_rhs_code (ostmt) != code0)
          {
            mask[i] = 1;
            ocode = gimple_assign_rhs_code (ostmt);
          }
        else
          mask[i] = 0;
      if (ocode != ERROR_MARK)
        {
          vec<gimple *> v0;
          vec<gimple *> v1;
          unsigned j;
          tree tmask = NULL_TREE;
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          v0 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          gimple_assign_set_rhs_code (stmt, ocode);
          vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
          gimple_assign_set_rhs_code (stmt, code0);
          v1 = SLP_TREE_VEC_STMTS (node).copy ();
          SLP_TREE_VEC_STMTS (node).truncate (0);
          tree meltype = build_nonstandard_integer_type
            (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
          tree mvectype = get_same_sized_vectype (meltype, vectype);
          unsigned k = 0, l;
          for (j = 0; j < v0.length (); ++j)
            {
              tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
              for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
                {
                  if (k >= group_size)
                    k = 0;
                  melts[l] = build_int_cst
                    (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
                }
              tmask = build_vector (mvectype, melts);

              /* ??? Not all targets support a VEC_PERM_EXPR with a
                 constant mask that would translate to a vec_merge RTX
                 (with their vec_perm_const_ok).  We can either not
                 vectorize in that case or let veclower do its job.
                 Unfortunately that isn't too great and at least for
                 plus/minus we'd eventually like to match targets
                 vector addsub instructions.  */
              gimple *vstmt;
              vstmt = gimple_build_assign (make_ssa_name (vectype),
                                           VEC_PERM_EXPR,
                                           gimple_assign_lhs (v0[j]),
                                           gimple_assign_lhs (v1[j]), tmask);
              vect_finish_stmt_generation (stmt, vstmt, &si);
              SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
            }
          v0.release ();
          v1.release ();
          return false;
        }
    }
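
  /* Illustrative merge-mask example for the two-operator handling above:
     for a node {plus, minus, plus, minus} with nunits == 4, MASK is
     {0,1,0,1} and MELTS becomes {0, 5, 2, 7}, i.e. lanes 0 and 2 are
     taken from the plus vector v0 and lanes 1 and 3 from the minus
     vector v1 (whose lanes are numbered from nunits upwards), the
     classic addsub shuffle.  */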
  is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);

  /* Restore stmt def-types.  */
  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
        STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;

  return is_store;
}

/* Replace scalar calls from SLP node NODE with assignments setting their
   lhs to zero.  For loop vectorization this is done in vectorizable_call,
   but for SLP it needs to be deferred until the end of vect_schedule_slp,
   because multiple SLP instances may refer to the same scalar stmt.  */

static void
vect_remove_slp_scalar_calls (slp_tree node)
{
  gimple *stmt, *new_stmt;
  gimple_stmt_iterator gsi;
  int i;
  slp_tree child;
  tree lhs;
  stmt_vec_info stmt_info;

  if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
    return;

  FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
    vect_remove_slp_scalar_calls (child);

  FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
    {
      if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
        continue;
      stmt_info = vinfo_for_stmt (stmt);
      if (stmt_info == NULL
          || is_pattern_stmt_p (stmt_info)
          || !PURE_SLP_STMT (stmt_info))
        continue;
      lhs = gimple_call_lhs (stmt);
      new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
      set_vinfo_for_stmt (new_stmt, stmt_info);
      set_vinfo_for_stmt (stmt, NULL);
      STMT_VINFO_STMT (stmt_info) = new_stmt;
      gsi = gsi_for_stmt (stmt);
      gsi_replace (&gsi, new_stmt, false);
      SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
    }
}
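
/* Illustrative example of the replacement above (hypothetical names): a
   pure-SLP scalar call 'x_1 = powf (a_2, b_3)' is rewritten in place to
   'x_1 = 0.0f'; the now-trivial scalar assignment is left for later
   cleanup, while the vectorized copies of the call carry the real
   results.  */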

/* Generate vector code for all SLP instances in the loop/basic block.  */

bool
vect_schedule_slp (vec_info *vinfo)
{
  vec<slp_instance> slp_instances;
  slp_instance instance;
  unsigned int i, vf;
  bool is_store = false;

  slp_instances = vinfo->slp_instances;
  if (is_a <loop_vec_info> (vinfo))
    vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
  else
    vf = 1;

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      /* Schedule the tree of INSTANCE.  */
      is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
                                             instance, vf);
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "vectorizing stmts using SLP.\n");
    }

  FOR_EACH_VEC_ELT (slp_instances, i, instance)
    {
      slp_tree root = SLP_INSTANCE_TREE (instance);
      gimple *store;
      unsigned int j;
      gimple_stmt_iterator gsi;

      /* Remove scalar call stmts.  Do not do this for basic-block
         vectorization as not all uses may be vectorized.
         ??? Why should this be necessary?  DCE should be able to
         remove the stmts itself.
         ??? For BB vectorization we can as well remove scalar
         stmts starting from the SLP tree root if they have no
         uses.  */
      if (is_a <loop_vec_info> (vinfo))
        vect_remove_slp_scalar_calls (root);

      for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
                  && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
        {
          if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
            break;

          if (is_pattern_stmt_p (vinfo_for_stmt (store)))
            store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
          /* Free the attached stmt_vec_info and remove the stmt.  */
          gsi = gsi_for_stmt (store);
          unlink_stmt_vdef (store);
          gsi_remove (&gsi, true);
          release_defs (store);
          free_stmt_vec_info (store);
        }
    }

  return is_store;
}