]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
visium-protos.h (notice_update_cc): Delete.
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
5624e564 2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
957060b5
AM
26#include "target.h"
27#include "rtl.h"
ebfd146a 28#include "tree.h"
c7131fb2 29#include "gimple.h"
957060b5 30#include "tree-pass.h"
c7131fb2 31#include "ssa.h"
957060b5
AM
32#include "optabs-tree.h"
33#include "insn-config.h"
34#include "recog.h" /* FIXME: for insn_data */
957060b5 35#include "params.h"
40e23961 36#include "fold-const.h"
d8a2d370 37#include "stor-layout.h"
5be5c238 38#include "gimple-iterator.h"
ebfd146a 39#include "cfgloop.h"
ebfd146a 40#include "tree-vectorizer.h"
2635892a 41#include "langhooks.h"
642fce57 42#include "gimple-walk.h"
428db0ba 43#include "dbgcnt.h"
a70d6342
IR
44
45
ebfd146a
IR
46/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
47
48static void
49vect_free_slp_tree (slp_tree node)
50{
d092494c 51 int i;
d755c7ef 52 slp_tree child;
d092494c 53
ebfd146a
IR
54 if (!node)
55 return;
56
9771b263 57 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 58 vect_free_slp_tree (child);
b8698a0f 59
9771b263
DN
60 SLP_TREE_CHILDREN (node).release ();
61 SLP_TREE_SCALAR_STMTS (node).release ();
62 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 63 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
64
65 free (node);
66}
67
68
69/* Free the memory allocated for the SLP instance. */
70
71void
72vect_free_slp_instance (slp_instance instance)
73{
74 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
9771b263 75 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 76 free (instance);
ebfd146a
IR
77}
78
79
d092494c
IR
80/* Create an SLP node for SCALAR_STMTS. */
81
82static slp_tree
355fe088 83vect_create_new_slp_node (vec<gimple *> scalar_stmts)
d092494c 84{
d3cfd39e 85 slp_tree node;
355fe088 86 gimple *stmt = scalar_stmts[0];
d092494c
IR
87 unsigned int nops;
88
89 if (is_gimple_call (stmt))
90 nops = gimple_call_num_args (stmt);
91 else if (is_gimple_assign (stmt))
f7e531cf
IR
92 {
93 nops = gimple_num_ops (stmt) - 1;
94 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
95 nops++;
96 }
d092494c
IR
97 else
98 return NULL;
99
d3cfd39e 100 node = XNEW (struct _slp_tree);
d092494c 101 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263
DN
102 SLP_TREE_VEC_STMTS (node).create (0);
103 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 104 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 105 SLP_TREE_TWO_OPERATORS (node) = false;
d092494c
IR
106
107 return node;
108}
109
110
ddf56386
RB
111/* This structure is used in creation of an SLP tree. Each instance
112 corresponds to the same operand in a group of scalar stmts in an SLP
113 node. */
114typedef struct _slp_oprnd_info
115{
116 /* Def-stmts for the operands. */
117 vec<gimple *> def_stmts;
118 /* Information about the first statement, its vector def-type, type, the
119 operand itself in case it's constant, and an indication if it's a pattern
120 stmt. */
121 enum vect_def_type first_dt;
122 tree first_op_type;
123 bool first_pattern;
124 bool second_pattern;
125} *slp_oprnd_info;
126
127
d092494c
IR
128/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
129 operand. */
9771b263 130static vec<slp_oprnd_info>
d092494c
IR
131vect_create_oprnd_info (int nops, int group_size)
132{
133 int i;
134 slp_oprnd_info oprnd_info;
9771b263 135 vec<slp_oprnd_info> oprnds_info;
d092494c 136
9771b263 137 oprnds_info.create (nops);
d092494c
IR
138 for (i = 0; i < nops; i++)
139 {
140 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 141 oprnd_info->def_stmts.create (group_size);
d092494c 142 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 143 oprnd_info->first_op_type = NULL_TREE;
d092494c 144 oprnd_info->first_pattern = false;
effb52da 145 oprnd_info->second_pattern = false;
9771b263 146 oprnds_info.quick_push (oprnd_info);
d092494c
IR
147 }
148
149 return oprnds_info;
150}
151
152
d3cfd39e
JJ
153/* Free operands info. */
154
d092494c 155static void
9771b263 156vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
157{
158 int i;
159 slp_oprnd_info oprnd_info;
160
9771b263 161 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 162 {
9771b263 163 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
164 XDELETE (oprnd_info);
165 }
d092494c 166
9771b263 167 oprnds_info.release ();
d092494c
IR
168}
169
170
d755c7ef
RB
171/* Find the place of the data-ref in STMT in the interleaving chain that starts
172 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
173
174static int
355fe088 175vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt)
d755c7ef 176{
355fe088 177 gimple *next_stmt = first_stmt;
d755c7ef
RB
178 int result = 0;
179
180 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
181 return -1;
182
183 do
184 {
185 if (next_stmt == stmt)
186 return result;
d755c7ef 187 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
c8047699
RB
188 if (next_stmt)
189 result += GROUP_GAP (vinfo_for_stmt (next_stmt));
d755c7ef
RB
190 }
191 while (next_stmt);
192
193 return -1;
194}
195
196
d092494c
IR
197/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
198 they are of a valid type and that they match the defs of the first stmt of
b0b4483e
RB
199 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
200 return -1, if the error could be corrected by swapping operands of the
201 operation return 1, if everything is ok return 0. */
ebfd146a 202
b0b4483e 203static int
310213d4 204vect_get_and_check_slp_defs (vec_info *vinfo,
355fe088 205 gimple *stmt, unsigned stmt_num,
23847df4 206 vec<slp_oprnd_info> *oprnds_info)
ebfd146a
IR
207{
208 tree oprnd;
209 unsigned int i, number_of_oprnds;
355fe088 210 gimple *def_stmt;
d092494c 211 enum vect_def_type dt = vect_uninitialized_def;
d092494c 212 bool pattern = false;
abf9bfbc 213 slp_oprnd_info oprnd_info;
b0b4483e
RB
214 int first_op_idx = 1;
215 bool commutative = false;
216 bool first_op_cond = false;
effb52da
RB
217 bool first = stmt_num == 0;
218 bool second = stmt_num == 1;
b8698a0f 219
d092494c 220 if (is_gimple_call (stmt))
190c2236
JJ
221 {
222 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 223 first_op_idx = 3;
190c2236 224 }
f7e531cf
IR
225 else if (is_gimple_assign (stmt))
226 {
b0b4483e 227 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf 228 number_of_oprnds = gimple_num_ops (stmt) - 1;
a414c77f
IE
229 if (gimple_assign_rhs_code (stmt) == COND_EXPR
230 && COMPARISON_CLASS_P (gimple_assign_rhs1 (stmt)))
b0b4483e
RB
231 {
232 first_op_cond = true;
233 commutative = true;
234 number_of_oprnds++;
235 }
236 else
237 commutative = commutative_tree_code (code);
f7e531cf 238 }
d092494c 239 else
b0b4483e 240 return -1;
ebfd146a 241
b0b4483e 242 bool swapped = false;
ebfd146a
IR
243 for (i = 0; i < number_of_oprnds; i++)
244 {
b0b4483e
RB
245again:
246 if (first_op_cond)
f7e531cf 247 {
b0b4483e
RB
248 if (i == 0 || i == 1)
249 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
250 swapped ? !i : i);
251 else
252 oprnd = gimple_op (stmt, first_op_idx + i - 1);
f7e531cf
IR
253 }
254 else
b0b4483e 255 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 256
9771b263 257 oprnd_info = (*oprnds_info)[i];
ebfd146a 258
81c40241 259 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt))
ebfd146a 260 {
73fbfcad 261 if (dump_enabled_p ())
ebfd146a 262 {
78c60e3d 263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3fc356dc 264 "Build SLP failed: can't analyze def for ");
78c60e3d 265 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 266 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
267 }
268
b0b4483e 269 return -1;
ebfd146a
IR
270 }
271
a70d6342 272 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 273 from the pattern. Check that all the stmts of the node are in the
ebfd146a 274 pattern. */
f5709183 275 if (def_stmt && gimple_bb (def_stmt)
61d371eb 276 && vect_stmt_in_region_p (vinfo, def_stmt)
ebfd146a 277 && vinfo_for_stmt (def_stmt)
83197f37 278 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
f5709183
IR
279 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
280 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
ebfd146a 281 {
d092494c 282 pattern = true;
effb52da
RB
283 if (!first && !oprnd_info->first_pattern
284 /* Allow different pattern state for the defs of the
285 first stmt in reduction chains. */
286 && (oprnd_info->first_dt != vect_reduction_def
287 || (!second && !oprnd_info->second_pattern)))
d092494c 288 {
b0b4483e
RB
289 if (i == 0
290 && !swapped
291 && commutative)
292 {
293 swapped = true;
294 goto again;
295 }
296
73fbfcad 297 if (dump_enabled_p ())
d092494c 298 {
78c60e3d
SS
299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
300 "Build SLP failed: some of the stmts"
301 " are in a pattern, and others are not ");
302 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 303 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
d092494c 304 }
ebfd146a 305
b0b4483e 306 return 1;
ebfd146a
IR
307 }
308
309 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
d092494c 310 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
ebfd146a 311
f7e531cf 312 if (dt == vect_unknown_def_type)
ebfd146a 313 {
73fbfcad 314 if (dump_enabled_p ())
78c60e3d 315 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 316 "Unsupported pattern.\n");
b0b4483e 317 return -1;
ebfd146a
IR
318 }
319
320 switch (gimple_code (def_stmt))
321 {
81c40241
RB
322 case GIMPLE_PHI:
323 case GIMPLE_ASSIGN:
324 break;
325
326 default:
327 if (dump_enabled_p ())
328 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
329 "unsupported defining stmt:\n");
330 return -1;
ebfd146a
IR
331 }
332 }
333
effb52da
RB
334 if (second)
335 oprnd_info->second_pattern = pattern;
336
d092494c 337 if (first)
ebfd146a 338 {
d092494c
IR
339 oprnd_info->first_dt = dt;
340 oprnd_info->first_pattern = pattern;
793d9a16 341 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 342 }
ebfd146a
IR
343 else
344 {
d092494c
IR
345 /* Not first stmt of the group, check that the def-stmt/s match
346 the def-stmt/s of the first stmt. Allow different definition
347 types for reduction chains: the first stmt must be a
348 vect_reduction_def (a phi node), and the rest
349 vect_internal_def. */
350 if (((oprnd_info->first_dt != dt
351 && !(oprnd_info->first_dt == vect_reduction_def
793d9a16
RB
352 && dt == vect_internal_def)
353 && !((oprnd_info->first_dt == vect_external_def
354 || oprnd_info->first_dt == vect_constant_def)
355 && (dt == vect_external_def
356 || dt == vect_constant_def)))
357 || !types_compatible_p (oprnd_info->first_op_type,
358 TREE_TYPE (oprnd))))
ebfd146a 359 {
b0b4483e
RB
360 /* Try swapping operands if we got a mismatch. */
361 if (i == 0
362 && !swapped
363 && commutative)
364 {
365 swapped = true;
366 goto again;
367 }
368
abf9bfbc
RB
369 if (dump_enabled_p ())
370 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 371 "Build SLP failed: different types\n");
d092494c 372
b0b4483e 373 return 1;
ebfd146a
IR
374 }
375 }
376
377 /* Check the types of the definitions. */
d092494c 378 switch (dt)
ebfd146a
IR
379 {
380 case vect_constant_def:
8644a673 381 case vect_external_def:
d092494c 382 case vect_reduction_def:
ebfd146a 383 break;
b8698a0f 384
8644a673 385 case vect_internal_def:
abf9bfbc 386 oprnd_info->def_stmts.quick_push (def_stmt);
ebfd146a
IR
387 break;
388
389 default:
390 /* FORNOW: Not supported. */
73fbfcad 391 if (dump_enabled_p ())
ebfd146a 392 {
78c60e3d
SS
393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
394 "Build SLP failed: illegal type of def ");
81c40241 395 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 396 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
397 }
398
b0b4483e 399 return -1;
ebfd146a
IR
400 }
401 }
402
b0b4483e
RB
403 /* Swap operands. */
404 if (swapped)
405 {
406 if (first_op_cond)
407 {
408 tree cond = gimple_assign_rhs1 (stmt);
409 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
410 &TREE_OPERAND (cond, 1));
411 TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
412 }
413 else
414 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
415 gimple_assign_rhs2_ptr (stmt));
416 }
417
418 return 0;
ebfd146a
IR
419}
420
421
6983e6b5
RB
422/* Verify if the scalar stmts STMTS are isomorphic, require data
423 permutation or are of unsupported types of operation. Return
424 true if they are, otherwise return false and indicate in *MATCHES
425 which stmts are not isomorphic to the first one. If MATCHES[0]
426 is false then this indicates the comparison could not be
427 carried out or the stmts will never be vectorized by SLP. */
ebfd146a
IR
428
429static bool
310213d4 430vect_build_slp_tree_1 (vec_info *vinfo,
355fe088 431 vec<gimple *> stmts, unsigned int group_size,
6983e6b5 432 unsigned nops, unsigned int *max_nunits,
6876e5bc
RB
433 unsigned int vectorization_factor, bool *matches,
434 bool *two_operators)
ebfd146a 435{
ebfd146a 436 unsigned int i;
355fe088 437 gimple *first_stmt = stmts[0], *stmt = stmts[0];
6876e5bc
RB
438 enum tree_code first_stmt_code = ERROR_MARK;
439 enum tree_code alt_stmt_code = ERROR_MARK;
440 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 441 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 442 tree lhs;
6983e6b5 443 bool need_same_oprnds = false;
e00cdb8a 444 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
ebfd146a
IR
445 optab optab;
446 int icode;
ef4bddc2
RS
447 machine_mode optab_op2_mode;
448 machine_mode vec_mode;
ebfd146a 449 HOST_WIDE_INT dummy;
355fe088 450 gimple *first_load = NULL, *prev_first_load = NULL;
d092494c 451
ebfd146a 452 /* For every stmt in NODE find its def stmt/s. */
9771b263 453 FOR_EACH_VEC_ELT (stmts, i, stmt)
ebfd146a 454 {
6983e6b5
RB
455 matches[i] = false;
456
73fbfcad 457 if (dump_enabled_p ())
ebfd146a 458 {
78c60e3d
SS
459 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
460 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 461 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
462 }
463
4b5caab7
IR
464 /* Fail to vectorize statements marked as unvectorizable. */
465 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
466 {
73fbfcad 467 if (dump_enabled_p ())
4b5caab7 468 {
78c60e3d
SS
469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
470 "Build SLP failed: unvectorizable statement ");
471 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 472 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4b5caab7 473 }
6983e6b5
RB
474 /* Fatal mismatch. */
475 matches[0] = false;
4b5caab7
IR
476 return false;
477 }
478
ebfd146a
IR
479 lhs = gimple_get_lhs (stmt);
480 if (lhs == NULL_TREE)
481 {
73fbfcad 482 if (dump_enabled_p ())
ebfd146a 483 {
78c60e3d
SS
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
485 "Build SLP failed: not GIMPLE_ASSIGN nor "
486 "GIMPLE_CALL ");
487 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 488 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 489 }
6983e6b5
RB
490 /* Fatal mismatch. */
491 matches[0] = false;
ebfd146a
IR
492 return false;
493 }
494
b8698a0f 495 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
496 vectype = get_vectype_for_scalar_type (scalar_type);
497 if (!vectype)
498 {
73fbfcad 499 if (dump_enabled_p ())
ebfd146a 500 {
78c60e3d
SS
501 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
502 "Build SLP failed: unsupported data-type ");
503 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
504 scalar_type);
e645e942 505 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 506 }
6983e6b5
RB
507 /* Fatal mismatch. */
508 matches[0] = false;
ebfd146a
IR
509 return false;
510 }
b8698a0f 511
dfc55d30
RB
512 /* If populating the vector type requires unrolling then fail
513 before adjusting *max_nunits for basic-block vectorization. */
310213d4 514 if (is_a <bb_vec_info> (vinfo)
dfc55d30
RB
515 && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
516 {
517 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
518 "Build SLP failed: unrolling required "
519 "in basic block SLP\n");
520 /* Fatal mismatch. */
521 matches[0] = false;
522 return false;
523 }
524
4ef69dfc
IR
525 /* In case of multiple types we need to detect the smallest type. */
526 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
a70d6342 527 {
4ef69dfc 528 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
310213d4 529 if (is_a <bb_vec_info> (vinfo))
4ef69dfc 530 vectorization_factor = *max_nunits;
a70d6342 531 }
b8698a0f 532
538dd0b7 533 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
534 {
535 rhs_code = CALL_EXPR;
538dd0b7
DM
536 if (gimple_call_internal_p (call_stmt)
537 || gimple_call_tail_p (call_stmt)
538 || gimple_call_noreturn_p (call_stmt)
539 || !gimple_call_nothrow_p (call_stmt)
540 || gimple_call_chain (call_stmt))
190c2236 541 {
73fbfcad 542 if (dump_enabled_p ())
190c2236 543 {
78c60e3d
SS
544 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
545 "Build SLP failed: unsupported call type ");
538dd0b7
DM
546 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
547 call_stmt, 0);
e645e942 548 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
190c2236 549 }
6983e6b5
RB
550 /* Fatal mismatch. */
551 matches[0] = false;
190c2236
JJ
552 return false;
553 }
554 }
ebfd146a
IR
555 else
556 rhs_code = gimple_assign_rhs_code (stmt);
557
558 /* Check the operation. */
559 if (i == 0)
560 {
561 first_stmt_code = rhs_code;
562
b8698a0f 563 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
564 vector shift with scalar shift operand. */
565 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
566 || rhs_code == LROTATE_EXPR
567 || rhs_code == RROTATE_EXPR)
568 {
569 vec_mode = TYPE_MODE (vectype);
570
571 /* First see if we have a vector/vector shift. */
572 optab = optab_for_tree_code (rhs_code, vectype,
573 optab_vector);
574
575 if (!optab
947131ba 576 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
577 {
578 /* No vector/vector shift, try for a vector/scalar shift. */
579 optab = optab_for_tree_code (rhs_code, vectype,
580 optab_scalar);
581
582 if (!optab)
583 {
73fbfcad 584 if (dump_enabled_p ())
78c60e3d 585 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 586 "Build SLP failed: no optab.\n");
6983e6b5
RB
587 /* Fatal mismatch. */
588 matches[0] = false;
ebfd146a
IR
589 return false;
590 }
947131ba 591 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
592 if (icode == CODE_FOR_nothing)
593 {
73fbfcad 594 if (dump_enabled_p ())
78c60e3d
SS
595 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
596 "Build SLP failed: "
e645e942 597 "op not supported by target.\n");
6983e6b5
RB
598 /* Fatal mismatch. */
599 matches[0] = false;
ebfd146a
IR
600 return false;
601 }
602 optab_op2_mode = insn_data[icode].operand[2].mode;
603 if (!VECTOR_MODE_P (optab_op2_mode))
604 {
605 need_same_oprnds = true;
606 first_op1 = gimple_assign_rhs2 (stmt);
607 }
608 }
609 }
36ba4aae
IR
610 else if (rhs_code == WIDEN_LSHIFT_EXPR)
611 {
612 need_same_oprnds = true;
613 first_op1 = gimple_assign_rhs2 (stmt);
614 }
ebfd146a
IR
615 }
616 else
617 {
6876e5bc
RB
618 if (first_stmt_code != rhs_code
619 && alt_stmt_code == ERROR_MARK)
620 alt_stmt_code = rhs_code;
ebfd146a
IR
621 if (first_stmt_code != rhs_code
622 && (first_stmt_code != IMAGPART_EXPR
623 || rhs_code != REALPART_EXPR)
624 && (first_stmt_code != REALPART_EXPR
69f11a13 625 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
626 /* Handle mismatches in plus/minus by computing both
627 and merging the results. */
628 && !((first_stmt_code == PLUS_EXPR
629 || first_stmt_code == MINUS_EXPR)
630 && (alt_stmt_code == PLUS_EXPR
631 || alt_stmt_code == MINUS_EXPR)
632 && rhs_code == alt_stmt_code)
0d0293ac 633 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
69f11a13 634 && (first_stmt_code == ARRAY_REF
38000232 635 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
636 || first_stmt_code == INDIRECT_REF
637 || first_stmt_code == COMPONENT_REF
638 || first_stmt_code == MEM_REF)))
ebfd146a 639 {
73fbfcad 640 if (dump_enabled_p ())
ebfd146a 641 {
78c60e3d
SS
642 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
643 "Build SLP failed: different operation "
644 "in stmt ");
645 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
6876e5bc
RB
646 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
647 "original stmt ");
648 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
649 first_stmt, 0);
ebfd146a 650 }
6983e6b5
RB
651 /* Mismatch. */
652 continue;
ebfd146a 653 }
b8698a0f
L
654
655 if (need_same_oprnds
ebfd146a
IR
656 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
657 {
73fbfcad 658 if (dump_enabled_p ())
ebfd146a 659 {
78c60e3d
SS
660 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
661 "Build SLP failed: different shift "
662 "arguments in ");
663 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 664 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 665 }
6983e6b5
RB
666 /* Mismatch. */
667 continue;
ebfd146a 668 }
190c2236
JJ
669
670 if (rhs_code == CALL_EXPR)
671 {
355fe088 672 gimple *first_stmt = stmts[0];
190c2236
JJ
673 if (gimple_call_num_args (stmt) != nops
674 || !operand_equal_p (gimple_call_fn (first_stmt),
675 gimple_call_fn (stmt), 0)
676 || gimple_call_fntype (first_stmt)
677 != gimple_call_fntype (stmt))
678 {
73fbfcad 679 if (dump_enabled_p ())
190c2236 680 {
78c60e3d
SS
681 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
682 "Build SLP failed: different calls in ");
683 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
684 stmt, 0);
e645e942 685 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
190c2236 686 }
6983e6b5
RB
687 /* Mismatch. */
688 continue;
190c2236
JJ
689 }
690 }
ebfd146a
IR
691 }
692
0d0293ac
MM
693 /* Grouped store or load. */
694 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a
IR
695 {
696 if (REFERENCE_CLASS_P (lhs))
697 {
698 /* Store. */
6983e6b5 699 ;
ebfd146a 700 }
b5aeb3bb
IR
701 else
702 {
703 /* Load. */
b5aeb3bb
IR
704 /* Check that the size of interleaved loads group is not
705 greater than the SLP group size. */
314f64eb
RB
706 unsigned ncopies
707 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
310213d4 708 if (is_a <loop_vec_info> (vinfo)
a64b9c26
RB
709 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
710 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
711 - GROUP_GAP (vinfo_for_stmt (stmt)))
712 > ncopies * group_size))
b5aeb3bb 713 {
73fbfcad 714 if (dump_enabled_p ())
b5aeb3bb 715 {
78c60e3d
SS
716 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
717 "Build SLP failed: the number "
718 "of interleaved loads is greater than "
719 "the SLP group size ");
720 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
721 stmt, 0);
e645e942 722 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b5aeb3bb 723 }
6983e6b5
RB
724 /* Fatal mismatch. */
725 matches[0] = false;
b5aeb3bb
IR
726 return false;
727 }
728
e14c1050 729 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
730 if (prev_first_load)
731 {
732 /* Check that there are no loads from different interleaving
6983e6b5
RB
733 chains in the same node. */
734 if (prev_first_load != first_load)
78c60e3d 735 {
73fbfcad 736 if (dump_enabled_p ())
b5aeb3bb 737 {
78c60e3d
SS
738 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
739 vect_location,
740 "Build SLP failed: different "
741 "interleaving chains in one node ");
742 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
743 stmt, 0);
e645e942 744 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b5aeb3bb 745 }
6983e6b5
RB
746 /* Mismatch. */
747 continue;
b5aeb3bb
IR
748 }
749 }
750 else
751 prev_first_load = first_load;
ebfd146a 752 }
0d0293ac 753 } /* Grouped access. */
ebfd146a
IR
754 else
755 {
756 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
757 {
0d0293ac 758 /* Not grouped load. */
73fbfcad 759 if (dump_enabled_p ())
ebfd146a 760 {
78c60e3d
SS
761 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
762 "Build SLP failed: not grouped load ");
763 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 764 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
765 }
766
0d0293ac 767 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
768 /* Fatal mismatch. */
769 matches[0] = false;
ebfd146a
IR
770 return false;
771 }
772
773 /* Not memory operation. */
774 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 775 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 776 && TREE_CODE_CLASS (rhs_code) != tcc_expression
42fd8198 777 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
190c2236 778 && rhs_code != CALL_EXPR)
ebfd146a 779 {
73fbfcad 780 if (dump_enabled_p ())
ebfd146a 781 {
78c60e3d
SS
782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
783 "Build SLP failed: operation");
784 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
785 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 786 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 787 }
6983e6b5
RB
788 /* Fatal mismatch. */
789 matches[0] = false;
ebfd146a
IR
790 return false;
791 }
792
f7e531cf
IR
793 if (rhs_code == COND_EXPR)
794 {
795 tree cond_expr = gimple_assign_rhs1 (stmt);
796
797 if (i == 0)
798 first_cond_code = TREE_CODE (cond_expr);
799 else if (first_cond_code != TREE_CODE (cond_expr))
800 {
73fbfcad 801 if (dump_enabled_p ())
f7e531cf 802 {
78c60e3d
SS
803 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
804 "Build SLP failed: different"
805 " operation");
806 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
807 stmt, 0);
e645e942 808 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f7e531cf 809 }
6983e6b5
RB
810 /* Mismatch. */
811 continue;
f7e531cf
IR
812 }
813 }
ebfd146a 814 }
6983e6b5
RB
815
816 matches[i] = true;
817 }
818
819 for (i = 0; i < group_size; ++i)
820 if (!matches[i])
821 return false;
822
6876e5bc
RB
823 /* If we allowed a two-operation SLP node verify the target can cope
824 with the permute we are going to use. */
825 if (alt_stmt_code != ERROR_MARK
826 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
827 {
828 unsigned char *sel
829 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
830 for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
831 {
832 sel[i] = i;
833 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
834 sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
835 }
836 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
837 {
838 for (i = 0; i < group_size; ++i)
839 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
840 {
841 matches[i] = false;
842 if (dump_enabled_p ())
843 {
844 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
845 "Build SLP failed: different operation "
846 "in stmt ");
847 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
848 stmts[i], 0);
849 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
850 "original stmt ");
851 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
852 first_stmt, 0);
853 }
854 }
855 return false;
856 }
857 *two_operators = true;
858 }
859
6983e6b5
RB
860 return true;
861}
862
863/* Recursively build an SLP tree starting from NODE.
864 Fail (and return a value not equal to zero) if def-stmts are not
865 isomorphic, require data permutation or are of unsupported types of
866 operation. Otherwise, return 0.
867 The value returned is the depth in the SLP tree where a mismatch
868 was found. */
869
870static bool
310213d4 871vect_build_slp_tree (vec_info *vinfo,
6983e6b5
RB
872 slp_tree *node, unsigned int group_size,
873 unsigned int *max_nunits,
874 vec<slp_tree> *loads,
875 unsigned int vectorization_factor,
1428105c
RB
876 bool *matches, unsigned *npermutes, unsigned *tree_size,
877 unsigned max_tree_size)
6983e6b5 878{
89d390e5 879 unsigned nops, i, this_tree_size = 0;
355fe088 880 gimple *stmt;
6983e6b5 881
6983e6b5
RB
882 matches[0] = false;
883
884 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
885 if (is_gimple_call (stmt))
886 nops = gimple_call_num_args (stmt);
887 else if (is_gimple_assign (stmt))
888 {
889 nops = gimple_num_ops (stmt) - 1;
890 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
891 nops++;
ebfd146a 892 }
6983e6b5
RB
893 else
894 return false;
895
6876e5bc 896 bool two_operators = false;
310213d4 897 if (!vect_build_slp_tree_1 (vinfo,
6983e6b5 898 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
6876e5bc
RB
899 max_nunits, vectorization_factor, matches,
900 &two_operators))
6983e6b5 901 return false;
6876e5bc 902 SLP_TREE_TWO_OPERATORS (*node) = two_operators;
ebfd146a 903
6983e6b5
RB
904 /* If the SLP node is a load, terminate the recursion. */
905 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
906 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
ebfd146a 907 {
9771b263 908 loads->safe_push (*node);
ebfd146a
IR
909 return true;
910 }
911
6983e6b5
RB
912 /* Get at the operands, verifying they are compatible. */
913 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
914 slp_oprnd_info oprnd_info;
915 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
916 {
310213d4 917 switch (vect_get_and_check_slp_defs (vinfo, stmt, i, &oprnds_info))
6983e6b5 918 {
b0b4483e
RB
919 case 0:
920 break;
921 case -1:
922 matches[0] = false;
6983e6b5
RB
923 vect_free_oprnd_info (oprnds_info);
924 return false;
b0b4483e
RB
925 case 1:
926 matches[i] = false;
927 break;
6983e6b5
RB
928 }
929 }
b0b4483e
RB
930 for (i = 0; i < group_size; ++i)
931 if (!matches[i])
932 {
933 vect_free_oprnd_info (oprnds_info);
934 return false;
935 }
6983e6b5
RB
936
937 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
938
b8698a0f 939 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 940 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 941 {
d092494c 942 slp_tree child;
6983e6b5
RB
943 unsigned old_nloads = loads->length ();
944 unsigned old_max_nunits = *max_nunits;
b8698a0f 945
d092494c
IR
946 if (oprnd_info->first_dt != vect_internal_def)
947 continue;
ebfd146a 948
1428105c
RB
949 if (++this_tree_size > max_tree_size)
950 {
951 vect_free_oprnd_info (oprnds_info);
952 return false;
953 }
954
d092494c 955 child = vect_create_new_slp_node (oprnd_info->def_stmts);
6983e6b5
RB
956 if (!child)
957 {
9771b263 958 vect_free_oprnd_info (oprnds_info);
6983e6b5 959 return false;
d092494c 960 }
b8698a0f 961
310213d4 962 if (vect_build_slp_tree (vinfo, &child,
6983e6b5 963 group_size, max_nunits, loads,
1428105c
RB
964 vectorization_factor, matches,
965 npermutes, &this_tree_size, max_tree_size))
6983e6b5 966 {
3fc356dc
RB
967 /* If we have all children of child built up from scalars then just
968 throw that away and build it up this node from scalars. */
969 if (!SLP_TREE_CHILDREN (child).is_empty ())
970 {
971 unsigned int j;
972 slp_tree grandchild;
973
974 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
975 if (grandchild != NULL)
976 break;
977 if (!grandchild)
978 {
979 /* Roll back. */
980 *max_nunits = old_max_nunits;
981 loads->truncate (old_nloads);
982 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
983 vect_free_slp_tree (grandchild);
984 SLP_TREE_CHILDREN (child).truncate (0);
985
986 dump_printf_loc (MSG_NOTE, vect_location,
987 "Building parent vector operands from "
988 "scalars instead\n");
989 oprnd_info->def_stmts = vNULL;
990 vect_free_slp_tree (child);
991 SLP_TREE_CHILDREN (*node).quick_push (NULL);
992 continue;
993 }
994 }
995
6983e6b5
RB
996 oprnd_info->def_stmts = vNULL;
997 SLP_TREE_CHILDREN (*node).quick_push (child);
998 continue;
999 }
1000
90dd6e3d
RB
1001 /* If the SLP build failed fatally and we analyze a basic-block
1002 simply treat nodes we fail to build as externally defined
1003 (and thus build vectors from the scalar defs).
1004 The cost model will reject outright expensive cases.
1005 ??? This doesn't treat cases where permutation ultimatively
1006 fails (or we don't try permutation below). Ideally we'd
1007 even compute a permutation that will end up with the maximum
1008 SLP tree size... */
310213d4 1009 if (is_a <bb_vec_info> (vinfo)
90dd6e3d
RB
1010 && !matches[0]
1011 /* ??? Rejecting patterns this way doesn't work. We'd have to
1012 do extra work to cancel the pattern so the uses see the
1013 scalar version. */
1014 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1015 {
9626d143
RB
1016 unsigned int j;
1017 slp_tree grandchild;
1018
1019 /* Roll back. */
1020 *max_nunits = old_max_nunits;
1021 loads->truncate (old_nloads);
1022 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1023 vect_free_slp_tree (grandchild);
1024 SLP_TREE_CHILDREN (child).truncate (0);
1025
90dd6e3d
RB
1026 dump_printf_loc (MSG_NOTE, vect_location,
1027 "Building vector operands from scalars\n");
1028 oprnd_info->def_stmts = vNULL;
1029 vect_free_slp_tree (child);
1030 SLP_TREE_CHILDREN (*node).quick_push (NULL);
1031 continue;
1032 }
1033
6983e6b5
RB
1034 /* If the SLP build for operand zero failed and operand zero
1035 and one can be commutated try that for the scalar stmts
1036 that failed the match. */
1037 if (i == 0
1038 /* A first scalar stmt mismatch signals a fatal mismatch. */
1039 && matches[0]
1040 /* ??? For COND_EXPRs we can swap the comparison operands
1041 as well as the arms under some constraints. */
1042 && nops == 2
1043 && oprnds_info[1]->first_dt == vect_internal_def
1044 && is_gimple_assign (stmt)
1045 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1ece8d4c 1046 && !SLP_TREE_TWO_OPERATORS (*node)
6983e6b5
RB
1047 /* Do so only if the number of not successful permutes was nor more
1048 than a cut-ff as re-trying the recursive match on
1049 possibly each level of the tree would expose exponential
1050 behavior. */
1051 && *npermutes < 4)
1052 {
e72baed7
JJ
1053 unsigned int j;
1054 slp_tree grandchild;
1055
6983e6b5
RB
1056 /* Roll back. */
1057 *max_nunits = old_max_nunits;
1058 loads->truncate (old_nloads);
e72baed7
JJ
1059 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1060 vect_free_slp_tree (grandchild);
1061 SLP_TREE_CHILDREN (child).truncate (0);
1062
6983e6b5 1063 /* Swap mismatched definition stmts. */
b0b4483e
RB
1064 dump_printf_loc (MSG_NOTE, vect_location,
1065 "Re-trying with swapped operands of stmts ");
e72baed7 1066 for (j = 0; j < group_size; ++j)
6983e6b5
RB
1067 if (!matches[j])
1068 {
6b4db501
MM
1069 std::swap (oprnds_info[0]->def_stmts[j],
1070 oprnds_info[1]->def_stmts[j]);
b0b4483e 1071 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1072 }
b0b4483e 1073 dump_printf (MSG_NOTE, "\n");
74574669
RB
1074 /* And try again with scratch 'matches' ... */
1075 bool *tem = XALLOCAVEC (bool, group_size);
310213d4 1076 if (vect_build_slp_tree (vinfo, &child,
6983e6b5
RB
1077 group_size, max_nunits, loads,
1078 vectorization_factor,
74574669 1079 tem, npermutes, &this_tree_size,
1428105c 1080 max_tree_size))
6983e6b5 1081 {
74574669
RB
1082 /* ... so if successful we can apply the operand swapping
1083 to the GIMPLE IL. This is necessary because for example
1084 vect_get_slp_defs uses operand indexes and thus expects
1085 canonical operand order. */
1086 for (j = 0; j < group_size; ++j)
1087 if (!matches[j])
1088 {
355fe088 1089 gimple *stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
74574669
RB
1090 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1091 gimple_assign_rhs2_ptr (stmt));
1092 }
6983e6b5
RB
1093 oprnd_info->def_stmts = vNULL;
1094 SLP_TREE_CHILDREN (*node).quick_push (child);
1095 continue;
1096 }
1097
1098 ++*npermutes;
1099 }
1100
1101 oprnd_info->def_stmts = vNULL;
1102 vect_free_slp_tree (child);
1103 vect_free_oprnd_info (oprnds_info);
1104 return false;
ebfd146a
IR
1105 }
1106
1428105c
RB
1107 if (tree_size)
1108 *tree_size += this_tree_size;
1109
9771b263 1110 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
1111 return true;
1112}
1113
78c60e3d 1114/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1115
1116static void
78c60e3d 1117vect_print_slp_tree (int dump_kind, slp_tree node)
ebfd146a
IR
1118{
1119 int i;
355fe088 1120 gimple *stmt;
d755c7ef 1121 slp_tree child;
ebfd146a
IR
1122
1123 if (!node)
1124 return;
1125
78c60e3d 1126 dump_printf (dump_kind, "node ");
9771b263 1127 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a 1128 {
78c60e3d
SS
1129 dump_printf (dump_kind, "\n\tstmt %d ", i);
1130 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
ebfd146a 1131 }
78c60e3d 1132 dump_printf (dump_kind, "\n");
ebfd146a 1133
9771b263 1134 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1135 vect_print_slp_tree (dump_kind, child);
ebfd146a
IR
1136}
1137
1138
b8698a0f
L
1139/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1140 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1141 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1142 stmts in NODE are to be marked. */
1143
1144static void
1145vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1146{
1147 int i;
355fe088 1148 gimple *stmt;
d755c7ef 1149 slp_tree child;
ebfd146a
IR
1150
1151 if (!node)
1152 return;
1153
9771b263 1154 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1155 if (j < 0 || i == j)
1156 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1157
9771b263 1158 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1159 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1160}
1161
1162
a70d6342
IR
1163/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1164
1165static void
1166vect_mark_slp_stmts_relevant (slp_tree node)
1167{
1168 int i;
355fe088 1169 gimple *stmt;
a70d6342 1170 stmt_vec_info stmt_info;
d755c7ef 1171 slp_tree child;
a70d6342
IR
1172
1173 if (!node)
1174 return;
1175
9771b263 1176 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1177 {
1178 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 1179 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1180 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1181 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1182 }
1183
9771b263 1184 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1185 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1186}
1187
1188
b5aeb3bb
IR
1189/* Rearrange the statements of NODE according to PERMUTATION. */
1190
1191static void
1192vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1193 vec<unsigned> permutation)
b5aeb3bb 1194{
355fe088
TS
1195 gimple *stmt;
1196 vec<gimple *> tmp_stmts;
d755c7ef
RB
1197 unsigned int i;
1198 slp_tree child;
b5aeb3bb 1199
9771b263 1200 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1201 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1202
9771b263
DN
1203 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1204 tmp_stmts.create (group_size);
d755c7ef 1205 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1206
9771b263 1207 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
d755c7ef 1208 tmp_stmts[permutation[i]] = stmt;
b5aeb3bb 1209
9771b263 1210 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1211 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1212}
1213
1214
b266b968
RB
1215/* Attempt to reorder stmts in a reduction chain so that we don't
1216 require any load permutation. Return true if that was possible,
1217 otherwise return false. */
1218
1219static bool
1220vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1221{
1222 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1223 unsigned int i, j;
1224 sbitmap load_index;
1225 unsigned int lidx;
1226 slp_tree node, load;
1227
1228 /* Compare all the permutation sequences to the first one. We know
1229 that at least one load is permuted. */
1230 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1231 if (!node->load_permutation.exists ())
1232 return false;
1233 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1234 {
1235 if (!load->load_permutation.exists ())
1236 return false;
1237 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1238 if (lidx != node->load_permutation[j])
1239 return false;
1240 }
1241
1242 /* Check that the loads in the first sequence are different and there
1243 are no gaps between them. */
1244 load_index = sbitmap_alloc (group_size);
1245 bitmap_clear (load_index);
1246 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1247 {
1248 if (bitmap_bit_p (load_index, lidx))
1249 {
1250 sbitmap_free (load_index);
1251 return false;
1252 }
1253 bitmap_set_bit (load_index, lidx);
1254 }
1255 for (i = 0; i < group_size; i++)
1256 if (!bitmap_bit_p (load_index, i))
1257 {
1258 sbitmap_free (load_index);
1259 return false;
1260 }
1261 sbitmap_free (load_index);
1262
1263 /* This permutation is valid for reduction. Since the order of the
1264 statements in the nodes is not important unless they are memory
1265 accesses, we can rearrange the statements in all the nodes
1266 according to the order of the loads. */
1267 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1268 node->load_permutation);
1269
1270 /* We are done, no actual permutations need to be generated. */
1271 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1272 SLP_TREE_LOAD_PERMUTATION (node).release ();
1273 return true;
1274}
1275
01d8bf07
RB
1276/* Check if the required load permutations in the SLP instance
1277 SLP_INSTN are supported. */
ebfd146a
IR
1278
1279static bool
01d8bf07 1280vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1281{
01d8bf07
RB
1282 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1283 unsigned int i, j, k, next;
6983e6b5 1284 slp_tree node;
a5b50aa1 1285 gimple *stmt, *load, *next_load;
ebfd146a 1286
73fbfcad 1287 if (dump_enabled_p ())
ebfd146a 1288 {
78c60e3d 1289 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1290 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1291 if (node->load_permutation.exists ())
1292 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1293 dump_printf (MSG_NOTE, "%d ", next);
1294 else
bddc974e
TJ
1295 for (k = 0; k < group_size; ++k)
1296 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1297 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1298 }
1299
b5aeb3bb
IR
1300 /* In case of reduction every load permutation is allowed, since the order
1301 of the reduction statements is not important (as opposed to the case of
0d0293ac 1302 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1303 load nodes are of the same size and have the same permutation (and then
1304 rearrange all the nodes of the SLP instance according to this
1305 permutation). */
1306
1307 /* Check that all the load nodes are of the same size. */
01d8bf07 1308 /* ??? Can't we assert this? */
9771b263 1309 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1310 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1311 return false;
2200fc49 1312
b5aeb3bb 1313 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1314 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1315
b010117a 1316 /* Reduction (there are no data-refs in the root).
b266b968 1317 In reduction chain the order of the loads is not important. */
b010117a
IR
1318 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1319 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1320 {
b266b968
RB
1321 if (vect_attempt_slp_rearrange_stmts (slp_instn))
1322 return true;
b5aeb3bb 1323
b266b968 1324 /* Fallthru to general load permutation handling. */
b5aeb3bb
IR
1325 }
1326
6aa904c4
IR
1327 /* In basic block vectorization we allow any subchain of an interleaving
1328 chain.
1329 FORNOW: not supported in loop SLP because of realignment compications. */
01d8bf07 1330 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1331 {
240a94da
RB
1332 /* Check whether the loads in an instance form a subchain and thus
1333 no permutation is necessary. */
9771b263 1334 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1335 {
9626d143
RB
1336 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1337 continue;
240a94da 1338 bool subchain_p = true;
6aa904c4 1339 next_load = NULL;
9771b263 1340 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1341 {
5b5826c4
RB
1342 if (j != 0
1343 && (next_load != load
1344 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
240a94da
RB
1345 {
1346 subchain_p = false;
1347 break;
1348 }
6aa904c4
IR
1349 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1350 }
240a94da
RB
1351 if (subchain_p)
1352 SLP_TREE_LOAD_PERMUTATION (node).release ();
1353 else
1354 {
1355 /* Verify the permutation can be generated. */
1356 vec<tree> tem;
1357 if (!vect_transform_slp_perm_load (node, tem, NULL,
1358 1, slp_instn, true))
1359 {
1360 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1361 vect_location,
1362 "unsupported load permutation\n");
1363 return false;
1364 }
1365 }
6aa904c4 1366 }
01d8bf07 1367 return true;
6aa904c4
IR
1368 }
1369
9b999e8c 1370 /* For loop vectorization verify we can generate the permutation. */
01d8bf07
RB
1371 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1372 if (node->load_permutation.exists ()
1373 && !vect_transform_slp_perm_load
1374 (node, vNULL, NULL,
1375 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1376 return false;
9b999e8c 1377
01d8bf07 1378 return true;
ebfd146a
IR
1379}
1380
1381
e4a707c4 1382/* Find the last store in SLP INSTANCE. */
ff802fa1 1383
64900538 1384gimple *
2e8ab70c 1385vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1386{
355fe088 1387 gimple *last = NULL, *stmt;
e4a707c4 1388
2e8ab70c
RB
1389 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1390 {
1391 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1392 if (is_pattern_stmt_p (stmt_vinfo))
1393 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1394 else
1395 last = get_later_stmt (stmt, last);
1396 }
e4a707c4 1397
2e8ab70c 1398 return last;
e4a707c4
IR
1399}
1400
23847df4
RB
1401/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1402
1403static void
1a4b99c1 1404vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
23847df4 1405 stmt_vector_for_cost *prologue_cost_vec,
1a4b99c1 1406 stmt_vector_for_cost *body_cost_vec,
23847df4
RB
1407 unsigned ncopies_for_cost)
1408{
23847df4
RB
1409 unsigned i;
1410 slp_tree child;
355fe088 1411 gimple *stmt, *s;
23847df4
RB
1412 stmt_vec_info stmt_info;
1413 tree lhs;
1414 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1415
1416 /* Recurse down the SLP tree. */
1417 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
90dd6e3d 1418 if (child)
1a4b99c1
RB
1419 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1420 body_cost_vec, ncopies_for_cost);
23847df4
RB
1421
1422 /* Look at the first scalar stmt to determine the cost. */
1423 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1424 stmt_info = vinfo_for_stmt (stmt);
1425 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1426 {
1427 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1428 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1429 vect_uninitialized_def,
1430 node, prologue_cost_vec, body_cost_vec);
1431 else
1432 {
1433 int i;
1434 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1435 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1436 node, prologue_cost_vec, body_cost_vec);
1437 /* If the load is permuted record the cost for the permutation.
1438 ??? Loads from multiple chains are let through here only
1439 for a single special case involving complex numbers where
1440 in the end no permutation is necessary. */
1441 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1442 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1443 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1444 && vect_get_place_in_interleaving_chain
1445 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1446 {
1447 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1448 stmt_info, 0, vect_body);
1449 break;
1450 }
1451 }
1452 }
1453 else
6876e5bc
RB
1454 {
1455 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1456 stmt_info, 0, vect_body);
1457 if (SLP_TREE_TWO_OPERATORS (node))
1458 {
1459 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1460 stmt_info, 0, vect_body);
1461 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1462 stmt_info, 0, vect_body);
1463 }
1464 }
23847df4
RB
1465
1466 /* Scan operands and account for prologue cost of constants/externals.
1467 ??? This over-estimates cost for multiple uses and should be
1468 re-engineered. */
1469 lhs = gimple_get_lhs (stmt);
1470 for (i = 0; i < gimple_num_ops (stmt); ++i)
1471 {
81c40241 1472 tree op = gimple_op (stmt, i);
355fe088 1473 gimple *def_stmt;
23847df4
RB
1474 enum vect_def_type dt;
1475 if (!op || op == lhs)
1476 continue;
81c40241 1477 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
2e8ab70c
RB
1478 {
1479 /* Without looking at the actual initializer a vector of
1480 constants can be implemented as load from the constant pool.
1481 ??? We need to pass down stmt_info for a vector type
1482 even if it points to the wrong stmt. */
1483 if (dt == vect_constant_def)
1484 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1485 stmt_info, 0, vect_prologue);
1486 else if (dt == vect_external_def)
1487 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1488 stmt_info, 0, vect_prologue);
1489 }
23847df4
RB
1490 }
1491}
1492
1493/* Compute the cost for the SLP instance INSTANCE. */
1494
1495static void
1a4b99c1 1496vect_analyze_slp_cost (slp_instance instance, void *data)
23847df4
RB
1497{
1498 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1499 unsigned ncopies_for_cost;
1500 stmt_info_for_cost *si;
1501 unsigned i;
1502
b939ea86
RB
1503 if (dump_enabled_p ())
1504 dump_printf_loc (MSG_NOTE, vect_location,
1505 "=== vect_analyze_slp_cost ===\n");
1506
23847df4
RB
1507 /* Calculate the number of vector stmts to create based on the unrolling
1508 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1509 GROUP_SIZE / NUNITS otherwise. */
1510 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1a4b99c1
RB
1511 slp_tree node = SLP_INSTANCE_TREE (instance);
1512 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1513 /* Adjust the group_size by the vectorization factor which is always one
1514 for basic-block vectorization. */
1515 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1516 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1517 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1518 /* For reductions look at a reduction operand in case the reduction
1519 operation is widening like DOT_PROD or SAD. */
1520 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1521 {
355fe088 1522 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1a4b99c1
RB
1523 switch (gimple_assign_rhs_code (stmt))
1524 {
1525 case DOT_PROD_EXPR:
1526 case SAD_EXPR:
1527 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1528 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1529 break;
1530 default:;
1531 }
1532 }
23847df4
RB
1533 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1534
1535 prologue_cost_vec.create (10);
1536 body_cost_vec.create (10);
1a4b99c1
RB
1537 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1538 &prologue_cost_vec, &body_cost_vec,
1539 ncopies_for_cost);
23847df4
RB
1540
1541 /* Record the prologue costs, which were delayed until we were
1a4b99c1 1542 sure that SLP was successful. */
23847df4
RB
1543 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1544 {
1545 struct _stmt_vec_info *stmt_info
1546 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1547 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1548 si->misalign, vect_prologue);
1549 }
1550
1a4b99c1
RB
1551 /* Record the instance's instructions in the target cost model. */
1552 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1553 {
1554 struct _stmt_vec_info *stmt_info
1555 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1556 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1557 si->misalign, vect_body);
1558 }
1559
23847df4 1560 prologue_cost_vec.release ();
1a4b99c1 1561 body_cost_vec.release ();
23847df4 1562}
e4a707c4 1563
0d0293ac 1564/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1565 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1566 Return FALSE if it's impossible to SLP any stmt in the loop. */
1567
1568static bool
310213d4 1569vect_analyze_slp_instance (vec_info *vinfo,
355fe088 1570 gimple *stmt, unsigned max_tree_size)
ebfd146a
IR
1571{
1572 slp_instance new_instance;
d092494c 1573 slp_tree node;
e14c1050 1574 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
ebfd146a 1575 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1576 tree vectype, scalar_type = NULL_TREE;
355fe088 1577 gimple *next;
0f900dfa 1578 unsigned int vectorization_factor = 0;
23847df4 1579 int i;
ebfd146a 1580 unsigned int max_nunits = 0;
9771b263 1581 vec<slp_tree> loads;
b5aeb3bb 1582 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
355fe088 1583 vec<gimple *> scalar_stmts;
b5aeb3bb 1584
b010117a 1585 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1586 {
b010117a
IR
1587 if (dr)
1588 {
1589 scalar_type = TREE_TYPE (DR_REF (dr));
1590 vectype = get_vectype_for_scalar_type (scalar_type);
1591 }
1592 else
1593 {
310213d4 1594 gcc_assert (is_a <loop_vec_info> (vinfo));
b010117a
IR
1595 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1596 }
1597
e14c1050 1598 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1599 }
1600 else
1601 {
310213d4 1602 gcc_assert (is_a <loop_vec_info> (vinfo));
b5aeb3bb 1603 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
310213d4 1604 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
b5aeb3bb 1605 }
b8698a0f 1606
ebfd146a
IR
1607 if (!vectype)
1608 {
73fbfcad 1609 if (dump_enabled_p ())
ebfd146a 1610 {
78c60e3d
SS
1611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1612 "Build SLP failed: unsupported data-type ");
1613 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1614 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1615 }
b5aeb3bb 1616
ebfd146a
IR
1617 return false;
1618 }
1619
1620 nunits = TYPE_VECTOR_SUBPARTS (vectype);
310213d4
RB
1621 if (is_a <loop_vec_info> (vinfo))
1622 vectorization_factor = as_a <loop_vec_info> (vinfo)->vectorization_factor;
a70d6342 1623 else
a70d6342
IR
1624 vectorization_factor = nunits;
1625
a70d6342
IR
1626 /* Calculate the unrolling factor. */
1627 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
310213d4 1628 if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
a70d6342 1629 {
73fbfcad 1630 if (dump_enabled_p ())
e645e942 1631 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1632 "Build SLP failed: unrolling required in basic"
e645e942 1633 " block SLP\n");
b8698a0f 1634
a70d6342
IR
1635 return false;
1636 }
1637
0d0293ac 1638 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1639 scalar_stmts.create (group_size);
ebfd146a 1640 next = stmt;
b010117a 1641 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 1642 {
b5aeb3bb
IR
1643 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1644 while (next)
1645 {
f7e531cf
IR
1646 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1647 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1648 scalar_stmts.safe_push (
1649 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1650 else
9771b263 1651 scalar_stmts.safe_push (next);
e14c1050 1652 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 1653 }
14a61437
RB
1654 /* Mark the first element of the reduction chain as reduction to properly
1655 transform the node. In the reduction analysis phase only the last
1656 element of the chain is marked as reduction. */
1657 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1658 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
1659 }
1660 else
1661 {
1662 /* Collect reduction statements. */
310213d4 1663 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions;
9771b263
DN
1664 for (i = 0; reductions.iterate (i, &next); i++)
1665 scalar_stmts.safe_push (next);
ebfd146a
IR
1666 }
1667
d092494c 1668 node = vect_create_new_slp_node (scalar_stmts);
ebfd146a 1669
9771b263 1670 loads.create (group_size);
ebfd146a
IR
1671
1672 /* Build the tree for the SLP instance. */
89d390e5
RB
1673 bool *matches = XALLOCAVEC (bool, group_size);
1674 unsigned npermutes = 0;
310213d4 1675 if (vect_build_slp_tree (vinfo, &node, group_size,
abf9bfbc 1676 &max_nunits, &loads,
89d390e5 1677 vectorization_factor, matches, &npermutes, NULL,
1428105c 1678 max_tree_size))
ebfd146a 1679 {
4ef69dfc 1680 /* Calculate the unrolling factor based on the smallest type. */
ebfd146a
IR
1681 if (max_nunits > nunits)
1682 unrolling_factor = least_common_multiple (max_nunits, group_size)
1683 / group_size;
b8698a0f 1684
310213d4 1685 if (unrolling_factor != 1 && is_a <bb_vec_info> (vinfo))
4ef69dfc 1686 {
73fbfcad 1687 if (dump_enabled_p ())
e645e942 1688 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1689 "Build SLP failed: unrolling required in basic"
e645e942 1690 " block SLP\n");
c7e62a26 1691 vect_free_slp_tree (node);
9771b263 1692 loads.release ();
4ef69dfc
IR
1693 return false;
1694 }
1695
1696 /* Create a new SLP instance. */
1697 new_instance = XNEW (struct _slp_instance);
1698 SLP_INSTANCE_TREE (new_instance) = node;
1699 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 1700 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 1701 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
1702
1703 /* Compute the load permutation. */
1704 slp_tree load_node;
1705 bool loads_permuted = false;
abf9bfbc
RB
1706 FOR_EACH_VEC_ELT (loads, i, load_node)
1707 {
01d8bf07 1708 vec<unsigned> load_permutation;
abf9bfbc 1709 int j;
355fe088 1710 gimple *load, *first_stmt;
01d8bf07
RB
1711 bool this_load_permuted = false;
1712 load_permutation.create (group_size);
6983e6b5
RB
1713 first_stmt = GROUP_FIRST_ELEMENT
1714 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
1715 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1716 {
6983e6b5
RB
1717 int load_place
1718 = vect_get_place_in_interleaving_chain (load, first_stmt);
1719 gcc_assert (load_place != -1);
1720 if (load_place != j)
01d8bf07 1721 this_load_permuted = true;
abf9bfbc
RB
1722 load_permutation.safe_push (load_place);
1723 }
fe2bef71
RB
1724 if (!this_load_permuted
1725 /* The load requires permutation when unrolling exposes
1726 a gap either because the group is larger than the SLP
1727 group-size or because there is a gap between the groups. */
1728 && (unrolling_factor == 1
1729 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1730 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
1731 {
1732 load_permutation.release ();
1733 continue;
1734 }
1735 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1736 loads_permuted = true;
abf9bfbc 1737 }
6aa904c4
IR
1738
1739 if (loads_permuted)
ebfd146a 1740 {
01d8bf07 1741 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 1742 {
73fbfcad 1743 if (dump_enabled_p ())
ebfd146a 1744 {
e645e942 1745 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1746 "Build SLP failed: unsupported load "
1747 "permutation ");
1748 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 1749 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1750 }
ebfd146a
IR
1751 vect_free_slp_instance (new_instance);
1752 return false;
1753 }
ebfd146a 1754 }
ebfd146a 1755
310213d4 1756 vinfo->slp_instances.safe_push (new_instance);
b8698a0f 1757
73fbfcad 1758 if (dump_enabled_p ())
78c60e3d 1759 vect_print_slp_tree (MSG_NOTE, node);
ebfd146a
IR
1760
1761 return true;
1762 }
1763
1764 /* Failed to SLP. */
1765 /* Free the allocated memory. */
1766 vect_free_slp_tree (node);
9771b263 1767 loads.release ();
b8698a0f 1768
a70d6342 1769 return false;
ebfd146a
IR
1770}
1771
1772
ff802fa1 1773/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
1774 trees of packed scalar stmts if SLP is possible. */
1775
1776bool
310213d4 1777vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
ebfd146a
IR
1778{
1779 unsigned int i;
355fe088 1780 gimple *first_element;
a70d6342 1781 bool ok = false;
ebfd146a 1782
73fbfcad 1783 if (dump_enabled_p ())
e645e942 1784 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
ebfd146a 1785
0d0293ac 1786 /* Find SLP sequences starting from groups of grouped stores. */
310213d4
RB
1787 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
1788 if (vect_analyze_slp_instance (vinfo, first_element, max_tree_size))
a70d6342 1789 ok = true;
ebfd146a 1790
310213d4 1791 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
b010117a 1792 {
310213d4
RB
1793 if (loop_vinfo->reduction_chains.length () > 0)
1794 {
1795 /* Find SLP sequences starting from reduction chains. */
1796 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
1797 if (vect_analyze_slp_instance (vinfo, first_element,
1798 max_tree_size))
1799 ok = true;
1800 else
1801 return false;
b010117a 1802
310213d4
RB
1803 /* Don't try to vectorize SLP reductions if reduction chain was
1804 detected. */
1805 return ok;
1806 }
b010117a 1807
310213d4
RB
1808 /* Find SLP sequences starting from groups of reductions. */
1809 if (loop_vinfo->reductions.length () > 1
1810 && vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
1811 max_tree_size))
1812 ok = true;
1813 }
b5aeb3bb 1814
ebfd146a
IR
1815 return true;
1816}
1817
1818
1819/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
1820 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1821 least one instance. */
ebfd146a 1822
437f4a00 1823bool
ebfd146a
IR
1824vect_make_slp_decision (loop_vec_info loop_vinfo)
1825{
1826 unsigned int i, unrolling_factor = 1;
9771b263 1827 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
1828 slp_instance instance;
1829 int decided_to_slp = 0;
1830
73fbfcad 1831 if (dump_enabled_p ())
e645e942
TJ
1832 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1833 "\n");
ebfd146a 1834
9771b263 1835 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
1836 {
1837 /* FORNOW: SLP if you can. */
1838 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1839 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1840
ff802fa1 1841 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 1842 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 1843 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
1844 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1845 decided_to_slp++;
1846 }
1847
1848 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1849
73fbfcad 1850 if (decided_to_slp && dump_enabled_p ())
ccb3ad87 1851 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1852 "Decided to SLP %d instances. Unrolling factor %d\n",
78c60e3d 1853 decided_to_slp, unrolling_factor);
437f4a00
IR
1854
1855 return (decided_to_slp > 0);
ebfd146a
IR
1856}
1857
1858
1859/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 1860 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
1861
1862static void
642fce57 1863vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 1864{
355fe088 1865 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a 1866 imm_use_iterator imm_iter;
355fe088 1867 gimple *use_stmt;
642fce57 1868 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 1869 slp_tree child;
f2c74cc4 1870 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
1871 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1872 int j;
1873
1874 /* Propagate hybrid down the SLP tree. */
1875 if (stype == hybrid)
1876 ;
1877 else if (HYBRID_SLP_STMT (stmt_vinfo))
1878 stype = hybrid;
1879 else
1880 {
1881 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
1882 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
29764870
RB
1883 /* We always get the pattern stmt here, but for immediate
1884 uses we have to use the LHS of the original stmt. */
1885 gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1886 if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
1887 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
642fce57
RB
1888 if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1889 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
29764870
RB
1890 {
1891 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1892 continue;
1893 use_vinfo = vinfo_for_stmt (use_stmt);
1894 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
1895 && STMT_VINFO_RELATED_STMT (use_vinfo))
1896 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
1897 if (!STMT_SLP_TYPE (use_vinfo)
1898 && (STMT_VINFO_RELEVANT (use_vinfo)
1899 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
1900 && !(gimple_code (use_stmt) == GIMPLE_PHI
1901 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
1902 {
1903 if (dump_enabled_p ())
1904 {
1905 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
1906 "def in non-SLP stmt: ");
1907 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
1908 }
1909 stype = hybrid;
1910 }
29764870 1911 }
642fce57 1912 }
ebfd146a 1913
502f0263
RB
1914 if (stype == hybrid
1915 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
1916 {
1917 if (dump_enabled_p ())
1918 {
1919 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
1920 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
1921 }
1922 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
1923 }
ebfd146a 1924
642fce57 1925 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
90dd6e3d
RB
1926 if (child)
1927 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 1928}
f2c74cc4 1929
642fce57 1930/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 1931
642fce57
RB
1932static tree
1933vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
1934{
1935 walk_stmt_info *wi = (walk_stmt_info *)data;
1936 struct loop *loopp = (struct loop *)wi->info;
1937
1938 if (wi->is_lhs)
1939 return NULL_TREE;
1940
1941 if (TREE_CODE (*tp) == SSA_NAME
1942 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
1943 {
355fe088 1944 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp);
642fce57
RB
1945 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
1946 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
b1af7da6
RB
1947 {
1948 if (dump_enabled_p ())
1949 {
1950 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
1951 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
1952 }
1953 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
1954 }
642fce57
RB
1955 }
1956
1957 return NULL_TREE;
ebfd146a
IR
1958}
1959
642fce57
RB
1960static tree
1961vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
1962 walk_stmt_info *)
1963{
1964 /* If the stmt is in a SLP instance then this isn't a reason
1965 to mark use definitions in other SLP instances as hybrid. */
1966 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
1967 *handled = true;
1968 return NULL_TREE;
1969}
ebfd146a
IR
1970
1971/* Find stmts that must be both vectorized and SLPed. */
1972
1973void
1974vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
1975{
1976 unsigned int i;
9771b263 1977 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
1978 slp_instance instance;
1979
73fbfcad 1980 if (dump_enabled_p ())
e645e942
TJ
1981 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
1982 "\n");
ebfd146a 1983
642fce57
RB
1984 /* First walk all pattern stmt in the loop and mark defs of uses as
1985 hybrid because immediate uses in them are not recorded. */
1986 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
1987 {
1988 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
1989 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1990 gsi_next (&gsi))
1991 {
355fe088 1992 gimple *stmt = gsi_stmt (gsi);
642fce57
RB
1993 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1994 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1995 {
1996 walk_stmt_info wi;
1997 memset (&wi, 0, sizeof (wi));
1998 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
1999 gimple_stmt_iterator gsi2
2000 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2001 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2002 vect_detect_hybrid_slp_1, &wi);
2003 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2004 vect_detect_hybrid_slp_2,
2005 vect_detect_hybrid_slp_1, &wi);
2006 }
2007 }
2008 }
2009
2010 /* Then walk the SLP instance trees marking stmts with uses in
2011 non-SLP stmts as hybrid, also propagating hybrid down the
2012 SLP tree, collecting the above info on-the-fly. */
9771b263 2013 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2014 {
2015 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2016 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2017 i, pure_slp);
2018 }
ebfd146a
IR
2019}
2020
a70d6342
IR
2021
2022/* Create and initialize a new bb_vec_info struct for BB, as well as
2023 stmt_vec_info structs for all the stmts in it. */
b8698a0f 2024
a70d6342 2025static bb_vec_info
61d371eb
RB
2026new_bb_vec_info (gimple_stmt_iterator region_begin,
2027 gimple_stmt_iterator region_end)
a70d6342 2028{
61d371eb 2029 basic_block bb = gsi_bb (region_begin);
a70d6342
IR
2030 bb_vec_info res = NULL;
2031 gimple_stmt_iterator gsi;
2032
2033 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
310213d4 2034 res->kind = vec_info::bb;
a70d6342 2035 BB_VINFO_BB (res) = bb;
61d371eb
RB
2036 res->region_begin = region_begin;
2037 res->region_end = region_end;
a70d6342 2038
61d371eb
RB
2039 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2040 gsi_next (&gsi))
a70d6342 2041 {
355fe088 2042 gimple *stmt = gsi_stmt (gsi);
a70d6342 2043 gimple_set_uid (stmt, 0);
310213d4 2044 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
a70d6342
IR
2045 }
2046
9771b263
DN
2047 BB_VINFO_GROUPED_STORES (res).create (10);
2048 BB_VINFO_SLP_INSTANCES (res).create (2);
c3e7ee41 2049 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
a70d6342
IR
2050
2051 bb->aux = res;
2052 return res;
2053}
2054
2055
2056/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2057 stmts in the basic block. */
2058
2059static void
2060destroy_bb_vec_info (bb_vec_info bb_vinfo)
2061{
9771b263 2062 vec<slp_instance> slp_instances;
c7e62a26 2063 slp_instance instance;
a70d6342
IR
2064 basic_block bb;
2065 gimple_stmt_iterator si;
c7e62a26 2066 unsigned i;
a70d6342
IR
2067
2068 if (!bb_vinfo)
2069 return;
2070
2071 bb = BB_VINFO_BB (bb_vinfo);
2072
61d371eb
RB
2073 for (si = bb_vinfo->region_begin;
2074 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
a70d6342 2075 {
355fe088 2076 gimple *stmt = gsi_stmt (si);
a70d6342
IR
2077 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2078
2079 if (stmt_info)
2080 /* Free stmt_vec_info. */
2081 free_stmt_vec_info (stmt);
61d371eb
RB
2082
2083 /* Reset region marker. */
2084 gimple_set_uid (stmt, -1);
a70d6342
IR
2085 }
2086
310213d4 2087 vect_destroy_datarefs (bb_vinfo);
01be8516 2088 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
9771b263 2089 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
c7e62a26 2090 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
9771b263 2091 FOR_EACH_VEC_ELT (slp_instances, i, instance)
c7e62a26 2092 vect_free_slp_instance (instance);
9771b263 2093 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
c3e7ee41 2094 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
a70d6342
IR
2095 free (bb_vinfo);
2096 bb->aux = NULL;
2097}
2098
2099
2100/* Analyze statements contained in SLP tree node after recursively analyzing
2101 the subtree. Return TRUE if the operations are supported. */
2102
2103static bool
a12e42fc 2104vect_slp_analyze_node_operations (slp_tree node)
a70d6342
IR
2105{
2106 bool dummy;
2107 int i;
355fe088 2108 gimple *stmt;
d755c7ef 2109 slp_tree child;
a70d6342
IR
2110
2111 if (!node)
2112 return true;
2113
9771b263 2114 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a12e42fc 2115 if (!vect_slp_analyze_node_operations (child))
d092494c 2116 return false;
a70d6342 2117
9771b263 2118 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
2119 {
2120 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2121 gcc_assert (stmt_info);
a12e42fc 2122 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
a70d6342
IR
2123
2124 if (!vect_analyze_stmt (stmt, &dummy, node))
a12e42fc 2125 return false;
a70d6342
IR
2126 }
2127
2128 return true;
2129}
2130
2131
ff802fa1 2132/* Analyze statements in SLP instances of the basic block. Return TRUE if the
a70d6342
IR
2133 operations are supported. */
2134
a12e42fc 2135bool
1a4b99c1 2136vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
a70d6342 2137{
a70d6342
IR
2138 slp_instance instance;
2139 int i;
2140
a12e42fc
RB
2141 if (dump_enabled_p ())
2142 dump_printf_loc (MSG_NOTE, vect_location,
2143 "=== vect_slp_analyze_operations ===\n");
2144
9771b263 2145 for (i = 0; slp_instances.iterate (i, &instance); )
a70d6342 2146 {
a12e42fc 2147 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
a70d6342 2148 {
a12e42fc
RB
2149 dump_printf_loc (MSG_NOTE, vect_location,
2150 "removing SLP instance operations starting from: ");
2151 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2152 SLP_TREE_SCALAR_STMTS
2153 (SLP_INSTANCE_TREE (instance))[0], 0);
2154 vect_free_slp_instance (instance);
9771b263 2155 slp_instances.ordered_remove (i);
a70d6342
IR
2156 }
2157 else
1a4b99c1
RB
2158 {
2159 /* Compute the costs of the SLP instance. */
2160 vect_analyze_slp_cost (instance, data);
2161 i++;
2162 }
b8698a0f
L
2163 }
2164
9771b263 2165 if (!slp_instances.length ())
a70d6342
IR
2166 return false;
2167
2168 return true;
2169}
2170
6eddf228
RB
2171
2172/* Compute the scalar cost of the SLP node NODE and its children
2173 and return it. Do not account defs that are marked in LIFE and
2174 update LIFE according to uses of NODE. */
2175
2176static unsigned
292cba13 2177vect_bb_slp_scalar_cost (basic_block bb,
ff4c81cc 2178 slp_tree node, vec<bool, va_heap> *life)
6eddf228
RB
2179{
2180 unsigned scalar_cost = 0;
2181 unsigned i;
355fe088 2182 gimple *stmt;
6eddf228
RB
2183 slp_tree child;
2184
2185 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2186 {
2187 unsigned stmt_cost;
2188 ssa_op_iter op_iter;
2189 def_operand_p def_p;
2190 stmt_vec_info stmt_info;
2191
ff4c81cc 2192 if ((*life)[i])
6eddf228
RB
2193 continue;
2194
2195 /* If there is a non-vectorized use of the defs then the scalar
2196 stmt is kept live in which case we do not account it or any
2197 required defs in the SLP children in the scalar cost. This
2198 way we make the vectorization more costly when compared to
2199 the scalar cost. */
2200 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2201 {
2202 imm_use_iterator use_iter;
355fe088 2203 gimple *use_stmt;
6eddf228 2204 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5 2205 if (!is_gimple_debug (use_stmt)
61d371eb
RB
2206 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2207 use_stmt)
f30a0ba5 2208 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
6eddf228 2209 {
ff4c81cc 2210 (*life)[i] = true;
6eddf228
RB
2211 BREAK_FROM_IMM_USE_STMT (use_iter);
2212 }
2213 }
ff4c81cc 2214 if ((*life)[i])
6eddf228
RB
2215 continue;
2216
2217 stmt_info = vinfo_for_stmt (stmt);
2218 if (STMT_VINFO_DATA_REF (stmt_info))
2219 {
2220 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2221 stmt_cost = vect_get_stmt_cost (scalar_load);
2222 else
2223 stmt_cost = vect_get_stmt_cost (scalar_store);
2224 }
2225 else
2226 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2227
2228 scalar_cost += stmt_cost;
2229 }
2230
2231 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
90dd6e3d
RB
2232 if (child)
2233 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
6eddf228
RB
2234
2235 return scalar_cost;
2236}
2237
69f11a13
IR
2238/* Check if vectorization of the basic block is profitable. */
2239
2240static bool
2241vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2242{
9771b263 2243 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2244 slp_instance instance;
1a4b99c1 2245 int i;
c3e7ee41 2246 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2247 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2248
2249 /* Calculate scalar cost. */
6eddf228 2250 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2251 {
00f96dc9 2252 auto_vec<bool, 20> life;
ff4c81cc 2253 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
292cba13
RB
2254 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2255 SLP_INSTANCE_TREE (instance),
ff4c81cc 2256 &life);
69f11a13
IR
2257 }
2258
c3e7ee41 2259 /* Complete the target-specific cost calculation. */
92345349
BS
2260 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2261 &vec_inside_cost, &vec_epilogue_cost);
2262
2263 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2264
73fbfcad 2265 if (dump_enabled_p ())
69f11a13 2266 {
78c60e3d
SS
2267 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2268 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2269 vec_inside_cost);
2270 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2271 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2272 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2273 }
2274
a6524bba
RB
2275 /* Vectorization is profitable if its cost is more than the cost of scalar
2276 version. Note that we err on the vector side for equal cost because
2277 the cost estimate is otherwise quite pessimistic (constant uses are
2278 free on the scalar side but cost a load on the vector side for
2279 example). */
2280 if (vec_outside_cost + vec_inside_cost > scalar_cost)
69f11a13
IR
2281 return false;
2282
2283 return true;
2284}
2285
a5b50aa1
RB
2286/* Check if the basic block can be vectorized. Returns a bb_vec_info
2287 if so and sets fatal to true if failure is independent of
2288 current_vector_size. */
a70d6342 2289
8e19f5a1 2290static bb_vec_info
61d371eb
RB
2291vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2292 gimple_stmt_iterator region_end,
a5b50aa1
RB
2293 vec<data_reference_p> datarefs, int n_stmts,
2294 bool &fatal)
a70d6342
IR
2295{
2296 bb_vec_info bb_vinfo;
9771b263 2297 vec<slp_instance> slp_instances;
a70d6342 2298 slp_instance instance;
8e19f5a1 2299 int i;
777e1f09 2300 int min_vf = 2;
e4a707c4 2301
a5b50aa1
RB
2302 /* The first group of checks is independent of the vector size. */
2303 fatal = true;
2304
61d371eb
RB
2305 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2306 {
2307 if (dump_enabled_p ())
2308 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2309 "not vectorized: too many instructions in "
2310 "basic block.\n");
2311 free_data_refs (datarefs);
2312 return NULL;
2313 }
2314
2315 bb_vinfo = new_bb_vec_info (region_begin, region_end);
a70d6342
IR
2316 if (!bb_vinfo)
2317 return NULL;
2318
61d371eb 2319 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
428db0ba
RB
2320
2321 /* Analyze the data references. */
2322
2323 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
a70d6342 2324 {
73fbfcad 2325 if (dump_enabled_p ())
78c60e3d
SS
2326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2327 "not vectorized: unhandled data-ref in basic "
2328 "block.\n");
b8698a0f 2329
a70d6342
IR
2330 destroy_bb_vec_info (bb_vinfo);
2331 return NULL;
2332 }
2333
fcac74a1 2334 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2335 {
73fbfcad 2336 if (dump_enabled_p ())
78c60e3d
SS
2337 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2338 "not vectorized: not enough data-refs in "
2339 "basic block.\n");
a70d6342
IR
2340
2341 destroy_bb_vec_info (bb_vinfo);
2342 return NULL;
2343 }
2344
310213d4 2345 if (!vect_analyze_data_ref_accesses (bb_vinfo))
5abe1e05
RB
2346 {
2347 if (dump_enabled_p ())
2348 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2349 "not vectorized: unhandled data access in "
2350 "basic block.\n");
2351
2352 destroy_bb_vec_info (bb_vinfo);
2353 return NULL;
2354 }
2355
a5b50aa1
RB
2356 /* If there are no grouped stores in the region there is no need
2357 to continue with pattern recog as vect_analyze_slp will fail
2358 anyway. */
2359 if (bb_vinfo->grouped_stores.is_empty ())
a70d6342 2360 {
73fbfcad 2361 if (dump_enabled_p ())
a5b50aa1
RB
2362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2363 "not vectorized: no grouped stores in "
2364 "basic block.\n");
b8698a0f 2365
a70d6342
IR
2366 destroy_bb_vec_info (bb_vinfo);
2367 return NULL;
2368 }
b8698a0f 2369
a5b50aa1
RB
2370 /* While the rest of the analysis below depends on it in some way. */
2371 fatal = false;
2372
2373 vect_pattern_recog (bb_vinfo);
2374
a70d6342
IR
2375 /* Check the SLP opportunities in the basic block, analyze and build SLP
2376 trees. */
310213d4 2377 if (!vect_analyze_slp (bb_vinfo, n_stmts))
a70d6342 2378 {
73fbfcad 2379 if (dump_enabled_p ())
effb52da
RB
2380 {
2381 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2382 "Failed to SLP the basic block.\n");
2383 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2384 "not vectorized: failed to find SLP opportunities "
2385 "in basic block.\n");
2386 }
a70d6342
IR
2387
2388 destroy_bb_vec_info (bb_vinfo);
2389 return NULL;
2390 }
b8698a0f 2391
a5b50aa1
RB
2392 /* Analyze and verify the alignment of data references in the SLP
2393 instances. */
2394 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2395 {
2396 if (! vect_slp_analyze_and_verify_instance_alignment (instance))
2397 {
2398 dump_printf_loc (MSG_NOTE, vect_location,
2399 "removing SLP instance operations starting from: ");
2400 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2401 SLP_TREE_SCALAR_STMTS
2402 (SLP_INSTANCE_TREE (instance))[0], 0);
2403 vect_free_slp_instance (instance);
2404 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2405 continue;
2406 }
2407 i++;
2408 }
2409
2410 if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
2411 {
2412 destroy_bb_vec_info (bb_vinfo);
2413 return NULL;
2414 }
2415
a70d6342
IR
2416 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2417
2418 /* Mark all the statements that we want to vectorize as pure SLP and
2419 relevant. */
9771b263 2420 FOR_EACH_VEC_ELT (slp_instances, i, instance)
a70d6342
IR
2421 {
2422 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2423 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
b8698a0f 2424 }
a70d6342 2425
5e6667b2 2426 /* Mark all the statements that we do not want to vectorize. */
61d371eb
RB
2427 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2428 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
5e6667b2
RB
2429 {
2430 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2431 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2432 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2433 }
2434
2435 /* Analyze dependences. At this point all stmts not participating in
2436 vectorization have to be marked. Dependence analysis assumes
2437 that we either vectorize all SLP instances or none at all. */
a5b50aa1 2438 if (! vect_slp_analyze_data_ref_dependences (bb_vinfo))
38eec4c6 2439 {
73fbfcad 2440 if (dump_enabled_p ())
a5b50aa1
RB
2441 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2442 "not vectorized: unhandled data dependence "
2443 "in basic block.\n");
2444
38eec4c6
UW
2445 destroy_bb_vec_info (bb_vinfo);
2446 return NULL;
2447 }
2448
1a4b99c1
RB
2449 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2450 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
a70d6342 2451 {
73fbfcad 2452 if (dump_enabled_p ())
e645e942 2453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2454 "not vectorized: bad operation in basic block.\n");
a70d6342
IR
2455
2456 destroy_bb_vec_info (bb_vinfo);
2457 return NULL;
2458 }
2459
69f11a13 2460 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2461 if (!unlimited_cost_model (NULL)
69f11a13
IR
2462 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2463 {
73fbfcad 2464 if (dump_enabled_p ())
78c60e3d
SS
2465 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2466 "not vectorized: vectorization is not "
2467 "profitable.\n");
69f11a13
IR
2468
2469 destroy_bb_vec_info (bb_vinfo);
2470 return NULL;
2471 }
2472
73fbfcad 2473 if (dump_enabled_p ())
78c60e3d
SS
2474 dump_printf_loc (MSG_NOTE, vect_location,
2475 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2476
2477 return bb_vinfo;
2478}
2479
2480
428db0ba
RB
2481/* Main entry for the BB vectorizer. Analyze and transform BB, returns
2482 true if anything in the basic-block was vectorized. */
2483
2484bool
2485vect_slp_bb (basic_block bb)
8e19f5a1
IR
2486{
2487 bb_vec_info bb_vinfo;
8e19f5a1
IR
2488 gimple_stmt_iterator gsi;
2489 unsigned int vector_sizes;
61d371eb 2490 bool any_vectorized = false;
8e19f5a1 2491
73fbfcad 2492 if (dump_enabled_p ())
78c60e3d 2493 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
8e19f5a1 2494
8e19f5a1
IR
2495 /* Autodetect first vector size we try. */
2496 current_vector_size = 0;
2497 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2498
61d371eb
RB
2499 gsi = gsi_start_bb (bb);
2500
8e19f5a1
IR
2501 while (1)
2502 {
61d371eb
RB
2503 if (gsi_end_p (gsi))
2504 break;
2505
2506 gimple_stmt_iterator region_begin = gsi;
2507 vec<data_reference_p> datarefs = vNULL;
2508 int insns = 0;
2509
2510 for (; !gsi_end_p (gsi); gsi_next (&gsi))
428db0ba 2511 {
61d371eb
RB
2512 gimple *stmt = gsi_stmt (gsi);
2513 if (is_gimple_debug (stmt))
2514 continue;
2515 insns++;
2516
2517 if (gimple_location (stmt) != UNKNOWN_LOCATION)
2518 vect_location = gimple_location (stmt);
2519
2520 if (!find_data_references_in_stmt (NULL, stmt, &datarefs))
2521 break;
2522 }
2523
2524 /* Skip leading unhandled stmts. */
2525 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
2526 {
2527 gsi_next (&gsi);
2528 continue;
2529 }
428db0ba 2530
61d371eb
RB
2531 gimple_stmt_iterator region_end = gsi;
2532
2533 bool vectorized = false;
a5b50aa1 2534 bool fatal = false;
61d371eb 2535 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
a5b50aa1 2536 datarefs, insns, fatal);
61d371eb
RB
2537 if (bb_vinfo
2538 && dbg_cnt (vect_slp))
2539 {
428db0ba 2540 if (dump_enabled_p ())
61d371eb 2541 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
428db0ba
RB
2542
2543 vect_schedule_slp (bb_vinfo);
2544
2545 if (dump_enabled_p ())
2546 dump_printf_loc (MSG_NOTE, vect_location,
61d371eb 2547 "basic block part vectorized\n");
428db0ba
RB
2548
2549 destroy_bb_vec_info (bb_vinfo);
2550
61d371eb 2551 vectorized = true;
428db0ba 2552 }
61d371eb
RB
2553 else
2554 destroy_bb_vec_info (bb_vinfo);
8e19f5a1 2555
61d371eb 2556 any_vectorized |= vectorized;
8e19f5a1
IR
2557
2558 vector_sizes &= ~current_vector_size;
61d371eb
RB
2559 if (vectorized
2560 || vector_sizes == 0
a5b50aa1
RB
2561 || current_vector_size == 0
2562 /* If vect_slp_analyze_bb_1 signaled that analysis for all
2563 vector sizes will fail do not bother iterating. */
2564 || fatal)
61d371eb
RB
2565 {
2566 if (gsi_end_p (region_end))
2567 break;
8e19f5a1 2568
61d371eb
RB
2569 /* Skip the unhandled stmt. */
2570 gsi_next (&gsi);
2571
2572 /* And reset vector sizes. */
2573 current_vector_size = 0;
2574 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2575 }
2576 else
2577 {
2578 /* Try the next biggest vector size. */
2579 current_vector_size = 1 << floor_log2 (vector_sizes);
2580 if (dump_enabled_p ())
2581 dump_printf_loc (MSG_NOTE, vect_location,
2582 "***** Re-trying analysis with "
2583 "vector size %d\n", current_vector_size);
2584
2585 /* Start over. */
2586 gsi = region_begin;
2587 }
8e19f5a1 2588 }
61d371eb
RB
2589
2590 return any_vectorized;
8e19f5a1
IR
2591}
2592
2593
b8698a0f
L
2594/* For constant and loop invariant defs of SLP_NODE this function returns
2595 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
2596 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2597 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
2598 REDUC_INDEX is the index of the reduction operand in the statements, unless
2599 it is -1. */
ebfd146a
IR
2600
2601static void
9dc3f7de 2602vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 2603 vec<tree> *vec_oprnds,
b5aeb3bb
IR
2604 unsigned int op_num, unsigned int number_of_vectors,
2605 int reduc_index)
ebfd146a 2606{
355fe088
TS
2607 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2608 gimple *stmt = stmts[0];
ebfd146a 2609 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d2a12ae7 2610 unsigned nunits;
ebfd146a 2611 tree vec_cst;
d2a12ae7
RG
2612 tree *elts;
2613 unsigned j, number_of_places_left_in_vector;
ebfd146a 2614 tree vector_type;
9dc3f7de 2615 tree vop;
9771b263 2616 int group_size = stmts.length ();
ebfd146a 2617 unsigned int vec_num, i;
d2a12ae7 2618 unsigned number_of_copies = 1;
9771b263
DN
2619 vec<tree> voprnds;
2620 voprnds.create (number_of_vectors);
ebfd146a 2621 bool constant_p, is_store;
b5aeb3bb 2622 tree neutral_op = NULL;
bac430c9 2623 enum tree_code code = gimple_expr_code (stmt);
355fe088 2624 gimple *def_stmt;
0e93a64e 2625 struct loop *loop;
13396b6e 2626 gimple_seq ctor_seq = NULL;
b5aeb3bb 2627
42fd8198
IE
2628 /* Check if vector type is a boolean vector. */
2629 if (TREE_CODE (TREE_TYPE (op)) == BOOLEAN_TYPE
2630 && (VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo))
2631 || (code == COND_EXPR && op_num < 2)))
2632 vector_type
2633 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
2634 else
2635 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
afbe6325
RB
2636 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2637
29ed4920
IR
2638 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2639 && reduc_index != -1)
b5aeb3bb 2640 {
afbe6325
RB
2641 op_num = reduc_index;
2642 op = gimple_op (stmt, op_num + 1);
b5aeb3bb 2643 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
ff802fa1 2644 we need either neutral operands or the original operands. See
b5aeb3bb
IR
2645 get_initial_def_for_reduction() for details. */
2646 switch (code)
2647 {
2648 case WIDEN_SUM_EXPR:
2649 case DOT_PROD_EXPR:
afbe6325 2650 case SAD_EXPR:
b5aeb3bb
IR
2651 case PLUS_EXPR:
2652 case MINUS_EXPR:
2653 case BIT_IOR_EXPR:
2654 case BIT_XOR_EXPR:
2655 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2656 neutral_op = build_real (TREE_TYPE (op), dconst0);
2657 else
2658 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2659
2660 break;
2661
2662 case MULT_EXPR:
b5aeb3bb
IR
2663 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2664 neutral_op = build_real (TREE_TYPE (op), dconst1);
2665 else
2666 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2667
2668 break;
2669
c1e822d5
IR
2670 case BIT_AND_EXPR:
2671 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2672 break;
2673
f1485e5b
RB
2674 /* For MIN/MAX we don't have an easy neutral operand but
2675 the initial values can be used fine here. Only for
2676 a reduction chain we have to force a neutral element. */
2677 case MAX_EXPR:
2678 case MIN_EXPR:
2679 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2680 neutral_op = NULL;
2681 else
2682 {
2683 def_stmt = SSA_NAME_DEF_STMT (op);
2684 loop = (gimple_bb (stmt))->loop_father;
2685 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2686 loop_preheader_edge (loop));
2687 }
2688 break;
0e93a64e 2689
b5aeb3bb 2690 default:
afbe6325 2691 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
0e93a64e 2692 neutral_op = NULL;
b5aeb3bb
IR
2693 }
2694 }
ebfd146a
IR
2695
2696 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2697 {
2698 is_store = true;
2699 op = gimple_assign_rhs1 (stmt);
2700 }
2701 else
9dc3f7de
IR
2702 is_store = false;
2703
2704 gcc_assert (op);
ebfd146a
IR
2705
2706 if (CONSTANT_CLASS_P (op))
d59dc888 2707 constant_p = true;
ebfd146a 2708 else
d59dc888
IR
2709 constant_p = false;
2710
ebfd146a 2711 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 2712 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
2713
2714 For example, we have two scalar operands, s1 and s2 (e.g., group of
2715 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
2716 of this type can be packed in a vector). The output vector will contain
2717 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
2718 will be 2).
2719
b8698a0f 2720 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
2721 containing the operands.
2722
2723 For example, NUNITS is four as before, and the group size is 8
f7e531cf 2724 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 2725 {s5, s6, s7, s8}. */
b8698a0f 2726
14a61437 2727 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
2728
2729 number_of_places_left_in_vector = nunits;
d2a12ae7 2730 elts = XALLOCAVEC (tree, nunits);
90dd6e3d 2731 bool place_after_defs = false;
ebfd146a
IR
2732 for (j = 0; j < number_of_copies; j++)
2733 {
9771b263 2734 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
2735 {
2736 if (is_store)
2737 op = gimple_assign_rhs1 (stmt);
bac430c9 2738 else
f7e531cf 2739 {
bac430c9 2740 switch (code)
f7e531cf 2741 {
bac430c9
IR
2742 case COND_EXPR:
2743 if (op_num == 0 || op_num == 1)
2744 {
2745 tree cond = gimple_assign_rhs1 (stmt);
2746 op = TREE_OPERAND (cond, op_num);
2747 }
2748 else
2749 {
2750 if (op_num == 2)
2751 op = gimple_assign_rhs2 (stmt);
2752 else
2753 op = gimple_assign_rhs3 (stmt);
2754 }
2755 break;
2756
2757 case CALL_EXPR:
2758 op = gimple_call_arg (stmt, op_num);
2759 break;
2760
b84b294a
JJ
2761 case LSHIFT_EXPR:
2762 case RSHIFT_EXPR:
2763 case LROTATE_EXPR:
2764 case RROTATE_EXPR:
2765 op = gimple_op (stmt, op_num + 1);
2766 /* Unlike the other binary operators, shifts/rotates have
2767 the shift count being int, instead of the same type as
2768 the lhs, so make sure the scalar is the right type if
2769 we are dealing with vectors of
2770 long long/long/short/char. */
793d9a16 2771 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
2772 op = fold_convert (TREE_TYPE (vector_type), op);
2773 break;
2774
bac430c9
IR
2775 default:
2776 op = gimple_op (stmt, op_num + 1);
b84b294a 2777 break;
f7e531cf
IR
2778 }
2779 }
b8698a0f 2780
b5aeb3bb
IR
2781 if (reduc_index != -1)
2782 {
0e93a64e
IR
2783 loop = (gimple_bb (stmt))->loop_father;
2784 def_stmt = SSA_NAME_DEF_STMT (op);
b5aeb3bb
IR
2785
2786 gcc_assert (loop);
b010117a
IR
2787
2788 /* Get the def before the loop. In reduction chain we have only
2789 one initial value. */
2790 if ((j != (number_of_copies - 1)
2791 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2792 && i != 0))
2793 && neutral_op)
b5aeb3bb 2794 op = neutral_op;
b010117a
IR
2795 else
2796 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2797 loop_preheader_edge (loop));
b5aeb3bb
IR
2798 }
2799
ebfd146a 2800 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 2801 number_of_places_left_in_vector--;
90dd6e3d 2802 tree orig_op = op;
13396b6e 2803 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 2804 {
793d9a16 2805 if (CONSTANT_CLASS_P (op))
13396b6e 2806 {
42fd8198
IE
2807 if (VECTOR_BOOLEAN_TYPE_P (vector_type))
2808 {
2809 /* Can't use VIEW_CONVERT_EXPR for booleans because
2810 of possibly different sizes of scalar value and
2811 vector element. */
2812 if (integer_zerop (op))
2813 op = build_int_cst (TREE_TYPE (vector_type), 0);
2814 else if (integer_onep (op))
2815 op = build_int_cst (TREE_TYPE (vector_type), 1);
2816 else
2817 gcc_unreachable ();
2818 }
2819 else
2820 op = fold_unary (VIEW_CONVERT_EXPR,
2821 TREE_TYPE (vector_type), op);
13396b6e
JJ
2822 gcc_assert (op && CONSTANT_CLASS_P (op));
2823 }
2824 else
2825 {
b731b390 2826 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
355fe088 2827 gimple *init_stmt;
0d0e4a03 2828 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
13396b6e 2829 init_stmt
0d0e4a03 2830 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
13396b6e
JJ
2831 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2832 op = new_temp;
2833 }
50eeef09 2834 }
d2a12ae7 2835 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
2836 if (!CONSTANT_CLASS_P (op))
2837 constant_p = false;
90dd6e3d
RB
2838 if (TREE_CODE (orig_op) == SSA_NAME
2839 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2840 && STMT_VINFO_BB_VINFO (stmt_vinfo)
2841 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2842 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2843 place_after_defs = true;
ebfd146a
IR
2844
2845 if (number_of_places_left_in_vector == 0)
2846 {
2847 number_of_places_left_in_vector = nunits;
2848
2849 if (constant_p)
d2a12ae7 2850 vec_cst = build_vector (vector_type, elts);
ebfd146a 2851 else
d2a12ae7 2852 {
9771b263 2853 vec<constructor_elt, va_gc> *v;
d2a12ae7 2854 unsigned k;
9771b263 2855 vec_alloc (v, nunits);
d2a12ae7
RG
2856 for (k = 0; k < nunits; ++k)
2857 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2858 vec_cst = build_constructor (vector_type, v);
2859 }
90dd6e3d
RB
2860 tree init;
2861 gimple_stmt_iterator gsi;
2862 if (place_after_defs)
2863 {
2864 gsi = gsi_for_stmt
2865 (vect_find_last_scalar_stmt_in_slp (slp_node));
2866 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2867 }
2868 else
2869 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
13396b6e
JJ
2870 if (ctor_seq != NULL)
2871 {
90dd6e3d 2872 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
13396b6e
JJ
2873 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2874 GSI_SAME_STMT);
2875 ctor_seq = NULL;
2876 }
90dd6e3d
RB
2877 voprnds.quick_push (init);
2878 place_after_defs = false;
ebfd146a
IR
2879 }
2880 }
2881 }
2882
b8698a0f 2883 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 2884 them. */
9771b263 2885 vec_num = voprnds.length ();
d2a12ae7 2886 for (j = vec_num; j != 0; j--)
ebfd146a 2887 {
9771b263
DN
2888 vop = voprnds[j - 1];
2889 vec_oprnds->quick_push (vop);
ebfd146a
IR
2890 }
2891
9771b263 2892 voprnds.release ();
ebfd146a
IR
2893
2894 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
2895 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2896 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 2897 to replicate the vectors. */
9771b263 2898 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 2899 {
b5aeb3bb
IR
2900 tree neutral_vec = NULL;
2901
2902 if (neutral_op)
2903 {
2904 if (!neutral_vec)
b9acc9f1 2905 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 2906
9771b263 2907 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
2908 }
2909 else
2910 {
9771b263
DN
2911 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2912 vec_oprnds->quick_push (vop);
b5aeb3bb 2913 }
ebfd146a
IR
2914 }
2915}
2916
2917
2918/* Get vectorized definitions from SLP_NODE that contains corresponding
2919 vectorized def-stmts. */
2920
2921static void
9771b263 2922vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
2923{
2924 tree vec_oprnd;
355fe088 2925 gimple *vec_def_stmt;
ebfd146a
IR
2926 unsigned int i;
2927
9771b263 2928 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 2929
9771b263 2930 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
2931 {
2932 gcc_assert (vec_def_stmt);
2933 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 2934 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
2935 }
2936}
2937
2938
b8698a0f
L
2939/* Get vectorized definitions for SLP_NODE.
2940 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
2941 call vect_get_constant_vectors() to create vector stmts.
2942 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
2943 must be stored in the corresponding child of SLP_NODE, and we call
2944 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 2945
ebfd146a 2946void
9771b263 2947vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
37b5ec8f 2948 vec<vec<tree> > *vec_oprnds, int reduc_index)
ebfd146a 2949{
355fe088 2950 gimple *first_stmt;
d092494c
IR
2951 int number_of_vects = 0, i;
2952 unsigned int child_index = 0;
b8698a0f 2953 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 2954 slp_tree child = NULL;
37b5ec8f 2955 vec<tree> vec_defs;
e44978dc 2956 tree oprnd;
d092494c 2957 bool vectorized_defs;
ebfd146a 2958
9771b263
DN
2959 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2960 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 2961 {
d092494c
IR
2962 /* For each operand we check if it has vectorized definitions in a child
2963 node or we need to create them (for invariants and constants). We
2964 check if the LHS of the first stmt of the next child matches OPRND.
2965 If it does, we found the correct child. Otherwise, we call
2966 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2967 to check this child node for the next operand. */
2968 vectorized_defs = false;
9771b263 2969 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 2970 {
01d8bf07 2971 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 2972
e44978dc 2973 /* We have to check both pattern and original def, if available. */
90dd6e3d 2974 if (child)
e44978dc 2975 {
355fe088
TS
2976 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2977 gimple *related
90dd6e3d
RB
2978 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2979
2980 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2981 || (related
2982 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2983 {
2984 /* The number of vector defs is determined by the number of
2985 vector statements in the node from which we get those
2986 statements. */
2987 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2988 vectorized_defs = true;
2989 child_index++;
2990 }
e44978dc 2991 }
90dd6e3d
RB
2992 else
2993 child_index++;
d092494c 2994 }
ebfd146a 2995
d092494c
IR
2996 if (!vectorized_defs)
2997 {
2998 if (i == 0)
2999 {
3000 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3001 /* Number of vector stmts was calculated according to LHS in
3002 vect_schedule_slp_instance (), fix it by replacing LHS with
3003 RHS, if necessary. See vect_get_smallest_scalar_type () for
3004 details. */
3005 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
3006 &rhs_size_unit);
3007 if (rhs_size_unit != lhs_size_unit)
3008 {
3009 number_of_vects *= rhs_size_unit;
3010 number_of_vects /= lhs_size_unit;
3011 }
3012 }
3013 }
b5aeb3bb 3014
d092494c 3015 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
3016 vec_defs = vNULL;
3017 vec_defs.create (number_of_vects);
ebfd146a 3018
d092494c
IR
3019 /* For reduction defs we call vect_get_constant_vectors (), since we are
3020 looking for initial loop invariant values. */
3021 if (vectorized_defs && reduc_index == -1)
3022 /* The defs are already vectorized. */
37b5ec8f 3023 vect_get_slp_vect_defs (child, &vec_defs);
d092494c
IR
3024 else
3025 /* Build vectors from scalar defs. */
37b5ec8f 3026 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
d092494c 3027 number_of_vects, reduc_index);
ebfd146a 3028
37b5ec8f 3029 vec_oprnds->quick_push (vec_defs);
ebfd146a 3030
d092494c
IR
3031 /* For reductions, we only need initial values. */
3032 if (reduc_index != -1)
3033 return;
3034 }
ebfd146a
IR
3035}
3036
a70d6342 3037
b8698a0f 3038/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
ebfd146a
IR
3039 building a vector of type MASK_TYPE from it) and two input vectors placed in
3040 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
3041 shifting by STRIDE elements of DR_CHAIN for every copy.
3042 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 3043 copies).
ebfd146a
IR
3044 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
3045 the created stmts must be inserted. */
3046
3047static inline void
355fe088 3048vect_create_mask_and_perm (gimple *stmt,
faf63e39 3049 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f 3050 gimple_stmt_iterator *gsi, slp_tree node,
9771b263 3051 tree vectype, vec<tree> dr_chain,
ebfd146a
IR
3052 int ncopies, int vect_stmts_counter)
3053{
faf63e39 3054 tree perm_dest;
355fe088 3055 gimple *perm_stmt = NULL;
0f900dfa 3056 int i, stride;
ebfd146a 3057 tree first_vec, second_vec, data_ref;
ebfd146a 3058
ebfd146a 3059 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
ebfd146a 3060
b8698a0f 3061 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 3062 stmts later. */
9771b263 3063 for (i = SLP_TREE_VEC_STMTS (node).length ();
ebfd146a 3064 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
9771b263 3065 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a
IR
3066
3067 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3068 for (i = 0; i < ncopies; i++)
3069 {
9771b263
DN
3070 first_vec = dr_chain[first_vec_indx];
3071 second_vec = dr_chain[second_vec_indx];
ebfd146a 3072
ebfd146a 3073 /* Generate the permute statement. */
0d0e4a03
JJ
3074 perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3075 first_vec, second_vec, mask);
ebfd146a 3076 data_ref = make_ssa_name (perm_dest, perm_stmt);
2635892a 3077 gimple_set_lhs (perm_stmt, data_ref);
ebfd146a 3078 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
ebfd146a 3079
b8698a0f 3080 /* Store the vector statement in NODE. */
9771b263 3081 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
ebfd146a
IR
3082
3083 first_vec_indx += stride;
3084 second_vec_indx += stride;
3085 }
ebfd146a
IR
3086}
3087
3088
b8698a0f 3089/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
ebfd146a 3090 return in CURRENT_MASK_ELEMENT its equivalent in target specific
ff802fa1 3091 representation. Check that the mask is valid and return FALSE if not.
ebfd146a
IR
3092 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
3093 the next vector, i.e., the current first vector is not needed. */
b8698a0f 3094
ebfd146a 3095static bool
355fe088 3096vect_get_mask_element (gimple *stmt, int first_mask_element, int m,
ebfd146a 3097 int mask_nunits, bool only_one_vec, int index,
22e4dee7 3098 unsigned char *mask, int *current_mask_element,
694a4f61
IR
3099 bool *need_next_vector, int *number_of_mask_fixes,
3100 bool *mask_fixed, bool *needs_first_vector)
ebfd146a
IR
3101{
3102 int i;
ebfd146a
IR
3103
3104 /* Convert to target specific representation. */
3105 *current_mask_element = first_mask_element + m;
3106 /* Adjust the value in case it's a mask for second and third vectors. */
694a4f61 3107 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
ebfd146a 3108
c8047699
RB
3109 if (*current_mask_element < 0)
3110 {
3111 if (dump_enabled_p ())
3112 {
3113 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3114 "permutation requires past vector ");
3115 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3116 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3117 }
3118 return false;
3119 }
3120
ebfd146a 3121 if (*current_mask_element < mask_nunits)
694a4f61 3122 *needs_first_vector = true;
ebfd146a
IR
3123
3124 /* We have only one input vector to permute but the mask accesses values in
3125 the next vector as well. */
3126 if (only_one_vec && *current_mask_element >= mask_nunits)
3127 {
73fbfcad 3128 if (dump_enabled_p ())
ebfd146a 3129 {
e645e942 3130 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
3131 "permutation requires at least two vectors ");
3132 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3133 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
3134 }
3135
3136 return false;
3137 }
3138
3139 /* The mask requires the next vector. */
496d3346 3140 while (*current_mask_element >= mask_nunits * 2)
ebfd146a 3141 {
694a4f61 3142 if (*needs_first_vector || *mask_fixed)
ebfd146a
IR
3143 {
3144 /* We either need the first vector too or have already moved to the
b8698a0f 3145 next vector. In both cases, this permutation needs three
ebfd146a 3146 vectors. */
73fbfcad 3147 if (dump_enabled_p ())
ebfd146a 3148 {
78c60e3d
SS
3149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3150 "permutation requires at "
3151 "least three vectors ");
3152 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3153 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
3154 }
3155
3156 return false;
3157 }
3158
3159 /* We move to the next vector, dropping the first one and working with
3160 the second and the third - we need to adjust the values of the mask
3161 accordingly. */
694a4f61 3162 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
ebfd146a
IR
3163
3164 for (i = 0; i < index; i++)
694a4f61 3165 mask[i] -= mask_nunits * *number_of_mask_fixes;
ebfd146a 3166
694a4f61
IR
3167 (*number_of_mask_fixes)++;
3168 *mask_fixed = true;
ebfd146a
IR
3169 }
3170
694a4f61 3171 *need_next_vector = *mask_fixed;
ebfd146a
IR
3172
3173 /* This was the last element of this mask. Start a new one. */
3174 if (index == mask_nunits - 1)
3175 {
694a4f61
IR
3176 *number_of_mask_fixes = 1;
3177 *mask_fixed = false;
3178 *needs_first_vector = false;
ebfd146a
IR
3179 }
3180
3181 return true;
3182}
3183
3184
3185/* Generate vector permute statements from a list of loads in DR_CHAIN.
3186 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3187 permute statements for the SLP node NODE of the SLP instance
3188 SLP_NODE_INSTANCE. */
3189
ebfd146a 3190bool
01d8bf07 3191vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
ebfd146a
IR
3192 gimple_stmt_iterator *gsi, int vf,
3193 slp_instance slp_node_instance, bool analyze_only)
3194{
355fe088 3195 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3196 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3197 tree mask_element_type = NULL_TREE, mask_type;
502f0263 3198 int i, j, k, nunits, vec_index = 0;
2635892a 3199 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
3200 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3201 int first_mask_element;
22e4dee7
RH
3202 int index, unroll_factor, current_mask_element, ncopies;
3203 unsigned char *mask;
ebfd146a
IR
3204 bool only_one_vec = false, need_next_vector = false;
3205 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
694a4f61
IR
3206 int number_of_mask_fixes = 1;
3207 bool mask_fixed = false;
3208 bool needs_first_vector = false;
ef4bddc2 3209 machine_mode mode;
ebfd146a 3210
91ff1504
RB
3211 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3212 return false;
3213
3214 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3215
22e4dee7
RH
3216 mode = TYPE_MODE (vectype);
3217
3218 if (!can_vec_perm_p (mode, false, NULL))
ebfd146a 3219 {
73fbfcad 3220 if (dump_enabled_p ())
ebfd146a 3221 {
78c60e3d
SS
3222 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3223 "no vect permute for ");
3224 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3225 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 3226 }
2635892a 3227 return false;
ebfd146a
IR
3228 }
3229
2635892a
RH
3230 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3231 same size as the vector element being permuted. */
96f9265a
RG
3232 mask_element_type = lang_hooks.types.type_for_mode
3233 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
ebfd146a 3234 mask_type = get_vectype_for_scalar_type (mask_element_type);
ebfd146a 3235 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 3236 mask = XALLOCAVEC (unsigned char, nunits);
ebfd146a
IR
3237 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3238
3239 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
3240 unrolling factor. */
91ff1504
RB
3241 orig_vec_stmts_num
3242 = (STMT_VINFO_GROUP_SIZE (stmt_info)
3243 * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
3244 + nunits - 1) / nunits;
ebfd146a
IR
3245 if (orig_vec_stmts_num == 1)
3246 only_one_vec = true;
3247
b8698a0f 3248 /* Number of copies is determined by the final vectorization factor
ebfd146a 3249 relatively to SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 3250 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 3251
b8698a0f
L
3252 /* Generate permutation masks for every NODE. Number of masks for each NODE
3253 is equal to GROUP_SIZE.
3254 E.g., we have a group of three nodes with three loads from the same
3255 location in each node, and the vector size is 4. I.e., we have a
3256 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3257 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3258 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3259 ...
3260
2635892a 3261 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3262 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3263 operation, and the mask element values can't be outside that range.
3264 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3265 For the first two permutations we need the first and the second input
ebfd146a 3266 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3267 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3268 {c2,a3,b3,c3}. */
3269
21c0a521 3270 {
ebfd146a
IR
3271 index = 0;
3272 vect_stmts_counter = 0;
3273 vec_index = 0;
3274 first_vec_index = vec_index++;
3275 if (only_one_vec)
3276 second_vec_index = first_vec_index;
3277 else
3278 second_vec_index = vec_index++;
3279
3280 for (j = 0; j < unroll_factor; j++)
3281 {
3282 for (k = 0; k < group_size; k++)
3283 {
01d8bf07 3284 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
9b999e8c 3285 first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
2635892a
RH
3286 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3287 nunits, only_one_vec, index,
3288 mask, &current_mask_element,
3289 &need_next_vector,
3290 &number_of_mask_fixes, &mask_fixed,
3291 &needs_first_vector))
3292 return false;
c8047699
RB
3293 gcc_assert (current_mask_element >= 0
3294 && current_mask_element < 2 * nunits);
2635892a 3295 mask[index++] = current_mask_element;
ebfd146a 3296
2635892a 3297 if (index == nunits)
ebfd146a 3298 {
01d8bf07 3299 index = 0;
22e4dee7
RH
3300 if (!can_vec_perm_p (mode, false, mask))
3301 {
73fbfcad 3302 if (dump_enabled_p ())
22e4dee7 3303 {
78c60e3d
SS
3304 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3305 vect_location,
3306 "unsupported vect permute { ");
22e4dee7 3307 for (i = 0; i < nunits; ++i)
78c60e3d
SS
3308 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3309 mask[i]);
3310 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7
RH
3311 }
3312 return false;
3313 }
3314
ebfd146a
IR
3315 if (!analyze_only)
3316 {
01d8bf07
RB
3317 int l;
3318 tree mask_vec, *mask_elts;
3319 mask_elts = XALLOCAVEC (tree, nunits);
3320 for (l = 0; l < nunits; ++l)
3321 mask_elts[l] = build_int_cst (mask_element_type,
3322 mask[l]);
3323 mask_vec = build_vector (mask_type, mask_elts);
3324
3325 if (need_next_vector)
ebfd146a
IR
3326 {
3327 first_vec_index = second_vec_index;
3328 second_vec_index = vec_index;
3329 }
3330
502f0263 3331 vect_create_mask_and_perm (stmt,
faf63e39 3332 mask_vec, first_vec_index, second_vec_index,
2635892a 3333 gsi, node, vectype, dr_chain,
faf63e39 3334 ncopies, vect_stmts_counter++);
ebfd146a 3335 }
b8698a0f
L
3336 }
3337 }
3338 }
3339 }
ebfd146a 3340
ebfd146a
IR
3341 return true;
3342}
3343
3344
3345
3346/* Vectorize SLP instance tree in postorder. */
3347
3348static bool
3349vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 3350 unsigned int vectorization_factor)
ebfd146a 3351{
355fe088 3352 gimple *stmt;
0d0293ac 3353 bool grouped_store, is_store;
ebfd146a
IR
3354 gimple_stmt_iterator si;
3355 stmt_vec_info stmt_info;
3356 unsigned int vec_stmts_size, nunits, group_size;
3357 tree vectype;
3358 int i;
d755c7ef 3359 slp_tree child;
ebfd146a
IR
3360
3361 if (!node)
3362 return false;
3363
9771b263 3364 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3365 vect_schedule_slp_instance (child, instance, vectorization_factor);
b8698a0f 3366
9771b263 3367 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3368 stmt_info = vinfo_for_stmt (stmt);
3369
3370 /* VECTYPE is the type of the destination. */
b690cc0f 3371 vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
3372 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3373 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3374
3375 /* For each SLP instance calculate number of vector stmts to be created
ff802fa1 3376 for the scalar stmts in each node of the SLP tree. Number of vector
ebfd146a
IR
3377 elements in one vector iteration is the number of scalar elements in
3378 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
14a61437
RB
3379 size.
3380 Unless this is a SLP reduction in which case the number of vector
3381 stmts is equal to the number of vector stmts of the children. */
3382 if (GROUP_FIRST_ELEMENT (stmt_info)
3383 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3384 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3385 else
3386 vec_stmts_size = (vectorization_factor * group_size) / nunits;
ebfd146a 3387
9771b263 3388 if (!SLP_TREE_VEC_STMTS (node).exists ())
ebfd146a 3389 {
9771b263 3390 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
ebfd146a
IR
3391 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3392 }
3393
73fbfcad 3394 if (dump_enabled_p ())
ebfd146a 3395 {
78c60e3d
SS
3396 dump_printf_loc (MSG_NOTE,vect_location,
3397 "------>vectorizing SLP node starting from: ");
3398 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 3399 dump_printf (MSG_NOTE, "\n");
b8698a0f 3400 }
ebfd146a 3401
2e8ab70c
RB
3402 /* Vectorized stmts go before the last scalar stmt which is where
3403 all uses are ready. */
3404 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
e4a707c4 3405
b010117a
IR
3406 /* Mark the first element of the reduction chain as reduction to properly
3407 transform the node. In the analysis phase only the last element of the
3408 chain is marked as reduction. */
0d0293ac 3409 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
b010117a
IR
3410 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3411 {
3412 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3413 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3414 }
3415
6876e5bc
RB
3416 /* Handle two-operation SLP nodes by vectorizing the group with
3417 both operations and then performing a merge. */
3418 if (SLP_TREE_TWO_OPERATORS (node))
3419 {
3420 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3421 enum tree_code ocode;
355fe088 3422 gimple *ostmt;
6876e5bc
RB
3423 unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3424 bool allsame = true;
3425 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3426 if (gimple_assign_rhs_code (ostmt) != code0)
3427 {
3428 mask[i] = 1;
3429 allsame = false;
3430 ocode = gimple_assign_rhs_code (ostmt);
3431 }
3432 else
3433 mask[i] = 0;
3434 if (!allsame)
3435 {
355fe088
TS
3436 vec<gimple *> v0;
3437 vec<gimple *> v1;
6876e5bc
RB
3438 unsigned j;
3439 tree tmask = NULL_TREE;
3440 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3441 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3442 SLP_TREE_VEC_STMTS (node).truncate (0);
3443 gimple_assign_set_rhs_code (stmt, ocode);
3444 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3445 gimple_assign_set_rhs_code (stmt, code0);
3446 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3447 SLP_TREE_VEC_STMTS (node).truncate (0);
3448 tree meltype = build_nonstandard_integer_type
3449 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3450 tree mvectype = get_same_sized_vectype (meltype, vectype);
3451 unsigned k = 0, l;
3452 for (j = 0; j < v0.length (); ++j)
3453 {
3454 tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3455 for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3456 {
1ece8d4c 3457 if (k >= group_size)
6876e5bc
RB
3458 k = 0;
3459 melts[l] = build_int_cst
3460 (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3461 }
3462 tmask = build_vector (mvectype, melts);
3463
3464 /* ??? Not all targets support a VEC_PERM_EXPR with a
3465 constant mask that would translate to a vec_merge RTX
3466 (with their vec_perm_const_ok). We can either not
3467 vectorize in that case or let veclower do its job.
3468 Unfortunately that isn't too great and at least for
3469 plus/minus we'd eventually like to match targets
3470 vector addsub instructions. */
355fe088 3471 gimple *vstmt;
6876e5bc
RB
3472 vstmt = gimple_build_assign (make_ssa_name (vectype),
3473 VEC_PERM_EXPR,
3474 gimple_assign_lhs (v0[j]),
3475 gimple_assign_lhs (v1[j]), tmask);
3476 vect_finish_stmt_generation (stmt, vstmt, &si);
3477 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3478 }
3479 v0.release ();
3480 v1.release ();
3481 return false;
3482 }
3483 }
0d0293ac 3484 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
b5aeb3bb 3485 return is_store;
ebfd146a
IR
3486}
3487
dd34c087
JJ
3488/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3489 For loop vectorization this is done in vectorizable_call, but for SLP
3490 it needs to be deferred until end of vect_schedule_slp, because multiple
3491 SLP instances may refer to the same scalar stmt. */
3492
3493static void
3494vect_remove_slp_scalar_calls (slp_tree node)
3495{
355fe088 3496 gimple *stmt, *new_stmt;
dd34c087
JJ
3497 gimple_stmt_iterator gsi;
3498 int i;
d755c7ef 3499 slp_tree child;
dd34c087
JJ
3500 tree lhs;
3501 stmt_vec_info stmt_info;
3502
3503 if (!node)
3504 return;
3505
9771b263 3506 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3507 vect_remove_slp_scalar_calls (child);
dd34c087 3508
9771b263 3509 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
dd34c087
JJ
3510 {
3511 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3512 continue;
3513 stmt_info = vinfo_for_stmt (stmt);
3514 if (stmt_info == NULL
3515 || is_pattern_stmt_p (stmt_info)
3516 || !PURE_SLP_STMT (stmt_info))
3517 continue;
3518 lhs = gimple_call_lhs (stmt);
3519 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3520 set_vinfo_for_stmt (new_stmt, stmt_info);
3521 set_vinfo_for_stmt (stmt, NULL);
3522 STMT_VINFO_STMT (stmt_info) = new_stmt;
3523 gsi = gsi_for_stmt (stmt);
3524 gsi_replace (&gsi, new_stmt, false);
3525 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3526 }
3527}
ebfd146a 3528
ff802fa1
IR
3529/* Generate vector code for all SLP instances in the loop/basic block. */
3530
ebfd146a 3531bool
310213d4 3532vect_schedule_slp (vec_info *vinfo)
ebfd146a 3533{
9771b263 3534 vec<slp_instance> slp_instances;
ebfd146a 3535 slp_instance instance;
01d8bf07 3536 unsigned int i, vf;
ebfd146a
IR
3537 bool is_store = false;
3538
310213d4
RB
3539 slp_instances = vinfo->slp_instances;
3540 if (is_a <loop_vec_info> (vinfo))
3541 vf = as_a <loop_vec_info> (vinfo)->vectorization_factor;
a70d6342 3542 else
310213d4 3543 vf = 1;
a70d6342 3544
9771b263 3545 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
3546 {
3547 /* Schedule the tree of INSTANCE. */
3548 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 3549 instance, vf);
73fbfcad 3550 if (dump_enabled_p ())
78c60e3d 3551 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3552 "vectorizing stmts using SLP.\n");
ebfd146a
IR
3553 }
3554
9771b263 3555 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
3556 {
3557 slp_tree root = SLP_INSTANCE_TREE (instance);
355fe088 3558 gimple *store;
b5aeb3bb
IR
3559 unsigned int j;
3560 gimple_stmt_iterator gsi;
3561
c40eced0
RB
3562 /* Remove scalar call stmts. Do not do this for basic-block
3563 vectorization as not all uses may be vectorized.
3564 ??? Why should this be necessary? DCE should be able to
3565 remove the stmts itself.
3566 ??? For BB vectorization we can as well remove scalar
3567 stmts starting from the SLP tree root if they have no
3568 uses. */
310213d4 3569 if (is_a <loop_vec_info> (vinfo))
c40eced0 3570 vect_remove_slp_scalar_calls (root);
dd34c087 3571
9771b263 3572 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
b5aeb3bb
IR
3573 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3574 {
3575 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3576 break;
3577
a024e70e
IR
3578 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3579 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
b5aeb3bb
IR
3580 /* Free the attached stmt_vec_info and remove the stmt. */
3581 gsi = gsi_for_stmt (store);
3d3f2249 3582 unlink_stmt_vdef (store);
b5aeb3bb 3583 gsi_remove (&gsi, true);
3d3f2249 3584 release_defs (store);
b5aeb3bb
IR
3585 free_stmt_vec_info (store);
3586 }
3587 }
3588
ebfd146a
IR
3589 return is_store;
3590}