]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-slp.c
Daily bump.
[thirdparty/gcc.git] / gcc / tree-vect-slp.c
CommitLineData
ebfd146a 1/* SLP - Basic Block Vectorization
5624e564 2 Copyright (C) 2007-2015 Free Software Foundation, Inc.
b8698a0f 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
ebfd146a
IR
4 and Ira Rosen <irar@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
10Software Foundation; either version 3, or (at your option) any later
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
78c60e3d 25#include "dumpfile.h"
c7131fb2 26#include "backend.h"
ebfd146a 27#include "tree.h"
c7131fb2 28#include "gimple.h"
49b8fe6c 29#include "params.h"
c7131fb2
AM
30#include "rtl.h"
31#include "ssa.h"
32#include "alias.h"
40e23961 33#include "fold-const.h"
d8a2d370 34#include "stor-layout.h"
ebfd146a 35#include "target.h"
cf835838 36#include "gimple-pretty-print.h"
2fb9a547 37#include "internal-fn.h"
5be5c238 38#include "gimple-iterator.h"
7ee2468b 39#include "tree-pass.h"
ebfd146a 40#include "cfgloop.h"
36566b39 41#include "flags.h"
36566b39 42#include "insn-config.h"
7ee2468b 43#include "recog.h" /* FIXME: for insn_data */
b0710fe1 44#include "insn-codes.h"
385399a8 45#include "optabs-tree.h"
ebfd146a 46#include "tree-vectorizer.h"
2635892a 47#include "langhooks.h"
642fce57 48#include "gimple-walk.h"
ebfd146a 49
a70d6342
IR
50/* Extract the location of the basic block in the source code.
51 Return the basic block location if succeed and NULL if not. */
52
b05e0233 53source_location
a70d6342
IR
54find_bb_location (basic_block bb)
55{
56 gimple stmt = NULL;
57 gimple_stmt_iterator si;
58
59 if (!bb)
b05e0233 60 return UNKNOWN_LOCATION;
a70d6342
IR
61
62 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
63 {
64 stmt = gsi_stmt (si);
b05e0233 65 if (gimple_location (stmt) != UNKNOWN_LOCATION)
a70d6342
IR
66 return gimple_location (stmt);
67 }
68
b05e0233 69 return UNKNOWN_LOCATION;
a70d6342
IR
70}
71
72
ebfd146a
IR
73/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
74
75static void
76vect_free_slp_tree (slp_tree node)
77{
d092494c 78 int i;
d755c7ef 79 slp_tree child;
d092494c 80
ebfd146a
IR
81 if (!node)
82 return;
83
9771b263 84 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 85 vect_free_slp_tree (child);
b8698a0f 86
9771b263
DN
87 SLP_TREE_CHILDREN (node).release ();
88 SLP_TREE_SCALAR_STMTS (node).release ();
89 SLP_TREE_VEC_STMTS (node).release ();
01d8bf07 90 SLP_TREE_LOAD_PERMUTATION (node).release ();
ebfd146a
IR
91
92 free (node);
93}
94
95
96/* Free the memory allocated for the SLP instance. */
97
98void
99vect_free_slp_instance (slp_instance instance)
100{
101 vect_free_slp_tree (SLP_INSTANCE_TREE (instance));
9771b263 102 SLP_INSTANCE_LOADS (instance).release ();
c7e62a26 103 free (instance);
ebfd146a
IR
104}
105
106
d092494c
IR
107/* Create an SLP node for SCALAR_STMTS. */
108
109static slp_tree
9771b263 110vect_create_new_slp_node (vec<gimple> scalar_stmts)
d092494c 111{
d3cfd39e 112 slp_tree node;
9771b263 113 gimple stmt = scalar_stmts[0];
d092494c
IR
114 unsigned int nops;
115
116 if (is_gimple_call (stmt))
117 nops = gimple_call_num_args (stmt);
118 else if (is_gimple_assign (stmt))
f7e531cf
IR
119 {
120 nops = gimple_num_ops (stmt) - 1;
121 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
122 nops++;
123 }
d092494c
IR
124 else
125 return NULL;
126
d3cfd39e 127 node = XNEW (struct _slp_tree);
d092494c 128 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
9771b263
DN
129 SLP_TREE_VEC_STMTS (node).create (0);
130 SLP_TREE_CHILDREN (node).create (nops);
01d8bf07 131 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
6876e5bc 132 SLP_TREE_TWO_OPERATORS (node) = false;
d092494c
IR
133
134 return node;
135}
136
137
138/* Allocate operands info for NOPS operands, and GROUP_SIZE def-stmts for each
139 operand. */
9771b263 140static vec<slp_oprnd_info>
d092494c
IR
141vect_create_oprnd_info (int nops, int group_size)
142{
143 int i;
144 slp_oprnd_info oprnd_info;
9771b263 145 vec<slp_oprnd_info> oprnds_info;
d092494c 146
9771b263 147 oprnds_info.create (nops);
d092494c
IR
148 for (i = 0; i < nops; i++)
149 {
150 oprnd_info = XNEW (struct _slp_oprnd_info);
9771b263 151 oprnd_info->def_stmts.create (group_size);
d092494c 152 oprnd_info->first_dt = vect_uninitialized_def;
793d9a16 153 oprnd_info->first_op_type = NULL_TREE;
d092494c 154 oprnd_info->first_pattern = false;
effb52da 155 oprnd_info->second_pattern = false;
9771b263 156 oprnds_info.quick_push (oprnd_info);
d092494c
IR
157 }
158
159 return oprnds_info;
160}
161
162
d3cfd39e
JJ
163/* Free operands info. */
164
d092494c 165static void
9771b263 166vect_free_oprnd_info (vec<slp_oprnd_info> &oprnds_info)
d092494c
IR
167{
168 int i;
169 slp_oprnd_info oprnd_info;
170
9771b263 171 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
d3cfd39e 172 {
9771b263 173 oprnd_info->def_stmts.release ();
d3cfd39e
JJ
174 XDELETE (oprnd_info);
175 }
d092494c 176
9771b263 177 oprnds_info.release ();
d092494c
IR
178}
179
180
d755c7ef
RB
181/* Find the place of the data-ref in STMT in the interleaving chain that starts
182 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */
183
184static int
185vect_get_place_in_interleaving_chain (gimple stmt, gimple first_stmt)
186{
187 gimple next_stmt = first_stmt;
188 int result = 0;
189
190 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
191 return -1;
192
193 do
194 {
195 if (next_stmt == stmt)
196 return result;
d755c7ef 197 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt));
c8047699
RB
198 if (next_stmt)
199 result += GROUP_GAP (vinfo_for_stmt (next_stmt));
d755c7ef
RB
200 }
201 while (next_stmt);
202
203 return -1;
204}
205
206
d092494c
IR
207/* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
208 they are of a valid type and that they match the defs of the first stmt of
b0b4483e
RB
209 the SLP group (stored in OPRNDS_INFO). If there was a fatal error
210 return -1, if the error could be corrected by swapping operands of the
211 operation return 1, if everything is ok return 0. */
ebfd146a 212
b0b4483e 213static int
a70d6342 214vect_get_and_check_slp_defs (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
effb52da 215 gimple stmt, unsigned stmt_num,
23847df4 216 vec<slp_oprnd_info> *oprnds_info)
ebfd146a
IR
217{
218 tree oprnd;
219 unsigned int i, number_of_oprnds;
abf9bfbc 220 tree def;
ebfd146a 221 gimple def_stmt;
d092494c 222 enum vect_def_type dt = vect_uninitialized_def;
a70d6342 223 struct loop *loop = NULL;
d092494c 224 bool pattern = false;
abf9bfbc 225 slp_oprnd_info oprnd_info;
b0b4483e
RB
226 int first_op_idx = 1;
227 bool commutative = false;
228 bool first_op_cond = false;
effb52da
RB
229 bool first = stmt_num == 0;
230 bool second = stmt_num == 1;
b8698a0f 231
a70d6342
IR
232 if (loop_vinfo)
233 loop = LOOP_VINFO_LOOP (loop_vinfo);
ebfd146a 234
d092494c 235 if (is_gimple_call (stmt))
190c2236
JJ
236 {
237 number_of_oprnds = gimple_call_num_args (stmt);
b0b4483e 238 first_op_idx = 3;
190c2236 239 }
f7e531cf
IR
240 else if (is_gimple_assign (stmt))
241 {
b0b4483e 242 enum tree_code code = gimple_assign_rhs_code (stmt);
f7e531cf
IR
243 number_of_oprnds = gimple_num_ops (stmt) - 1;
244 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
b0b4483e
RB
245 {
246 first_op_cond = true;
247 commutative = true;
248 number_of_oprnds++;
249 }
250 else
251 commutative = commutative_tree_code (code);
f7e531cf 252 }
d092494c 253 else
b0b4483e 254 return -1;
ebfd146a 255
b0b4483e 256 bool swapped = false;
ebfd146a
IR
257 for (i = 0; i < number_of_oprnds; i++)
258 {
b0b4483e
RB
259again:
260 if (first_op_cond)
f7e531cf 261 {
b0b4483e
RB
262 if (i == 0 || i == 1)
263 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx),
264 swapped ? !i : i);
265 else
266 oprnd = gimple_op (stmt, first_op_idx + i - 1);
f7e531cf
IR
267 }
268 else
b0b4483e 269 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i));
f7e531cf 270
9771b263 271 oprnd_info = (*oprnds_info)[i];
ebfd146a 272
24ee1384 273 if (!vect_is_simple_use (oprnd, NULL, loop_vinfo, bb_vinfo, &def_stmt,
3fc356dc 274 &def, &dt))
ebfd146a 275 {
73fbfcad 276 if (dump_enabled_p ())
ebfd146a 277 {
78c60e3d 278 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3fc356dc 279 "Build SLP failed: can't analyze def for ");
78c60e3d 280 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 281 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
282 }
283
b0b4483e 284 return -1;
ebfd146a
IR
285 }
286
a70d6342 287 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
ff802fa1 288 from the pattern. Check that all the stmts of the node are in the
ebfd146a 289 pattern. */
f5709183
IR
290 if (def_stmt && gimple_bb (def_stmt)
291 && ((loop && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
292 || (!loop && gimple_bb (def_stmt) == BB_VINFO_BB (bb_vinfo)
293 && gimple_code (def_stmt) != GIMPLE_PHI))
ebfd146a 294 && vinfo_for_stmt (def_stmt)
83197f37 295 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
f5709183
IR
296 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
297 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
ebfd146a 298 {
d092494c 299 pattern = true;
effb52da
RB
300 if (!first && !oprnd_info->first_pattern
301 /* Allow different pattern state for the defs of the
302 first stmt in reduction chains. */
303 && (oprnd_info->first_dt != vect_reduction_def
304 || (!second && !oprnd_info->second_pattern)))
d092494c 305 {
b0b4483e
RB
306 if (i == 0
307 && !swapped
308 && commutative)
309 {
310 swapped = true;
311 goto again;
312 }
313
73fbfcad 314 if (dump_enabled_p ())
d092494c 315 {
78c60e3d
SS
316 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
317 "Build SLP failed: some of the stmts"
318 " are in a pattern, and others are not ");
319 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
e645e942 320 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
d092494c 321 }
ebfd146a 322
b0b4483e 323 return 1;
ebfd146a
IR
324 }
325
326 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
d092494c 327 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
ebfd146a 328
f7e531cf 329 if (dt == vect_unknown_def_type)
ebfd146a 330 {
73fbfcad 331 if (dump_enabled_p ())
78c60e3d 332 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 333 "Unsupported pattern.\n");
b0b4483e 334 return -1;
ebfd146a
IR
335 }
336
337 switch (gimple_code (def_stmt))
338 {
339 case GIMPLE_PHI:
d092494c 340 def = gimple_phi_result (def_stmt);
ebfd146a
IR
341 break;
342
343 case GIMPLE_ASSIGN:
d092494c 344 def = gimple_assign_lhs (def_stmt);
ebfd146a
IR
345 break;
346
347 default:
73fbfcad 348 if (dump_enabled_p ())
78c60e3d 349 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 350 "unsupported defining stmt:\n");
b0b4483e 351 return -1;
ebfd146a
IR
352 }
353 }
354
effb52da
RB
355 if (second)
356 oprnd_info->second_pattern = pattern;
357
d092494c 358 if (first)
ebfd146a 359 {
d092494c
IR
360 oprnd_info->first_dt = dt;
361 oprnd_info->first_pattern = pattern;
793d9a16 362 oprnd_info->first_op_type = TREE_TYPE (oprnd);
ebfd146a 363 }
ebfd146a
IR
364 else
365 {
d092494c
IR
366 /* Not first stmt of the group, check that the def-stmt/s match
367 the def-stmt/s of the first stmt. Allow different definition
368 types for reduction chains: the first stmt must be a
369 vect_reduction_def (a phi node), and the rest
370 vect_internal_def. */
371 if (((oprnd_info->first_dt != dt
372 && !(oprnd_info->first_dt == vect_reduction_def
793d9a16
RB
373 && dt == vect_internal_def)
374 && !((oprnd_info->first_dt == vect_external_def
375 || oprnd_info->first_dt == vect_constant_def)
376 && (dt == vect_external_def
377 || dt == vect_constant_def)))
378 || !types_compatible_p (oprnd_info->first_op_type,
379 TREE_TYPE (oprnd))))
ebfd146a 380 {
b0b4483e
RB
381 /* Try swapping operands if we got a mismatch. */
382 if (i == 0
383 && !swapped
384 && commutative)
385 {
386 swapped = true;
387 goto again;
388 }
389
abf9bfbc
RB
390 if (dump_enabled_p ())
391 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 392 "Build SLP failed: different types\n");
d092494c 393
b0b4483e 394 return 1;
ebfd146a
IR
395 }
396 }
397
398 /* Check the types of the definitions. */
d092494c 399 switch (dt)
ebfd146a
IR
400 {
401 case vect_constant_def:
8644a673 402 case vect_external_def:
d092494c 403 case vect_reduction_def:
ebfd146a 404 break;
b8698a0f 405
8644a673 406 case vect_internal_def:
abf9bfbc 407 oprnd_info->def_stmts.quick_push (def_stmt);
ebfd146a
IR
408 break;
409
410 default:
411 /* FORNOW: Not supported. */
73fbfcad 412 if (dump_enabled_p ())
ebfd146a 413 {
78c60e3d
SS
414 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
415 "Build SLP failed: illegal type of def ");
416 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def);
e645e942 417 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
418 }
419
b0b4483e 420 return -1;
ebfd146a
IR
421 }
422 }
423
b0b4483e
RB
424 /* Swap operands. */
425 if (swapped)
426 {
427 if (first_op_cond)
428 {
429 tree cond = gimple_assign_rhs1 (stmt);
430 swap_ssa_operands (stmt, &TREE_OPERAND (cond, 0),
431 &TREE_OPERAND (cond, 1));
432 TREE_SET_CODE (cond, swap_tree_comparison (TREE_CODE (cond)));
433 }
434 else
435 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
436 gimple_assign_rhs2_ptr (stmt));
437 }
438
439 return 0;
ebfd146a
IR
440}
441
442
6983e6b5
RB
443/* Verify if the scalar stmts STMTS are isomorphic, require data
444 permutation or are of unsupported types of operation. Return
445 true if they are, otherwise return false and indicate in *MATCHES
446 which stmts are not isomorphic to the first one. If MATCHES[0]
447 is false then this indicates the comparison could not be
448 carried out or the stmts will never be vectorized by SLP. */
ebfd146a
IR
449
450static bool
6983e6b5
RB
451vect_build_slp_tree_1 (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
452 vec<gimple> stmts, unsigned int group_size,
453 unsigned nops, unsigned int *max_nunits,
6876e5bc
RB
454 unsigned int vectorization_factor, bool *matches,
455 bool *two_operators)
ebfd146a 456{
ebfd146a 457 unsigned int i;
6876e5bc
RB
458 gimple first_stmt = stmts[0], stmt = stmts[0];
459 enum tree_code first_stmt_code = ERROR_MARK;
460 enum tree_code alt_stmt_code = ERROR_MARK;
461 enum tree_code rhs_code = ERROR_MARK;
f7e531cf 462 enum tree_code first_cond_code = ERROR_MARK;
ebfd146a 463 tree lhs;
6983e6b5 464 bool need_same_oprnds = false;
e00cdb8a 465 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
ebfd146a
IR
466 optab optab;
467 int icode;
ef4bddc2
RS
468 machine_mode optab_op2_mode;
469 machine_mode vec_mode;
ebfd146a 470 HOST_WIDE_INT dummy;
91ff1504 471 gimple first_load = NULL, prev_first_load = NULL;
f7e531cf 472 tree cond;
d092494c 473
ebfd146a 474 /* For every stmt in NODE find its def stmt/s. */
9771b263 475 FOR_EACH_VEC_ELT (stmts, i, stmt)
ebfd146a 476 {
6983e6b5
RB
477 matches[i] = false;
478
73fbfcad 479 if (dump_enabled_p ())
ebfd146a 480 {
78c60e3d
SS
481 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
482 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 483 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
484 }
485
4b5caab7
IR
486 /* Fail to vectorize statements marked as unvectorizable. */
487 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
488 {
73fbfcad 489 if (dump_enabled_p ())
4b5caab7 490 {
78c60e3d
SS
491 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
492 "Build SLP failed: unvectorizable statement ");
493 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 494 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
4b5caab7 495 }
6983e6b5
RB
496 /* Fatal mismatch. */
497 matches[0] = false;
4b5caab7
IR
498 return false;
499 }
500
ebfd146a
IR
501 lhs = gimple_get_lhs (stmt);
502 if (lhs == NULL_TREE)
503 {
73fbfcad 504 if (dump_enabled_p ())
ebfd146a 505 {
78c60e3d
SS
506 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
507 "Build SLP failed: not GIMPLE_ASSIGN nor "
508 "GIMPLE_CALL ");
509 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 510 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 511 }
6983e6b5
RB
512 /* Fatal mismatch. */
513 matches[0] = false;
ebfd146a
IR
514 return false;
515 }
516
f7e531cf
IR
517 if (is_gimple_assign (stmt)
518 && gimple_assign_rhs_code (stmt) == COND_EXPR
519 && (cond = gimple_assign_rhs1 (stmt))
520 && !COMPARISON_CLASS_P (cond))
521 {
73fbfcad 522 if (dump_enabled_p ())
f7e531cf 523 {
78c60e3d
SS
524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
525 "Build SLP failed: condition is not "
526 "comparison ");
527 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 528 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f7e531cf 529 }
6983e6b5
RB
530 /* Fatal mismatch. */
531 matches[0] = false;
f7e531cf
IR
532 return false;
533 }
534
b8698a0f 535 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy);
ebfd146a
IR
536 vectype = get_vectype_for_scalar_type (scalar_type);
537 if (!vectype)
538 {
73fbfcad 539 if (dump_enabled_p ())
ebfd146a 540 {
78c60e3d
SS
541 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
542 "Build SLP failed: unsupported data-type ");
543 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
544 scalar_type);
e645e942 545 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 546 }
6983e6b5
RB
547 /* Fatal mismatch. */
548 matches[0] = false;
ebfd146a
IR
549 return false;
550 }
b8698a0f 551
dfc55d30
RB
552 /* If populating the vector type requires unrolling then fail
553 before adjusting *max_nunits for basic-block vectorization. */
554 if (bb_vinfo
555 && TYPE_VECTOR_SUBPARTS (vectype) > group_size)
556 {
557 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
558 "Build SLP failed: unrolling required "
559 "in basic block SLP\n");
560 /* Fatal mismatch. */
561 matches[0] = false;
562 return false;
563 }
564
4ef69dfc
IR
565 /* In case of multiple types we need to detect the smallest type. */
566 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype))
a70d6342 567 {
4ef69dfc
IR
568 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
569 if (bb_vinfo)
570 vectorization_factor = *max_nunits;
a70d6342 571 }
b8698a0f 572
538dd0b7 573 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
190c2236
JJ
574 {
575 rhs_code = CALL_EXPR;
538dd0b7
DM
576 if (gimple_call_internal_p (call_stmt)
577 || gimple_call_tail_p (call_stmt)
578 || gimple_call_noreturn_p (call_stmt)
579 || !gimple_call_nothrow_p (call_stmt)
580 || gimple_call_chain (call_stmt))
190c2236 581 {
73fbfcad 582 if (dump_enabled_p ())
190c2236 583 {
78c60e3d
SS
584 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
585 "Build SLP failed: unsupported call type ");
538dd0b7
DM
586 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
587 call_stmt, 0);
e645e942 588 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
190c2236 589 }
6983e6b5
RB
590 /* Fatal mismatch. */
591 matches[0] = false;
190c2236
JJ
592 return false;
593 }
594 }
ebfd146a
IR
595 else
596 rhs_code = gimple_assign_rhs_code (stmt);
597
598 /* Check the operation. */
599 if (i == 0)
600 {
601 first_stmt_code = rhs_code;
602
b8698a0f 603 /* Shift arguments should be equal in all the packed stmts for a
ebfd146a
IR
604 vector shift with scalar shift operand. */
605 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
606 || rhs_code == LROTATE_EXPR
607 || rhs_code == RROTATE_EXPR)
608 {
609 vec_mode = TYPE_MODE (vectype);
610
611 /* First see if we have a vector/vector shift. */
612 optab = optab_for_tree_code (rhs_code, vectype,
613 optab_vector);
614
615 if (!optab
947131ba 616 || optab_handler (optab, vec_mode) == CODE_FOR_nothing)
ebfd146a
IR
617 {
618 /* No vector/vector shift, try for a vector/scalar shift. */
619 optab = optab_for_tree_code (rhs_code, vectype,
620 optab_scalar);
621
622 if (!optab)
623 {
73fbfcad 624 if (dump_enabled_p ())
78c60e3d 625 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
e645e942 626 "Build SLP failed: no optab.\n");
6983e6b5
RB
627 /* Fatal mismatch. */
628 matches[0] = false;
ebfd146a
IR
629 return false;
630 }
947131ba 631 icode = (int) optab_handler (optab, vec_mode);
ebfd146a
IR
632 if (icode == CODE_FOR_nothing)
633 {
73fbfcad 634 if (dump_enabled_p ())
78c60e3d
SS
635 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
636 "Build SLP failed: "
e645e942 637 "op not supported by target.\n");
6983e6b5
RB
638 /* Fatal mismatch. */
639 matches[0] = false;
ebfd146a
IR
640 return false;
641 }
642 optab_op2_mode = insn_data[icode].operand[2].mode;
643 if (!VECTOR_MODE_P (optab_op2_mode))
644 {
645 need_same_oprnds = true;
646 first_op1 = gimple_assign_rhs2 (stmt);
647 }
648 }
649 }
36ba4aae
IR
650 else if (rhs_code == WIDEN_LSHIFT_EXPR)
651 {
652 need_same_oprnds = true;
653 first_op1 = gimple_assign_rhs2 (stmt);
654 }
ebfd146a
IR
655 }
656 else
657 {
6876e5bc
RB
658 if (first_stmt_code != rhs_code
659 && alt_stmt_code == ERROR_MARK)
660 alt_stmt_code = rhs_code;
ebfd146a
IR
661 if (first_stmt_code != rhs_code
662 && (first_stmt_code != IMAGPART_EXPR
663 || rhs_code != REALPART_EXPR)
664 && (first_stmt_code != REALPART_EXPR
69f11a13 665 || rhs_code != IMAGPART_EXPR)
6876e5bc
RB
666 /* Handle mismatches in plus/minus by computing both
667 and merging the results. */
668 && !((first_stmt_code == PLUS_EXPR
669 || first_stmt_code == MINUS_EXPR)
670 && (alt_stmt_code == PLUS_EXPR
671 || alt_stmt_code == MINUS_EXPR)
672 && rhs_code == alt_stmt_code)
0d0293ac 673 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
69f11a13 674 && (first_stmt_code == ARRAY_REF
38000232 675 || first_stmt_code == BIT_FIELD_REF
69f11a13
IR
676 || first_stmt_code == INDIRECT_REF
677 || first_stmt_code == COMPONENT_REF
678 || first_stmt_code == MEM_REF)))
ebfd146a 679 {
73fbfcad 680 if (dump_enabled_p ())
ebfd146a 681 {
78c60e3d
SS
682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
683 "Build SLP failed: different operation "
684 "in stmt ");
685 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
6876e5bc
RB
686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
687 "original stmt ");
688 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
689 first_stmt, 0);
ebfd146a 690 }
6983e6b5
RB
691 /* Mismatch. */
692 continue;
ebfd146a 693 }
b8698a0f
L
694
695 if (need_same_oprnds
ebfd146a
IR
696 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
697 {
73fbfcad 698 if (dump_enabled_p ())
ebfd146a 699 {
78c60e3d
SS
700 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
701 "Build SLP failed: different shift "
702 "arguments in ");
703 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 704 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 705 }
6983e6b5
RB
706 /* Mismatch. */
707 continue;
ebfd146a 708 }
190c2236
JJ
709
710 if (rhs_code == CALL_EXPR)
711 {
9771b263 712 gimple first_stmt = stmts[0];
190c2236
JJ
713 if (gimple_call_num_args (stmt) != nops
714 || !operand_equal_p (gimple_call_fn (first_stmt),
715 gimple_call_fn (stmt), 0)
716 || gimple_call_fntype (first_stmt)
717 != gimple_call_fntype (stmt))
718 {
73fbfcad 719 if (dump_enabled_p ())
190c2236 720 {
78c60e3d
SS
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "Build SLP failed: different calls in ");
723 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
724 stmt, 0);
e645e942 725 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
190c2236 726 }
6983e6b5
RB
727 /* Mismatch. */
728 continue;
190c2236
JJ
729 }
730 }
ebfd146a
IR
731 }
732
0d0293ac
MM
733 /* Grouped store or load. */
734 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
ebfd146a
IR
735 {
736 if (REFERENCE_CLASS_P (lhs))
737 {
738 /* Store. */
6983e6b5 739 ;
ebfd146a 740 }
b5aeb3bb
IR
741 else
742 {
743 /* Load. */
b5aeb3bb
IR
744 /* Check that the size of interleaved loads group is not
745 greater than the SLP group size. */
314f64eb
RB
746 unsigned ncopies
747 = vectorization_factor / TYPE_VECTOR_SUBPARTS (vectype);
6aa904c4 748 if (loop_vinfo
a64b9c26
RB
749 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) == stmt
750 && ((GROUP_SIZE (vinfo_for_stmt (stmt))
751 - GROUP_GAP (vinfo_for_stmt (stmt)))
752 > ncopies * group_size))
b5aeb3bb 753 {
73fbfcad 754 if (dump_enabled_p ())
b5aeb3bb 755 {
78c60e3d
SS
756 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
757 "Build SLP failed: the number "
758 "of interleaved loads is greater than "
759 "the SLP group size ");
760 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
761 stmt, 0);
e645e942 762 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b5aeb3bb 763 }
6983e6b5
RB
764 /* Fatal mismatch. */
765 matches[0] = false;
b5aeb3bb
IR
766 return false;
767 }
768
e14c1050 769 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt));
b5aeb3bb
IR
770 if (prev_first_load)
771 {
772 /* Check that there are no loads from different interleaving
6983e6b5
RB
773 chains in the same node. */
774 if (prev_first_load != first_load)
78c60e3d 775 {
73fbfcad 776 if (dump_enabled_p ())
b5aeb3bb 777 {
78c60e3d
SS
778 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
779 vect_location,
780 "Build SLP failed: different "
781 "interleaving chains in one node ");
782 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
783 stmt, 0);
e645e942 784 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
b5aeb3bb 785 }
6983e6b5
RB
786 /* Mismatch. */
787 continue;
b5aeb3bb
IR
788 }
789 }
790 else
791 prev_first_load = first_load;
ebfd146a 792 }
0d0293ac 793 } /* Grouped access. */
ebfd146a
IR
794 else
795 {
796 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
797 {
0d0293ac 798 /* Not grouped load. */
73fbfcad 799 if (dump_enabled_p ())
ebfd146a 800 {
78c60e3d
SS
801 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
802 "Build SLP failed: not grouped load ");
803 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 804 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
805 }
806
0d0293ac 807 /* FORNOW: Not grouped loads are not supported. */
6983e6b5
RB
808 /* Fatal mismatch. */
809 matches[0] = false;
ebfd146a
IR
810 return false;
811 }
812
813 /* Not memory operation. */
814 if (TREE_CODE_CLASS (rhs_code) != tcc_binary
f7e531cf 815 && TREE_CODE_CLASS (rhs_code) != tcc_unary
effb52da 816 && TREE_CODE_CLASS (rhs_code) != tcc_expression
190c2236 817 && rhs_code != CALL_EXPR)
ebfd146a 818 {
73fbfcad 819 if (dump_enabled_p ())
ebfd146a 820 {
78c60e3d
SS
821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
822 "Build SLP failed: operation");
823 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
824 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 825 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 826 }
6983e6b5
RB
827 /* Fatal mismatch. */
828 matches[0] = false;
ebfd146a
IR
829 return false;
830 }
831
f7e531cf
IR
832 if (rhs_code == COND_EXPR)
833 {
834 tree cond_expr = gimple_assign_rhs1 (stmt);
835
836 if (i == 0)
837 first_cond_code = TREE_CODE (cond_expr);
838 else if (first_cond_code != TREE_CODE (cond_expr))
839 {
73fbfcad 840 if (dump_enabled_p ())
f7e531cf 841 {
78c60e3d
SS
842 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
843 "Build SLP failed: different"
844 " operation");
845 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
846 stmt, 0);
e645e942 847 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
f7e531cf 848 }
6983e6b5
RB
849 /* Mismatch. */
850 continue;
f7e531cf
IR
851 }
852 }
ebfd146a 853 }
6983e6b5
RB
854
855 matches[i] = true;
856 }
857
858 for (i = 0; i < group_size; ++i)
859 if (!matches[i])
860 return false;
861
6876e5bc
RB
862 /* If we allowed a two-operation SLP node verify the target can cope
863 with the permute we are going to use. */
864 if (alt_stmt_code != ERROR_MARK
865 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
866 {
867 unsigned char *sel
868 = XALLOCAVEC (unsigned char, TYPE_VECTOR_SUBPARTS (vectype));
869 for (i = 0; i < TYPE_VECTOR_SUBPARTS (vectype); ++i)
870 {
871 sel[i] = i;
872 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
873 sel[i] += TYPE_VECTOR_SUBPARTS (vectype);
874 }
875 if (!can_vec_perm_p (TYPE_MODE (vectype), false, sel))
876 {
877 for (i = 0; i < group_size; ++i)
878 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code)
879 {
880 matches[i] = false;
881 if (dump_enabled_p ())
882 {
883 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
884 "Build SLP failed: different operation "
885 "in stmt ");
886 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
887 stmts[i], 0);
888 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
889 "original stmt ");
890 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
891 first_stmt, 0);
892 }
893 }
894 return false;
895 }
896 *two_operators = true;
897 }
898
6983e6b5
RB
899 return true;
900}
901
902/* Recursively build an SLP tree starting from NODE.
903 Fail (and return a value not equal to zero) if def-stmts are not
904 isomorphic, require data permutation or are of unsupported types of
905 operation. Otherwise, return 0.
906 The value returned is the depth in the SLP tree where a mismatch
907 was found. */
908
909static bool
910vect_build_slp_tree (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
911 slp_tree *node, unsigned int group_size,
912 unsigned int *max_nunits,
913 vec<slp_tree> *loads,
914 unsigned int vectorization_factor,
1428105c
RB
915 bool *matches, unsigned *npermutes, unsigned *tree_size,
916 unsigned max_tree_size)
6983e6b5 917{
89d390e5 918 unsigned nops, i, this_tree_size = 0;
6983e6b5
RB
919 gimple stmt;
920
6983e6b5
RB
921 matches[0] = false;
922
923 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
924 if (is_gimple_call (stmt))
925 nops = gimple_call_num_args (stmt);
926 else if (is_gimple_assign (stmt))
927 {
928 nops = gimple_num_ops (stmt) - 1;
929 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
930 nops++;
ebfd146a 931 }
6983e6b5
RB
932 else
933 return false;
934
6876e5bc 935 bool two_operators = false;
6983e6b5
RB
936 if (!vect_build_slp_tree_1 (loop_vinfo, bb_vinfo,
937 SLP_TREE_SCALAR_STMTS (*node), group_size, nops,
6876e5bc
RB
938 max_nunits, vectorization_factor, matches,
939 &two_operators))
6983e6b5 940 return false;
6876e5bc 941 SLP_TREE_TWO_OPERATORS (*node) = two_operators;
ebfd146a 942
6983e6b5
RB
943 /* If the SLP node is a load, terminate the recursion. */
944 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))
945 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))))
ebfd146a 946 {
9771b263 947 loads->safe_push (*node);
ebfd146a
IR
948 return true;
949 }
950
6983e6b5
RB
951 /* Get at the operands, verifying they are compatible. */
952 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
953 slp_oprnd_info oprnd_info;
954 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (*node), i, stmt)
955 {
b0b4483e 956 switch (vect_get_and_check_slp_defs (loop_vinfo, bb_vinfo,
effb52da 957 stmt, i, &oprnds_info))
6983e6b5 958 {
b0b4483e
RB
959 case 0:
960 break;
961 case -1:
962 matches[0] = false;
6983e6b5
RB
963 vect_free_oprnd_info (oprnds_info);
964 return false;
b0b4483e
RB
965 case 1:
966 matches[i] = false;
967 break;
6983e6b5
RB
968 }
969 }
b0b4483e
RB
970 for (i = 0; i < group_size; ++i)
971 if (!matches[i])
972 {
973 vect_free_oprnd_info (oprnds_info);
974 return false;
975 }
6983e6b5
RB
976
977 stmt = SLP_TREE_SCALAR_STMTS (*node)[0];
978
b8698a0f 979 /* Create SLP_TREE nodes for the definition node/s. */
9771b263 980 FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info)
ebfd146a 981 {
d092494c 982 slp_tree child;
6983e6b5
RB
983 unsigned old_nloads = loads->length ();
984 unsigned old_max_nunits = *max_nunits;
b8698a0f 985
d092494c
IR
986 if (oprnd_info->first_dt != vect_internal_def)
987 continue;
ebfd146a 988
1428105c
RB
989 if (++this_tree_size > max_tree_size)
990 {
991 vect_free_oprnd_info (oprnds_info);
992 return false;
993 }
994
d092494c 995 child = vect_create_new_slp_node (oprnd_info->def_stmts);
6983e6b5
RB
996 if (!child)
997 {
9771b263 998 vect_free_oprnd_info (oprnds_info);
6983e6b5 999 return false;
d092494c 1000 }
b8698a0f 1001
6983e6b5
RB
1002 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1003 group_size, max_nunits, loads,
1428105c
RB
1004 vectorization_factor, matches,
1005 npermutes, &this_tree_size, max_tree_size))
6983e6b5 1006 {
3fc356dc
RB
1007 /* If we have all children of child built up from scalars then just
1008 throw that away and build it up this node from scalars. */
1009 if (!SLP_TREE_CHILDREN (child).is_empty ())
1010 {
1011 unsigned int j;
1012 slp_tree grandchild;
1013
1014 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1015 if (grandchild != NULL)
1016 break;
1017 if (!grandchild)
1018 {
1019 /* Roll back. */
1020 *max_nunits = old_max_nunits;
1021 loads->truncate (old_nloads);
1022 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1023 vect_free_slp_tree (grandchild);
1024 SLP_TREE_CHILDREN (child).truncate (0);
1025
1026 dump_printf_loc (MSG_NOTE, vect_location,
1027 "Building parent vector operands from "
1028 "scalars instead\n");
1029 oprnd_info->def_stmts = vNULL;
1030 vect_free_slp_tree (child);
1031 SLP_TREE_CHILDREN (*node).quick_push (NULL);
1032 continue;
1033 }
1034 }
1035
6983e6b5
RB
1036 oprnd_info->def_stmts = vNULL;
1037 SLP_TREE_CHILDREN (*node).quick_push (child);
1038 continue;
1039 }
1040
90dd6e3d
RB
1041 /* If the SLP build failed fatally and we analyze a basic-block
1042 simply treat nodes we fail to build as externally defined
1043 (and thus build vectors from the scalar defs).
1044 The cost model will reject outright expensive cases.
1045 ??? This doesn't treat cases where permutation ultimatively
1046 fails (or we don't try permutation below). Ideally we'd
1047 even compute a permutation that will end up with the maximum
1048 SLP tree size... */
1049 if (bb_vinfo
1050 && !matches[0]
1051 /* ??? Rejecting patterns this way doesn't work. We'd have to
1052 do extra work to cancel the pattern so the uses see the
1053 scalar version. */
1054 && !is_pattern_stmt_p (vinfo_for_stmt (stmt)))
1055 {
9626d143
RB
1056 unsigned int j;
1057 slp_tree grandchild;
1058
1059 /* Roll back. */
1060 *max_nunits = old_max_nunits;
1061 loads->truncate (old_nloads);
1062 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1063 vect_free_slp_tree (grandchild);
1064 SLP_TREE_CHILDREN (child).truncate (0);
1065
90dd6e3d
RB
1066 dump_printf_loc (MSG_NOTE, vect_location,
1067 "Building vector operands from scalars\n");
1068 oprnd_info->def_stmts = vNULL;
1069 vect_free_slp_tree (child);
1070 SLP_TREE_CHILDREN (*node).quick_push (NULL);
1071 continue;
1072 }
1073
6983e6b5
RB
1074 /* If the SLP build for operand zero failed and operand zero
1075 and one can be commutated try that for the scalar stmts
1076 that failed the match. */
1077 if (i == 0
1078 /* A first scalar stmt mismatch signals a fatal mismatch. */
1079 && matches[0]
1080 /* ??? For COND_EXPRs we can swap the comparison operands
1081 as well as the arms under some constraints. */
1082 && nops == 2
1083 && oprnds_info[1]->first_dt == vect_internal_def
1084 && is_gimple_assign (stmt)
1085 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1ece8d4c 1086 && !SLP_TREE_TWO_OPERATORS (*node)
6983e6b5
RB
1087 /* Do so only if the number of not successful permutes was nor more
1088 than a cut-ff as re-trying the recursive match on
1089 possibly each level of the tree would expose exponential
1090 behavior. */
1091 && *npermutes < 4)
1092 {
e72baed7
JJ
1093 unsigned int j;
1094 slp_tree grandchild;
1095
6983e6b5
RB
1096 /* Roll back. */
1097 *max_nunits = old_max_nunits;
1098 loads->truncate (old_nloads);
e72baed7
JJ
1099 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1100 vect_free_slp_tree (grandchild);
1101 SLP_TREE_CHILDREN (child).truncate (0);
1102
6983e6b5 1103 /* Swap mismatched definition stmts. */
b0b4483e
RB
1104 dump_printf_loc (MSG_NOTE, vect_location,
1105 "Re-trying with swapped operands of stmts ");
e72baed7 1106 for (j = 0; j < group_size; ++j)
6983e6b5
RB
1107 if (!matches[j])
1108 {
6b4db501
MM
1109 std::swap (oprnds_info[0]->def_stmts[j],
1110 oprnds_info[1]->def_stmts[j]);
b0b4483e 1111 dump_printf (MSG_NOTE, "%d ", j);
6983e6b5 1112 }
b0b4483e 1113 dump_printf (MSG_NOTE, "\n");
74574669
RB
1114 /* And try again with scratch 'matches' ... */
1115 bool *tem = XALLOCAVEC (bool, group_size);
6983e6b5
RB
1116 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &child,
1117 group_size, max_nunits, loads,
1118 vectorization_factor,
74574669 1119 tem, npermutes, &this_tree_size,
1428105c 1120 max_tree_size))
6983e6b5 1121 {
74574669
RB
1122 /* ... so if successful we can apply the operand swapping
1123 to the GIMPLE IL. This is necessary because for example
1124 vect_get_slp_defs uses operand indexes and thus expects
1125 canonical operand order. */
1126 for (j = 0; j < group_size; ++j)
1127 if (!matches[j])
1128 {
1129 gimple stmt = SLP_TREE_SCALAR_STMTS (*node)[j];
1130 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1131 gimple_assign_rhs2_ptr (stmt));
1132 }
6983e6b5
RB
1133 oprnd_info->def_stmts = vNULL;
1134 SLP_TREE_CHILDREN (*node).quick_push (child);
1135 continue;
1136 }
1137
1138 ++*npermutes;
1139 }
1140
1141 oprnd_info->def_stmts = vNULL;
1142 vect_free_slp_tree (child);
1143 vect_free_oprnd_info (oprnds_info);
1144 return false;
ebfd146a
IR
1145 }
1146
1428105c
RB
1147 if (tree_size)
1148 *tree_size += this_tree_size;
1149
9771b263 1150 vect_free_oprnd_info (oprnds_info);
ebfd146a
IR
1151 return true;
1152}
1153
78c60e3d 1154/* Dump a slp tree NODE using flags specified in DUMP_KIND. */
ebfd146a
IR
1155
1156static void
78c60e3d 1157vect_print_slp_tree (int dump_kind, slp_tree node)
ebfd146a
IR
1158{
1159 int i;
1160 gimple stmt;
d755c7ef 1161 slp_tree child;
ebfd146a
IR
1162
1163 if (!node)
1164 return;
1165
78c60e3d 1166 dump_printf (dump_kind, "node ");
9771b263 1167 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a 1168 {
78c60e3d
SS
1169 dump_printf (dump_kind, "\n\tstmt %d ", i);
1170 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
ebfd146a 1171 }
78c60e3d 1172 dump_printf (dump_kind, "\n");
ebfd146a 1173
9771b263 1174 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1175 vect_print_slp_tree (dump_kind, child);
ebfd146a
IR
1176}
1177
1178
b8698a0f
L
1179/* Mark the tree rooted at NODE with MARK (PURE_SLP or HYBRID).
1180 If MARK is HYBRID, it refers to a specific stmt in NODE (the stmt at index
ff802fa1 1181 J). Otherwise, MARK is PURE_SLP and J is -1, which indicates that all the
ebfd146a
IR
1182 stmts in NODE are to be marked. */
1183
1184static void
1185vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1186{
1187 int i;
1188 gimple stmt;
d755c7ef 1189 slp_tree child;
ebfd146a
IR
1190
1191 if (!node)
1192 return;
1193
9771b263 1194 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
ebfd146a
IR
1195 if (j < 0 || i == j)
1196 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark;
1197
9771b263 1198 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1199 vect_mark_slp_stmts (child, mark, j);
ebfd146a
IR
1200}
1201
1202
a70d6342
IR
1203/* Mark the statements of the tree rooted at NODE as relevant (vect_used). */
1204
1205static void
1206vect_mark_slp_stmts_relevant (slp_tree node)
1207{
1208 int i;
1209 gimple stmt;
1210 stmt_vec_info stmt_info;
d755c7ef 1211 slp_tree child;
a70d6342
IR
1212
1213 if (!node)
1214 return;
1215
9771b263 1216 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
1217 {
1218 stmt_info = vinfo_for_stmt (stmt);
b8698a0f 1219 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
a70d6342
IR
1220 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1221 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1222 }
1223
9771b263 1224 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1225 vect_mark_slp_stmts_relevant (child);
a70d6342
IR
1226}
1227
1228
b5aeb3bb
IR
1229/* Rearrange the statements of NODE according to PERMUTATION. */
1230
1231static void
1232vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
01d8bf07 1233 vec<unsigned> permutation)
b5aeb3bb
IR
1234{
1235 gimple stmt;
9771b263 1236 vec<gimple> tmp_stmts;
d755c7ef
RB
1237 unsigned int i;
1238 slp_tree child;
b5aeb3bb 1239
9771b263 1240 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 1241 vect_slp_rearrange_stmts (child, group_size, permutation);
b5aeb3bb 1242
9771b263
DN
1243 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1244 tmp_stmts.create (group_size);
d755c7ef 1245 tmp_stmts.quick_grow_cleared (group_size);
b5aeb3bb 1246
9771b263 1247 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
d755c7ef 1248 tmp_stmts[permutation[i]] = stmt;
b5aeb3bb 1249
9771b263 1250 SLP_TREE_SCALAR_STMTS (node).release ();
b5aeb3bb
IR
1251 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1252}
1253
1254
b266b968
RB
1255/* Attempt to reorder stmts in a reduction chain so that we don't
1256 require any load permutation. Return true if that was possible,
1257 otherwise return false. */
1258
1259static bool
1260vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
1261{
1262 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1263 unsigned int i, j;
1264 sbitmap load_index;
1265 unsigned int lidx;
1266 slp_tree node, load;
1267
1268 /* Compare all the permutation sequences to the first one. We know
1269 that at least one load is permuted. */
1270 node = SLP_INSTANCE_LOADS (slp_instn)[0];
1271 if (!node->load_permutation.exists ())
1272 return false;
1273 for (i = 1; SLP_INSTANCE_LOADS (slp_instn).iterate (i, &load); ++i)
1274 {
1275 if (!load->load_permutation.exists ())
1276 return false;
1277 FOR_EACH_VEC_ELT (load->load_permutation, j, lidx)
1278 if (lidx != node->load_permutation[j])
1279 return false;
1280 }
1281
1282 /* Check that the loads in the first sequence are different and there
1283 are no gaps between them. */
1284 load_index = sbitmap_alloc (group_size);
1285 bitmap_clear (load_index);
1286 FOR_EACH_VEC_ELT (node->load_permutation, i, lidx)
1287 {
1288 if (bitmap_bit_p (load_index, lidx))
1289 {
1290 sbitmap_free (load_index);
1291 return false;
1292 }
1293 bitmap_set_bit (load_index, lidx);
1294 }
1295 for (i = 0; i < group_size; i++)
1296 if (!bitmap_bit_p (load_index, i))
1297 {
1298 sbitmap_free (load_index);
1299 return false;
1300 }
1301 sbitmap_free (load_index);
1302
1303 /* This permutation is valid for reduction. Since the order of the
1304 statements in the nodes is not important unless they are memory
1305 accesses, we can rearrange the statements in all the nodes
1306 according to the order of the loads. */
1307 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1308 node->load_permutation);
1309
1310 /* We are done, no actual permutations need to be generated. */
1311 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1312 SLP_TREE_LOAD_PERMUTATION (node).release ();
1313 return true;
1314}
1315
01d8bf07
RB
1316/* Check if the required load permutations in the SLP instance
1317 SLP_INSTN are supported. */
ebfd146a
IR
1318
1319static bool
01d8bf07 1320vect_supported_load_permutation_p (slp_instance slp_instn)
ebfd146a 1321{
01d8bf07
RB
1322 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1323 unsigned int i, j, k, next;
6983e6b5
RB
1324 slp_tree node;
1325 gimple stmt, load, next_load, first_load;
6aa904c4 1326 struct data_reference *dr;
ebfd146a 1327
73fbfcad 1328 if (dump_enabled_p ())
ebfd146a 1329 {
78c60e3d 1330 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
01d8bf07
RB
1331 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1332 if (node->load_permutation.exists ())
1333 FOR_EACH_VEC_ELT (node->load_permutation, j, next)
1334 dump_printf (MSG_NOTE, "%d ", next);
1335 else
bddc974e
TJ
1336 for (k = 0; k < group_size; ++k)
1337 dump_printf (MSG_NOTE, "%d ", k);
e645e942 1338 dump_printf (MSG_NOTE, "\n");
ebfd146a
IR
1339 }
1340
b5aeb3bb
IR
1341 /* In case of reduction every load permutation is allowed, since the order
1342 of the reduction statements is not important (as opposed to the case of
0d0293ac 1343 grouped stores). The only condition we need to check is that all the
b5aeb3bb
IR
1344 load nodes are of the same size and have the same permutation (and then
1345 rearrange all the nodes of the SLP instance according to this
1346 permutation). */
1347
1348 /* Check that all the load nodes are of the same size. */
01d8bf07 1349 /* ??? Can't we assert this? */
9771b263 1350 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6983e6b5
RB
1351 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1352 return false;
2200fc49 1353
b5aeb3bb 1354 node = SLP_INSTANCE_TREE (slp_instn);
9771b263 1355 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
b5aeb3bb 1356
b010117a 1357 /* Reduction (there are no data-refs in the root).
b266b968 1358 In reduction chain the order of the loads is not important. */
b010117a
IR
1359 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt))
1360 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1361 {
b266b968
RB
1362 if (vect_attempt_slp_rearrange_stmts (slp_instn))
1363 return true;
b5aeb3bb 1364
b266b968 1365 /* Fallthru to general load permutation handling. */
b5aeb3bb
IR
1366 }
1367
6aa904c4
IR
1368 /* In basic block vectorization we allow any subchain of an interleaving
1369 chain.
1370 FORNOW: not supported in loop SLP because of realignment compications. */
01d8bf07 1371 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt)))
6aa904c4 1372 {
240a94da
RB
1373 /* Check whether the loads in an instance form a subchain and thus
1374 no permutation is necessary. */
9771b263 1375 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
6aa904c4 1376 {
9626d143
RB
1377 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1378 continue;
240a94da 1379 bool subchain_p = true;
6aa904c4 1380 next_load = NULL;
9771b263 1381 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load)
6aa904c4 1382 {
5b5826c4
RB
1383 if (j != 0
1384 && (next_load != load
1385 || GROUP_GAP (vinfo_for_stmt (load)) != 1))
240a94da
RB
1386 {
1387 subchain_p = false;
1388 break;
1389 }
6aa904c4
IR
1390 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load));
1391 }
240a94da
RB
1392 if (subchain_p)
1393 SLP_TREE_LOAD_PERMUTATION (node).release ();
1394 else
1395 {
1396 /* Verify the permutation can be generated. */
1397 vec<tree> tem;
1398 if (!vect_transform_slp_perm_load (node, tem, NULL,
1399 1, slp_instn, true))
1400 {
1401 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1402 vect_location,
1403 "unsupported load permutation\n");
1404 return false;
1405 }
1406 }
6aa904c4
IR
1407 }
1408
1409 /* Check that the alignment of the first load in every subchain, i.e.,
01d8bf07
RB
1410 the first statement in every load node, is supported.
1411 ??? This belongs in alignment checking. */
1412 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1413 {
1414 first_load = SLP_TREE_SCALAR_STMTS (node)[0];
1415 if (first_load != GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_load)))
1416 {
1417 dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (first_load));
1418 if (vect_supportable_dr_alignment (dr, false)
1419 == dr_unaligned_unsupported)
1420 {
1421 if (dump_enabled_p ())
1422 {
1423 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1424 vect_location,
1425 "unsupported unaligned load ");
1426 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
1427 first_load, 0);
e645e942 1428 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
01d8bf07
RB
1429 }
1430 return false;
1431 }
1432 }
1433 }
6aa904c4 1434
01d8bf07 1435 return true;
6aa904c4
IR
1436 }
1437
9b999e8c 1438 /* For loop vectorization verify we can generate the permutation. */
01d8bf07
RB
1439 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1440 if (node->load_permutation.exists ()
1441 && !vect_transform_slp_perm_load
1442 (node, vNULL, NULL,
1443 SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), slp_instn, true))
1444 return false;
9b999e8c 1445
01d8bf07 1446 return true;
ebfd146a
IR
1447}
1448
1449
e4a707c4 1450/* Find the last store in SLP INSTANCE. */
ff802fa1 1451
e4a707c4 1452static gimple
2e8ab70c 1453vect_find_last_scalar_stmt_in_slp (slp_tree node)
e4a707c4 1454{
2e8ab70c 1455 gimple last = NULL, stmt;
e4a707c4 1456
2e8ab70c
RB
1457 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++)
1458 {
1459 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
1460 if (is_pattern_stmt_p (stmt_vinfo))
1461 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last);
1462 else
1463 last = get_later_stmt (stmt, last);
1464 }
e4a707c4 1465
2e8ab70c 1466 return last;
e4a707c4
IR
1467}
1468
23847df4
RB
1469/* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */
1470
1471static void
1a4b99c1 1472vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node,
23847df4 1473 stmt_vector_for_cost *prologue_cost_vec,
1a4b99c1 1474 stmt_vector_for_cost *body_cost_vec,
23847df4
RB
1475 unsigned ncopies_for_cost)
1476{
23847df4
RB
1477 unsigned i;
1478 slp_tree child;
1479 gimple stmt, s;
1480 stmt_vec_info stmt_info;
1481 tree lhs;
1482 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1483
1484 /* Recurse down the SLP tree. */
1485 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
90dd6e3d 1486 if (child)
1a4b99c1
RB
1487 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1488 body_cost_vec, ncopies_for_cost);
23847df4
RB
1489
1490 /* Look at the first scalar stmt to determine the cost. */
1491 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1492 stmt_info = vinfo_for_stmt (stmt);
1493 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1494 {
1495 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1496 vect_model_store_cost (stmt_info, ncopies_for_cost, false,
1497 vect_uninitialized_def,
1498 node, prologue_cost_vec, body_cost_vec);
1499 else
1500 {
1501 int i;
1502 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1503 vect_model_load_cost (stmt_info, ncopies_for_cost, false,
1504 node, prologue_cost_vec, body_cost_vec);
1505 /* If the load is permuted record the cost for the permutation.
1506 ??? Loads from multiple chains are let through here only
1507 for a single special case involving complex numbers where
1508 in the end no permutation is necessary. */
1509 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, s)
1510 if ((STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo_for_stmt (s))
1511 == STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info))
1512 && vect_get_place_in_interleaving_chain
1513 (s, STMT_VINFO_GROUP_FIRST_ELEMENT (stmt_info)) != i)
1514 {
1515 record_stmt_cost (body_cost_vec, group_size, vec_perm,
1516 stmt_info, 0, vect_body);
1517 break;
1518 }
1519 }
1520 }
1521 else
6876e5bc
RB
1522 {
1523 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1524 stmt_info, 0, vect_body);
1525 if (SLP_TREE_TWO_OPERATORS (node))
1526 {
1527 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1528 stmt_info, 0, vect_body);
1529 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1530 stmt_info, 0, vect_body);
1531 }
1532 }
23847df4
RB
1533
1534 /* Scan operands and account for prologue cost of constants/externals.
1535 ??? This over-estimates cost for multiple uses and should be
1536 re-engineered. */
1537 lhs = gimple_get_lhs (stmt);
1538 for (i = 0; i < gimple_num_ops (stmt); ++i)
1539 {
1540 tree def, op = gimple_op (stmt, i);
1541 gimple def_stmt;
1542 enum vect_def_type dt;
1543 if (!op || op == lhs)
1544 continue;
1a4b99c1
RB
1545 if (vect_is_simple_use (op, NULL, STMT_VINFO_LOOP_VINFO (stmt_info),
1546 STMT_VINFO_BB_VINFO (stmt_info),
2e8ab70c
RB
1547 &def_stmt, &def, &dt))
1548 {
1549 /* Without looking at the actual initializer a vector of
1550 constants can be implemented as load from the constant pool.
1551 ??? We need to pass down stmt_info for a vector type
1552 even if it points to the wrong stmt. */
1553 if (dt == vect_constant_def)
1554 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1555 stmt_info, 0, vect_prologue);
1556 else if (dt == vect_external_def)
1557 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1558 stmt_info, 0, vect_prologue);
1559 }
23847df4
RB
1560 }
1561}
1562
1563/* Compute the cost for the SLP instance INSTANCE. */
1564
1565static void
1a4b99c1 1566vect_analyze_slp_cost (slp_instance instance, void *data)
23847df4
RB
1567{
1568 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1569 unsigned ncopies_for_cost;
1570 stmt_info_for_cost *si;
1571 unsigned i;
1572
1573 /* Calculate the number of vector stmts to create based on the unrolling
1574 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1575 GROUP_SIZE / NUNITS otherwise. */
1576 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1a4b99c1
RB
1577 slp_tree node = SLP_INSTANCE_TREE (instance);
1578 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1579 /* Adjust the group_size by the vectorization factor which is always one
1580 for basic-block vectorization. */
1581 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1582 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1583 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1584 /* For reductions look at a reduction operand in case the reduction
1585 operation is widening like DOT_PROD or SAD. */
1586 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1587 {
1588 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1589 switch (gimple_assign_rhs_code (stmt))
1590 {
1591 case DOT_PROD_EXPR:
1592 case SAD_EXPR:
1593 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1594 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1595 break;
1596 default:;
1597 }
1598 }
23847df4
RB
1599 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1600
1601 prologue_cost_vec.create (10);
1602 body_cost_vec.create (10);
1a4b99c1
RB
1603 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1604 &prologue_cost_vec, &body_cost_vec,
1605 ncopies_for_cost);
23847df4
RB
1606
1607 /* Record the prologue costs, which were delayed until we were
1a4b99c1 1608 sure that SLP was successful. */
23847df4
RB
1609 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1610 {
1611 struct _stmt_vec_info *stmt_info
1612 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1613 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1614 si->misalign, vect_prologue);
1615 }
1616
1a4b99c1
RB
1617 /* Record the instance's instructions in the target cost model. */
1618 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1619 {
1620 struct _stmt_vec_info *stmt_info
1621 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1622 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1623 si->misalign, vect_body);
1624 }
1625
23847df4 1626 prologue_cost_vec.release ();
1a4b99c1 1627 body_cost_vec.release ();
23847df4 1628}
e4a707c4 1629
0d0293ac 1630/* Analyze an SLP instance starting from a group of grouped stores. Call
b8698a0f 1631 vect_build_slp_tree to build a tree of packed stmts if possible.
ebfd146a
IR
1632 Return FALSE if it's impossible to SLP any stmt in the loop. */
1633
1634static bool
a70d6342 1635vect_analyze_slp_instance (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1428105c 1636 gimple stmt, unsigned max_tree_size)
ebfd146a
IR
1637{
1638 slp_instance new_instance;
d092494c 1639 slp_tree node;
e14c1050 1640 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
ebfd146a 1641 unsigned int unrolling_factor = 1, nunits;
b5aeb3bb 1642 tree vectype, scalar_type = NULL_TREE;
ebfd146a 1643 gimple next;
0f900dfa 1644 unsigned int vectorization_factor = 0;
23847df4 1645 int i;
ebfd146a 1646 unsigned int max_nunits = 0;
9771b263 1647 vec<slp_tree> loads;
b5aeb3bb 1648 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt));
9771b263 1649 vec<gimple> scalar_stmts;
b5aeb3bb 1650
b010117a 1651 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
b5aeb3bb 1652 {
b010117a
IR
1653 if (dr)
1654 {
1655 scalar_type = TREE_TYPE (DR_REF (dr));
1656 vectype = get_vectype_for_scalar_type (scalar_type);
1657 }
1658 else
1659 {
1660 gcc_assert (loop_vinfo);
1661 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
1662 }
1663
e14c1050 1664 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
b5aeb3bb
IR
1665 }
1666 else
1667 {
1668 gcc_assert (loop_vinfo);
1669 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt));
9771b263 1670 group_size = LOOP_VINFO_REDUCTIONS (loop_vinfo).length ();
b5aeb3bb 1671 }
b8698a0f 1672
ebfd146a
IR
1673 if (!vectype)
1674 {
73fbfcad 1675 if (dump_enabled_p ())
ebfd146a 1676 {
78c60e3d
SS
1677 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1678 "Build SLP failed: unsupported data-type ");
1679 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
e645e942 1680 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1681 }
b5aeb3bb 1682
ebfd146a
IR
1683 return false;
1684 }
1685
1686 nunits = TYPE_VECTOR_SUBPARTS (vectype);
a70d6342
IR
1687 if (loop_vinfo)
1688 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1689 else
a70d6342
IR
1690 vectorization_factor = nunits;
1691
a70d6342
IR
1692 /* Calculate the unrolling factor. */
1693 unrolling_factor = least_common_multiple (nunits, group_size) / group_size;
1694 if (unrolling_factor != 1 && !loop_vinfo)
1695 {
73fbfcad 1696 if (dump_enabled_p ())
e645e942 1697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1698 "Build SLP failed: unrolling required in basic"
e645e942 1699 " block SLP\n");
b8698a0f 1700
a70d6342
IR
1701 return false;
1702 }
1703
0d0293ac 1704 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
9771b263 1705 scalar_stmts.create (group_size);
ebfd146a 1706 next = stmt;
b010117a 1707 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)))
ebfd146a 1708 {
b5aeb3bb
IR
1709 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
1710 while (next)
1711 {
f7e531cf
IR
1712 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))
1713 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)))
9771b263
DN
1714 scalar_stmts.safe_push (
1715 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next)));
f7e531cf 1716 else
9771b263 1717 scalar_stmts.safe_push (next);
e14c1050 1718 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next));
b5aeb3bb 1719 }
14a61437
RB
1720 /* Mark the first element of the reduction chain as reduction to properly
1721 transform the node. In the reduction analysis phase only the last
1722 element of the chain is marked as reduction. */
1723 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)))
1724 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
b5aeb3bb
IR
1725 }
1726 else
1727 {
1728 /* Collect reduction statements. */
9771b263
DN
1729 vec<gimple> reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1730 for (i = 0; reductions.iterate (i, &next); i++)
1731 scalar_stmts.safe_push (next);
ebfd146a
IR
1732 }
1733
d092494c 1734 node = vect_create_new_slp_node (scalar_stmts);
ebfd146a 1735
9771b263 1736 loads.create (group_size);
ebfd146a
IR
1737
1738 /* Build the tree for the SLP instance. */
89d390e5
RB
1739 bool *matches = XALLOCAVEC (bool, group_size);
1740 unsigned npermutes = 0;
b8698a0f 1741 if (vect_build_slp_tree (loop_vinfo, bb_vinfo, &node, group_size,
abf9bfbc 1742 &max_nunits, &loads,
89d390e5 1743 vectorization_factor, matches, &npermutes, NULL,
1428105c 1744 max_tree_size))
ebfd146a 1745 {
4ef69dfc 1746 /* Calculate the unrolling factor based on the smallest type. */
ebfd146a
IR
1747 if (max_nunits > nunits)
1748 unrolling_factor = least_common_multiple (max_nunits, group_size)
1749 / group_size;
b8698a0f 1750
4ef69dfc
IR
1751 if (unrolling_factor != 1 && !loop_vinfo)
1752 {
73fbfcad 1753 if (dump_enabled_p ())
e645e942 1754 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 1755 "Build SLP failed: unrolling required in basic"
e645e942 1756 " block SLP\n");
c7e62a26 1757 vect_free_slp_tree (node);
9771b263 1758 loads.release ();
4ef69dfc
IR
1759 return false;
1760 }
1761
1762 /* Create a new SLP instance. */
1763 new_instance = XNEW (struct _slp_instance);
1764 SLP_INSTANCE_TREE (new_instance) = node;
1765 SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size;
ebfd146a 1766 SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor;
ebfd146a 1767 SLP_INSTANCE_LOADS (new_instance) = loads;
abf9bfbc
RB
1768
1769 /* Compute the load permutation. */
1770 slp_tree load_node;
1771 bool loads_permuted = false;
abf9bfbc
RB
1772 FOR_EACH_VEC_ELT (loads, i, load_node)
1773 {
01d8bf07 1774 vec<unsigned> load_permutation;
abf9bfbc 1775 int j;
6983e6b5 1776 gimple load, first_stmt;
01d8bf07
RB
1777 bool this_load_permuted = false;
1778 load_permutation.create (group_size);
6983e6b5
RB
1779 first_stmt = GROUP_FIRST_ELEMENT
1780 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0]));
abf9bfbc
RB
1781 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load)
1782 {
6983e6b5
RB
1783 int load_place
1784 = vect_get_place_in_interleaving_chain (load, first_stmt);
1785 gcc_assert (load_place != -1);
1786 if (load_place != j)
01d8bf07 1787 this_load_permuted = true;
abf9bfbc
RB
1788 load_permutation.safe_push (load_place);
1789 }
fe2bef71
RB
1790 if (!this_load_permuted
1791 /* The load requires permutation when unrolling exposes
1792 a gap either because the group is larger than the SLP
1793 group-size or because there is a gap between the groups. */
1794 && (unrolling_factor == 1
1795 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
1796 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)))
01d8bf07
RB
1797 {
1798 load_permutation.release ();
1799 continue;
1800 }
1801 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
1802 loads_permuted = true;
abf9bfbc 1803 }
6aa904c4
IR
1804
1805 if (loads_permuted)
ebfd146a 1806 {
01d8bf07 1807 if (!vect_supported_load_permutation_p (new_instance))
ebfd146a 1808 {
73fbfcad 1809 if (dump_enabled_p ())
ebfd146a 1810 {
e645e942 1811 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
1812 "Build SLP failed: unsupported load "
1813 "permutation ");
1814 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 1815 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 1816 }
ebfd146a
IR
1817 vect_free_slp_instance (new_instance);
1818 return false;
1819 }
ebfd146a 1820 }
ebfd146a 1821
92345349 1822
a70d6342 1823 if (loop_vinfo)
1a4b99c1 1824 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).safe_push (new_instance);
a70d6342 1825 else
9771b263 1826 BB_VINFO_SLP_INSTANCES (bb_vinfo).safe_push (new_instance);
b8698a0f 1827
73fbfcad 1828 if (dump_enabled_p ())
78c60e3d 1829 vect_print_slp_tree (MSG_NOTE, node);
ebfd146a
IR
1830
1831 return true;
1832 }
1833
1834 /* Failed to SLP. */
1835 /* Free the allocated memory. */
1836 vect_free_slp_tree (node);
9771b263 1837 loads.release ();
b8698a0f 1838
a70d6342 1839 return false;
ebfd146a
IR
1840}
1841
1842
ff802fa1 1843/* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
ebfd146a
IR
1844 trees of packed scalar stmts if SLP is possible. */
1845
1846bool
1428105c
RB
1847vect_analyze_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo,
1848 unsigned max_tree_size)
ebfd146a
IR
1849{
1850 unsigned int i;
9771b263 1851 vec<gimple> grouped_stores;
6e1aa848
DN
1852 vec<gimple> reductions = vNULL;
1853 vec<gimple> reduc_chains = vNULL;
b010117a 1854 gimple first_element;
a70d6342 1855 bool ok = false;
ebfd146a 1856
73fbfcad 1857 if (dump_enabled_p ())
e645e942 1858 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
ebfd146a 1859
a70d6342 1860 if (loop_vinfo)
b5aeb3bb 1861 {
0d0293ac 1862 grouped_stores = LOOP_VINFO_GROUPED_STORES (loop_vinfo);
b010117a 1863 reduc_chains = LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo);
b5aeb3bb
IR
1864 reductions = LOOP_VINFO_REDUCTIONS (loop_vinfo);
1865 }
a70d6342 1866 else
0d0293ac 1867 grouped_stores = BB_VINFO_GROUPED_STORES (bb_vinfo);
b8698a0f 1868
0d0293ac 1869 /* Find SLP sequences starting from groups of grouped stores. */
9771b263 1870 FOR_EACH_VEC_ELT (grouped_stores, i, first_element)
1428105c
RB
1871 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1872 max_tree_size))
a70d6342 1873 ok = true;
ebfd146a 1874
effb52da 1875 if (reduc_chains.length () > 0)
b010117a
IR
1876 {
1877 /* Find SLP sequences starting from reduction chains. */
9771b263 1878 FOR_EACH_VEC_ELT (reduc_chains, i, first_element)
1428105c
RB
1879 if (vect_analyze_slp_instance (loop_vinfo, bb_vinfo, first_element,
1880 max_tree_size))
b010117a
IR
1881 ok = true;
1882 else
1883 return false;
1884
1885 /* Don't try to vectorize SLP reductions if reduction chain was
1886 detected. */
1887 return ok;
1888 }
1889
b5aeb3bb 1890 /* Find SLP sequences starting from groups of reductions. */
effb52da 1891 if (reductions.length () > 1
1428105c
RB
1892 && vect_analyze_slp_instance (loop_vinfo, bb_vinfo, reductions[0],
1893 max_tree_size))
b5aeb3bb
IR
1894 ok = true;
1895
ebfd146a
IR
1896 return true;
1897}
1898
1899
1900/* For each possible SLP instance decide whether to SLP it and calculate overall
437f4a00
IR
1901 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
1902 least one instance. */
ebfd146a 1903
437f4a00 1904bool
ebfd146a
IR
1905vect_make_slp_decision (loop_vec_info loop_vinfo)
1906{
1907 unsigned int i, unrolling_factor = 1;
9771b263 1908 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
1909 slp_instance instance;
1910 int decided_to_slp = 0;
1911
73fbfcad 1912 if (dump_enabled_p ())
e645e942
TJ
1913 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
1914 "\n");
ebfd146a 1915
9771b263 1916 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
1917 {
1918 /* FORNOW: SLP if you can. */
1919 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance))
1920 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance);
1921
ff802fa1 1922 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
b8698a0f 1923 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
ff802fa1 1924 loop-based vectorization. Such stmts will be marked as HYBRID. */
ebfd146a
IR
1925 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
1926 decided_to_slp++;
1927 }
1928
1929 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
1930
73fbfcad 1931 if (decided_to_slp && dump_enabled_p ())
ccb3ad87 1932 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1933 "Decided to SLP %d instances. Unrolling factor %d\n",
78c60e3d 1934 decided_to_slp, unrolling_factor);
437f4a00
IR
1935
1936 return (decided_to_slp > 0);
ebfd146a
IR
1937}
1938
1939
1940/* Find stmts that must be both vectorized and SLPed (since they feed stmts that
ff802fa1 1941 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
ebfd146a
IR
1942
1943static void
642fce57 1944vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
ebfd146a 1945{
642fce57 1946 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[i];
ebfd146a
IR
1947 imm_use_iterator imm_iter;
1948 gimple use_stmt;
642fce57 1949 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt);
d755c7ef 1950 slp_tree child;
f2c74cc4 1951 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
642fce57
RB
1952 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1953 int j;
1954
1955 /* Propagate hybrid down the SLP tree. */
1956 if (stype == hybrid)
1957 ;
1958 else if (HYBRID_SLP_STMT (stmt_vinfo))
1959 stype = hybrid;
1960 else
1961 {
1962 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
1963 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
29764870
RB
1964 /* We always get the pattern stmt here, but for immediate
1965 uses we have to use the LHS of the original stmt. */
1966 gcc_checking_assert (!STMT_VINFO_IN_PATTERN_P (stmt_vinfo));
1967 if (STMT_VINFO_RELATED_STMT (stmt_vinfo))
1968 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
642fce57
RB
1969 if (TREE_CODE (gimple_op (stmt, 0)) == SSA_NAME)
1970 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, gimple_op (stmt, 0))
29764870
RB
1971 {
1972 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))
1973 continue;
1974 use_vinfo = vinfo_for_stmt (use_stmt);
1975 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
1976 && STMT_VINFO_RELATED_STMT (use_vinfo))
1977 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
1978 if (!STMT_SLP_TYPE (use_vinfo)
1979 && (STMT_VINFO_RELEVANT (use_vinfo)
1980 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
1981 && !(gimple_code (use_stmt) == GIMPLE_PHI
1982 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
502f0263
RB
1983 {
1984 if (dump_enabled_p ())
1985 {
1986 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
1987 "def in non-SLP stmt: ");
1988 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
1989 }
1990 stype = hybrid;
1991 }
29764870 1992 }
642fce57 1993 }
ebfd146a 1994
502f0263
RB
1995 if (stype == hybrid
1996 && !HYBRID_SLP_STMT (stmt_vinfo))
b1af7da6
RB
1997 {
1998 if (dump_enabled_p ())
1999 {
2000 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2001 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2002 }
2003 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2004 }
ebfd146a 2005
642fce57 2006 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
90dd6e3d
RB
2007 if (child)
2008 vect_detect_hybrid_slp_stmts (child, i, stype);
642fce57 2009}
f2c74cc4 2010
642fce57 2011/* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */
ebfd146a 2012
642fce57
RB
2013static tree
2014vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2015{
2016 walk_stmt_info *wi = (walk_stmt_info *)data;
2017 struct loop *loopp = (struct loop *)wi->info;
2018
2019 if (wi->is_lhs)
2020 return NULL_TREE;
2021
2022 if (TREE_CODE (*tp) == SSA_NAME
2023 && !SSA_NAME_IS_DEFAULT_DEF (*tp))
2024 {
2025 gimple def_stmt = SSA_NAME_DEF_STMT (*tp);
2026 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt))
2027 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt)))
b1af7da6
RB
2028 {
2029 if (dump_enabled_p ())
2030 {
2031 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2032 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2033 }
2034 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2035 }
642fce57
RB
2036 }
2037
2038 return NULL_TREE;
ebfd146a
IR
2039}
2040
642fce57
RB
2041static tree
2042vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2043 walk_stmt_info *)
2044{
2045 /* If the stmt is in a SLP instance then this isn't a reason
2046 to mark use definitions in other SLP instances as hybrid. */
2047 if (STMT_SLP_TYPE (vinfo_for_stmt (gsi_stmt (*gsi))) != loop_vect)
2048 *handled = true;
2049 return NULL_TREE;
2050}
ebfd146a
IR
2051
2052/* Find stmts that must be both vectorized and SLPed. */
2053
2054void
2055vect_detect_hybrid_slp (loop_vec_info loop_vinfo)
2056{
2057 unsigned int i;
9771b263 2058 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
ebfd146a
IR
2059 slp_instance instance;
2060
73fbfcad 2061 if (dump_enabled_p ())
e645e942
TJ
2062 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2063 "\n");
ebfd146a 2064
642fce57
RB
2065 /* First walk all pattern stmt in the loop and mark defs of uses as
2066 hybrid because immediate uses in them are not recorded. */
2067 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2068 {
2069 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2070 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2071 gsi_next (&gsi))
2072 {
2073 gimple stmt = gsi_stmt (gsi);
2074 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2075 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2076 {
2077 walk_stmt_info wi;
2078 memset (&wi, 0, sizeof (wi));
2079 wi.info = LOOP_VINFO_LOOP (loop_vinfo);
2080 gimple_stmt_iterator gsi2
2081 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info));
2082 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2083 vect_detect_hybrid_slp_1, &wi);
2084 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2085 vect_detect_hybrid_slp_2,
2086 vect_detect_hybrid_slp_1, &wi);
2087 }
2088 }
2089 }
2090
2091 /* Then walk the SLP instance trees marking stmts with uses in
2092 non-SLP stmts as hybrid, also propagating hybrid down the
2093 SLP tree, collecting the above info on-the-fly. */
9771b263 2094 FOR_EACH_VEC_ELT (slp_instances, i, instance)
642fce57
RB
2095 {
2096 for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i)
2097 vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance),
2098 i, pure_slp);
2099 }
ebfd146a
IR
2100}
2101
a70d6342
IR
2102
2103/* Create and initialize a new bb_vec_info struct for BB, as well as
2104 stmt_vec_info structs for all the stmts in it. */
b8698a0f 2105
a70d6342
IR
2106static bb_vec_info
2107new_bb_vec_info (basic_block bb)
2108{
2109 bb_vec_info res = NULL;
2110 gimple_stmt_iterator gsi;
2111
2112 res = (bb_vec_info) xcalloc (1, sizeof (struct _bb_vec_info));
2113 BB_VINFO_BB (res) = bb;
2114
2115 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2116 {
2117 gimple stmt = gsi_stmt (gsi);
2118 gimple_set_uid (stmt, 0);
2119 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, NULL, res));
2120 }
2121
9771b263
DN
2122 BB_VINFO_GROUPED_STORES (res).create (10);
2123 BB_VINFO_SLP_INSTANCES (res).create (2);
c3e7ee41 2124 BB_VINFO_TARGET_COST_DATA (res) = init_cost (NULL);
a70d6342
IR
2125
2126 bb->aux = res;
2127 return res;
2128}
2129
2130
2131/* Free BB_VINFO struct, as well as all the stmt_vec_info structs of all the
2132 stmts in the basic block. */
2133
2134static void
2135destroy_bb_vec_info (bb_vec_info bb_vinfo)
2136{
9771b263 2137 vec<slp_instance> slp_instances;
c7e62a26 2138 slp_instance instance;
a70d6342
IR
2139 basic_block bb;
2140 gimple_stmt_iterator si;
c7e62a26 2141 unsigned i;
a70d6342
IR
2142
2143 if (!bb_vinfo)
2144 return;
2145
2146 bb = BB_VINFO_BB (bb_vinfo);
2147
2148 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
2149 {
2150 gimple stmt = gsi_stmt (si);
2151 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2152
2153 if (stmt_info)
2154 /* Free stmt_vec_info. */
2155 free_stmt_vec_info (stmt);
2156 }
2157
c716e67f 2158 vect_destroy_datarefs (NULL, bb_vinfo);
01be8516 2159 free_dependence_relations (BB_VINFO_DDRS (bb_vinfo));
9771b263 2160 BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
c7e62a26 2161 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
9771b263 2162 FOR_EACH_VEC_ELT (slp_instances, i, instance)
c7e62a26 2163 vect_free_slp_instance (instance);
9771b263 2164 BB_VINFO_SLP_INSTANCES (bb_vinfo).release ();
c3e7ee41 2165 destroy_cost_data (BB_VINFO_TARGET_COST_DATA (bb_vinfo));
a70d6342
IR
2166 free (bb_vinfo);
2167 bb->aux = NULL;
2168}
2169
2170
2171/* Analyze statements contained in SLP tree node after recursively analyzing
2172 the subtree. Return TRUE if the operations are supported. */
2173
2174static bool
a12e42fc 2175vect_slp_analyze_node_operations (slp_tree node)
a70d6342
IR
2176{
2177 bool dummy;
2178 int i;
2179 gimple stmt;
d755c7ef 2180 slp_tree child;
a70d6342
IR
2181
2182 if (!node)
2183 return true;
2184
9771b263 2185 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
a12e42fc 2186 if (!vect_slp_analyze_node_operations (child))
d092494c 2187 return false;
a70d6342 2188
9771b263 2189 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
a70d6342
IR
2190 {
2191 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2192 gcc_assert (stmt_info);
a12e42fc 2193 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
a70d6342
IR
2194
2195 if (!vect_analyze_stmt (stmt, &dummy, node))
a12e42fc 2196 return false;
a70d6342
IR
2197 }
2198
2199 return true;
2200}
2201
2202
ff802fa1 2203/* Analyze statements in SLP instances of the basic block. Return TRUE if the
a70d6342
IR
2204 operations are supported. */
2205
a12e42fc 2206bool
1a4b99c1 2207vect_slp_analyze_operations (vec<slp_instance> slp_instances, void *data)
a70d6342 2208{
a70d6342
IR
2209 slp_instance instance;
2210 int i;
2211
a12e42fc
RB
2212 if (dump_enabled_p ())
2213 dump_printf_loc (MSG_NOTE, vect_location,
2214 "=== vect_slp_analyze_operations ===\n");
2215
9771b263 2216 for (i = 0; slp_instances.iterate (i, &instance); )
a70d6342 2217 {
a12e42fc 2218 if (!vect_slp_analyze_node_operations (SLP_INSTANCE_TREE (instance)))
a70d6342 2219 {
a12e42fc
RB
2220 dump_printf_loc (MSG_NOTE, vect_location,
2221 "removing SLP instance operations starting from: ");
2222 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
2223 SLP_TREE_SCALAR_STMTS
2224 (SLP_INSTANCE_TREE (instance))[0], 0);
2225 vect_free_slp_instance (instance);
9771b263 2226 slp_instances.ordered_remove (i);
a70d6342
IR
2227 }
2228 else
1a4b99c1
RB
2229 {
2230 /* Compute the costs of the SLP instance. */
2231 vect_analyze_slp_cost (instance, data);
2232 i++;
2233 }
b8698a0f
L
2234 }
2235
9771b263 2236 if (!slp_instances.length ())
a70d6342
IR
2237 return false;
2238
2239 return true;
2240}
2241
6eddf228
RB
2242
2243/* Compute the scalar cost of the SLP node NODE and its children
2244 and return it. Do not account defs that are marked in LIFE and
2245 update LIFE according to uses of NODE. */
2246
2247static unsigned
292cba13 2248vect_bb_slp_scalar_cost (basic_block bb,
ff4c81cc 2249 slp_tree node, vec<bool, va_heap> *life)
6eddf228
RB
2250{
2251 unsigned scalar_cost = 0;
2252 unsigned i;
2253 gimple stmt;
2254 slp_tree child;
2255
2256 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
2257 {
2258 unsigned stmt_cost;
2259 ssa_op_iter op_iter;
2260 def_operand_p def_p;
2261 stmt_vec_info stmt_info;
2262
ff4c81cc 2263 if ((*life)[i])
6eddf228
RB
2264 continue;
2265
2266 /* If there is a non-vectorized use of the defs then the scalar
2267 stmt is kept live in which case we do not account it or any
2268 required defs in the SLP children in the scalar cost. This
2269 way we make the vectorization more costly when compared to
2270 the scalar cost. */
2271 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2272 {
2273 imm_use_iterator use_iter;
2274 gimple use_stmt;
2275 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
f30a0ba5
RB
2276 if (!is_gimple_debug (use_stmt)
2277 && (gimple_code (use_stmt) == GIMPLE_PHI
2278 || gimple_bb (use_stmt) != bb
2279 || !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (use_stmt))))
6eddf228 2280 {
ff4c81cc 2281 (*life)[i] = true;
6eddf228
RB
2282 BREAK_FROM_IMM_USE_STMT (use_iter);
2283 }
2284 }
ff4c81cc 2285 if ((*life)[i])
6eddf228
RB
2286 continue;
2287
2288 stmt_info = vinfo_for_stmt (stmt);
2289 if (STMT_VINFO_DATA_REF (stmt_info))
2290 {
2291 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2292 stmt_cost = vect_get_stmt_cost (scalar_load);
2293 else
2294 stmt_cost = vect_get_stmt_cost (scalar_store);
2295 }
2296 else
2297 stmt_cost = vect_get_stmt_cost (scalar_stmt);
2298
2299 scalar_cost += stmt_cost;
2300 }
2301
2302 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
90dd6e3d
RB
2303 if (child)
2304 scalar_cost += vect_bb_slp_scalar_cost (bb, child, life);
6eddf228
RB
2305
2306 return scalar_cost;
2307}
2308
69f11a13
IR
2309/* Check if vectorization of the basic block is profitable. */
2310
2311static bool
2312vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo)
2313{
9771b263 2314 vec<slp_instance> slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
69f11a13 2315 slp_instance instance;
1a4b99c1 2316 int i;
c3e7ee41 2317 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
92345349 2318 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
69f11a13
IR
2319
2320 /* Calculate scalar cost. */
6eddf228 2321 FOR_EACH_VEC_ELT (slp_instances, i, instance)
69f11a13 2322 {
00f96dc9 2323 auto_vec<bool, 20> life;
ff4c81cc 2324 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
292cba13
RB
2325 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2326 SLP_INSTANCE_TREE (instance),
ff4c81cc 2327 &life);
69f11a13
IR
2328 }
2329
c3e7ee41 2330 /* Complete the target-specific cost calculation. */
92345349
BS
2331 finish_cost (BB_VINFO_TARGET_COST_DATA (bb_vinfo), &vec_prologue_cost,
2332 &vec_inside_cost, &vec_epilogue_cost);
2333
2334 vec_outside_cost = vec_prologue_cost + vec_epilogue_cost;
c3e7ee41 2335
73fbfcad 2336 if (dump_enabled_p ())
69f11a13 2337 {
78c60e3d
SS
2338 dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n");
2339 dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n",
2340 vec_inside_cost);
2341 dump_printf (MSG_NOTE, " Vector prologue cost: %d\n", vec_prologue_cost);
2342 dump_printf (MSG_NOTE, " Vector epilogue cost: %d\n", vec_epilogue_cost);
e645e942 2343 dump_printf (MSG_NOTE, " Scalar cost of basic block: %d\n", scalar_cost);
69f11a13
IR
2344 }
2345
2346 /* Vectorization is profitable if its cost is less than the cost of scalar
2347 version. */
2348 if (vec_outside_cost + vec_inside_cost >= scalar_cost)
2349 return false;
2350
2351 return true;
2352}
2353
2354/* Check if the basic block can be vectorized. */
a70d6342 2355
8e19f5a1
IR
2356static bb_vec_info
2357vect_slp_analyze_bb_1 (basic_block bb)
a70d6342
IR
2358{
2359 bb_vec_info bb_vinfo;
9771b263 2360 vec<slp_instance> slp_instances;
a70d6342 2361 slp_instance instance;
8e19f5a1 2362 int i;
777e1f09 2363 int min_vf = 2;
1428105c 2364 unsigned n_stmts = 0;
e4a707c4 2365
a70d6342
IR
2366 bb_vinfo = new_bb_vec_info (bb);
2367 if (!bb_vinfo)
2368 return NULL;
2369
1428105c 2370 if (!vect_analyze_data_refs (NULL, bb_vinfo, &min_vf, &n_stmts))
a70d6342 2371 {
73fbfcad 2372 if (dump_enabled_p ())
78c60e3d
SS
2373 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2374 "not vectorized: unhandled data-ref in basic "
2375 "block.\n");
b8698a0f 2376
a70d6342
IR
2377 destroy_bb_vec_info (bb_vinfo);
2378 return NULL;
2379 }
2380
fcac74a1 2381 if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2)
a70d6342 2382 {
73fbfcad 2383 if (dump_enabled_p ())
78c60e3d
SS
2384 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2385 "not vectorized: not enough data-refs in "
2386 "basic block.\n");
a70d6342
IR
2387
2388 destroy_bb_vec_info (bb_vinfo);
2389 return NULL;
2390 }
2391
5abe1e05
RB
2392 if (!vect_analyze_data_ref_accesses (NULL, bb_vinfo))
2393 {
2394 if (dump_enabled_p ())
2395 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2396 "not vectorized: unhandled data access in "
2397 "basic block.\n");
2398
2399 destroy_bb_vec_info (bb_vinfo);
2400 return NULL;
2401 }
2402
f5709183
IR
2403 vect_pattern_recog (NULL, bb_vinfo);
2404
a70d6342
IR
2405 if (!vect_analyze_data_refs_alignment (NULL, bb_vinfo))
2406 {
73fbfcad 2407 if (dump_enabled_p ())
78c60e3d
SS
2408 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2409 "not vectorized: bad data alignment in basic "
2410 "block.\n");
b8698a0f 2411
a70d6342
IR
2412 destroy_bb_vec_info (bb_vinfo);
2413 return NULL;
2414 }
b8698a0f 2415
a70d6342
IR
2416 /* Check the SLP opportunities in the basic block, analyze and build SLP
2417 trees. */
1428105c 2418 if (!vect_analyze_slp (NULL, bb_vinfo, n_stmts))
a70d6342 2419 {
73fbfcad 2420 if (dump_enabled_p ())
effb52da
RB
2421 {
2422 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2423 "Failed to SLP the basic block.\n");
2424 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2425 "not vectorized: failed to find SLP opportunities "
2426 "in basic block.\n");
2427 }
a70d6342
IR
2428
2429 destroy_bb_vec_info (bb_vinfo);
2430 return NULL;
2431 }
b8698a0f 2432
a70d6342
IR
2433 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
2434
2435 /* Mark all the statements that we want to vectorize as pure SLP and
2436 relevant. */
9771b263 2437 FOR_EACH_VEC_ELT (slp_instances, i, instance)
a70d6342
IR
2438 {
2439 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2440 vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance));
b8698a0f 2441 }
a70d6342 2442
5e6667b2
RB
2443 /* Mark all the statements that we do not want to vectorize. */
2444 for (gimple_stmt_iterator gsi = gsi_start_bb (BB_VINFO_BB (bb_vinfo));
2445 !gsi_end_p (gsi); gsi_next (&gsi))
2446 {
2447 stmt_vec_info vinfo = vinfo_for_stmt (gsi_stmt (gsi));
2448 if (STMT_SLP_TYPE (vinfo) != pure_slp)
2449 STMT_VINFO_VECTORIZABLE (vinfo) = false;
2450 }
2451
2452 /* Analyze dependences. At this point all stmts not participating in
2453 vectorization have to be marked. Dependence analysis assumes
2454 that we either vectorize all SLP instances or none at all. */
2455 if (!vect_slp_analyze_data_ref_dependences (bb_vinfo))
2456 {
2457 if (dump_enabled_p ())
2458 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2459 "not vectorized: unhandled data dependence "
2460 "in basic block.\n");
2461
2462 destroy_bb_vec_info (bb_vinfo);
2463 return NULL;
2464 }
2465
c3e7ee41 2466 if (!vect_verify_datarefs_alignment (NULL, bb_vinfo))
38eec4c6 2467 {
73fbfcad 2468 if (dump_enabled_p ())
78c60e3d
SS
2469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2470 "not vectorized: unsupported alignment in basic "
2471 "block.\n");
38eec4c6
UW
2472 destroy_bb_vec_info (bb_vinfo);
2473 return NULL;
2474 }
2475
1a4b99c1
RB
2476 if (!vect_slp_analyze_operations (BB_VINFO_SLP_INSTANCES (bb_vinfo),
2477 BB_VINFO_TARGET_COST_DATA (bb_vinfo)))
a70d6342 2478 {
73fbfcad 2479 if (dump_enabled_p ())
e645e942 2480 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d 2481 "not vectorized: bad operation in basic block.\n");
a70d6342
IR
2482
2483 destroy_bb_vec_info (bb_vinfo);
2484 return NULL;
2485 }
2486
69f11a13 2487 /* Cost model: check if the vectorization is worthwhile. */
8b5e1202 2488 if (!unlimited_cost_model (NULL)
69f11a13
IR
2489 && !vect_bb_vectorization_profitable_p (bb_vinfo))
2490 {
73fbfcad 2491 if (dump_enabled_p ())
78c60e3d
SS
2492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2493 "not vectorized: vectorization is not "
2494 "profitable.\n");
69f11a13
IR
2495
2496 destroy_bb_vec_info (bb_vinfo);
2497 return NULL;
2498 }
2499
73fbfcad 2500 if (dump_enabled_p ())
78c60e3d
SS
2501 dump_printf_loc (MSG_NOTE, vect_location,
2502 "Basic block will be vectorized using SLP\n");
a70d6342
IR
2503
2504 return bb_vinfo;
2505}
2506
2507
8e19f5a1
IR
2508bb_vec_info
2509vect_slp_analyze_bb (basic_block bb)
2510{
2511 bb_vec_info bb_vinfo;
2512 int insns = 0;
2513 gimple_stmt_iterator gsi;
2514 unsigned int vector_sizes;
2515
73fbfcad 2516 if (dump_enabled_p ())
78c60e3d 2517 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n");
8e19f5a1
IR
2518
2519 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2520 {
2521 gimple stmt = gsi_stmt (gsi);
2522 if (!is_gimple_debug (stmt)
2523 && !gimple_nop_p (stmt)
2524 && gimple_code (stmt) != GIMPLE_LABEL)
2525 insns++;
2526 }
2527
2528 if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2529 {
73fbfcad 2530 if (dump_enabled_p ())
78c60e3d
SS
2531 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2532 "not vectorized: too many instructions in "
2533 "basic block.\n");
8e19f5a1
IR
2534
2535 return NULL;
2536 }
2537
2538 /* Autodetect first vector size we try. */
2539 current_vector_size = 0;
2540 vector_sizes = targetm.vectorize.autovectorize_vector_sizes ();
2541
2542 while (1)
2543 {
2544 bb_vinfo = vect_slp_analyze_bb_1 (bb);
2545 if (bb_vinfo)
2546 return bb_vinfo;
2547
2548 destroy_bb_vec_info (bb_vinfo);
2549
2550 vector_sizes &= ~current_vector_size;
2551 if (vector_sizes == 0
2552 || current_vector_size == 0)
2553 return NULL;
2554
2555 /* Try the next biggest vector size. */
2556 current_vector_size = 1 << floor_log2 (vector_sizes);
73fbfcad 2557 if (dump_enabled_p ())
78c60e3d
SS
2558 dump_printf_loc (MSG_NOTE, vect_location,
2559 "***** Re-trying analysis with "
2560 "vector size %d\n", current_vector_size);
8e19f5a1
IR
2561 }
2562}
2563
2564
b8698a0f
L
2565/* For constant and loop invariant defs of SLP_NODE this function returns
2566 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
d59dc888
IR
2567 OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of
2568 scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create.
b5aeb3bb
IR
2569 REDUC_INDEX is the index of the reduction operand in the statements, unless
2570 it is -1. */
ebfd146a
IR
2571
2572static void
9dc3f7de 2573vect_get_constant_vectors (tree op, slp_tree slp_node,
9771b263 2574 vec<tree> *vec_oprnds,
b5aeb3bb
IR
2575 unsigned int op_num, unsigned int number_of_vectors,
2576 int reduc_index)
ebfd146a 2577{
9771b263
DN
2578 vec<gimple> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2579 gimple stmt = stmts[0];
ebfd146a 2580 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
d2a12ae7 2581 unsigned nunits;
ebfd146a 2582 tree vec_cst;
d2a12ae7
RG
2583 tree *elts;
2584 unsigned j, number_of_places_left_in_vector;
ebfd146a 2585 tree vector_type;
9dc3f7de 2586 tree vop;
9771b263 2587 int group_size = stmts.length ();
ebfd146a 2588 unsigned int vec_num, i;
d2a12ae7 2589 unsigned number_of_copies = 1;
9771b263
DN
2590 vec<tree> voprnds;
2591 voprnds.create (number_of_vectors);
ebfd146a 2592 bool constant_p, is_store;
b5aeb3bb 2593 tree neutral_op = NULL;
bac430c9 2594 enum tree_code code = gimple_expr_code (stmt);
0e93a64e
IR
2595 gimple def_stmt;
2596 struct loop *loop;
13396b6e 2597 gimple_seq ctor_seq = NULL;
b5aeb3bb 2598
afbe6325
RB
2599 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
2600 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
2601
29ed4920
IR
2602 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
2603 && reduc_index != -1)
b5aeb3bb 2604 {
afbe6325
RB
2605 op_num = reduc_index;
2606 op = gimple_op (stmt, op_num + 1);
b5aeb3bb 2607 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
ff802fa1 2608 we need either neutral operands or the original operands. See
b5aeb3bb
IR
2609 get_initial_def_for_reduction() for details. */
2610 switch (code)
2611 {
2612 case WIDEN_SUM_EXPR:
2613 case DOT_PROD_EXPR:
afbe6325 2614 case SAD_EXPR:
b5aeb3bb
IR
2615 case PLUS_EXPR:
2616 case MINUS_EXPR:
2617 case BIT_IOR_EXPR:
2618 case BIT_XOR_EXPR:
2619 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2620 neutral_op = build_real (TREE_TYPE (op), dconst0);
2621 else
2622 neutral_op = build_int_cst (TREE_TYPE (op), 0);
2623
2624 break;
2625
2626 case MULT_EXPR:
b5aeb3bb
IR
2627 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (op)))
2628 neutral_op = build_real (TREE_TYPE (op), dconst1);
2629 else
2630 neutral_op = build_int_cst (TREE_TYPE (op), 1);
2631
2632 break;
2633
c1e822d5
IR
2634 case BIT_AND_EXPR:
2635 neutral_op = build_int_cst (TREE_TYPE (op), -1);
2636 break;
2637
f1485e5b
RB
2638 /* For MIN/MAX we don't have an easy neutral operand but
2639 the initial values can be used fine here. Only for
2640 a reduction chain we have to force a neutral element. */
2641 case MAX_EXPR:
2642 case MIN_EXPR:
2643 if (!GROUP_FIRST_ELEMENT (stmt_vinfo))
2644 neutral_op = NULL;
2645 else
2646 {
2647 def_stmt = SSA_NAME_DEF_STMT (op);
2648 loop = (gimple_bb (stmt))->loop_father;
2649 neutral_op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2650 loop_preheader_edge (loop));
2651 }
2652 break;
0e93a64e 2653
b5aeb3bb 2654 default:
afbe6325 2655 gcc_assert (!GROUP_FIRST_ELEMENT (stmt_vinfo));
0e93a64e 2656 neutral_op = NULL;
b5aeb3bb
IR
2657 }
2658 }
ebfd146a
IR
2659
2660 if (STMT_VINFO_DATA_REF (stmt_vinfo))
2661 {
2662 is_store = true;
2663 op = gimple_assign_rhs1 (stmt);
2664 }
2665 else
9dc3f7de
IR
2666 is_store = false;
2667
2668 gcc_assert (op);
ebfd146a
IR
2669
2670 if (CONSTANT_CLASS_P (op))
d59dc888 2671 constant_p = true;
ebfd146a 2672 else
d59dc888
IR
2673 constant_p = false;
2674
ebfd146a 2675 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
b8698a0f 2676 created vectors. It is greater than 1 if unrolling is performed.
ebfd146a
IR
2677
2678 For example, we have two scalar operands, s1 and s2 (e.g., group of
2679 strided accesses of size two), while NUNITS is four (i.e., four scalars
f7e531cf
IR
2680 of this type can be packed in a vector). The output vector will contain
2681 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
ebfd146a
IR
2682 will be 2).
2683
b8698a0f 2684 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors
ebfd146a
IR
2685 containing the operands.
2686
2687 For example, NUNITS is four as before, and the group size is 8
f7e531cf 2688 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
ebfd146a 2689 {s5, s6, s7, s8}. */
b8698a0f 2690
14a61437 2691 number_of_copies = nunits * number_of_vectors / group_size;
ebfd146a
IR
2692
2693 number_of_places_left_in_vector = nunits;
d2a12ae7 2694 elts = XALLOCAVEC (tree, nunits);
90dd6e3d 2695 bool place_after_defs = false;
ebfd146a
IR
2696 for (j = 0; j < number_of_copies; j++)
2697 {
9771b263 2698 for (i = group_size - 1; stmts.iterate (i, &stmt); i--)
ebfd146a
IR
2699 {
2700 if (is_store)
2701 op = gimple_assign_rhs1 (stmt);
bac430c9 2702 else
f7e531cf 2703 {
bac430c9 2704 switch (code)
f7e531cf 2705 {
bac430c9
IR
2706 case COND_EXPR:
2707 if (op_num == 0 || op_num == 1)
2708 {
2709 tree cond = gimple_assign_rhs1 (stmt);
2710 op = TREE_OPERAND (cond, op_num);
2711 }
2712 else
2713 {
2714 if (op_num == 2)
2715 op = gimple_assign_rhs2 (stmt);
2716 else
2717 op = gimple_assign_rhs3 (stmt);
2718 }
2719 break;
2720
2721 case CALL_EXPR:
2722 op = gimple_call_arg (stmt, op_num);
2723 break;
2724
b84b294a
JJ
2725 case LSHIFT_EXPR:
2726 case RSHIFT_EXPR:
2727 case LROTATE_EXPR:
2728 case RROTATE_EXPR:
2729 op = gimple_op (stmt, op_num + 1);
2730 /* Unlike the other binary operators, shifts/rotates have
2731 the shift count being int, instead of the same type as
2732 the lhs, so make sure the scalar is the right type if
2733 we are dealing with vectors of
2734 long long/long/short/char. */
793d9a16 2735 if (op_num == 1 && TREE_CODE (op) == INTEGER_CST)
b84b294a
JJ
2736 op = fold_convert (TREE_TYPE (vector_type), op);
2737 break;
2738
bac430c9
IR
2739 default:
2740 op = gimple_op (stmt, op_num + 1);
b84b294a 2741 break;
f7e531cf
IR
2742 }
2743 }
b8698a0f 2744
b5aeb3bb
IR
2745 if (reduc_index != -1)
2746 {
0e93a64e
IR
2747 loop = (gimple_bb (stmt))->loop_father;
2748 def_stmt = SSA_NAME_DEF_STMT (op);
b5aeb3bb
IR
2749
2750 gcc_assert (loop);
b010117a
IR
2751
2752 /* Get the def before the loop. In reduction chain we have only
2753 one initial value. */
2754 if ((j != (number_of_copies - 1)
2755 || (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))
2756 && i != 0))
2757 && neutral_op)
b5aeb3bb 2758 op = neutral_op;
b010117a
IR
2759 else
2760 op = PHI_ARG_DEF_FROM_EDGE (def_stmt,
2761 loop_preheader_edge (loop));
b5aeb3bb
IR
2762 }
2763
ebfd146a 2764 /* Create 'vect_ = {op0,op1,...,opn}'. */
ebfd146a 2765 number_of_places_left_in_vector--;
90dd6e3d 2766 tree orig_op = op;
13396b6e 2767 if (!types_compatible_p (TREE_TYPE (vector_type), TREE_TYPE (op)))
50eeef09 2768 {
793d9a16 2769 if (CONSTANT_CLASS_P (op))
13396b6e
JJ
2770 {
2771 op = fold_unary (VIEW_CONVERT_EXPR,
2772 TREE_TYPE (vector_type), op);
2773 gcc_assert (op && CONSTANT_CLASS_P (op));
2774 }
2775 else
2776 {
b731b390 2777 tree new_temp = make_ssa_name (TREE_TYPE (vector_type));
13396b6e 2778 gimple init_stmt;
0d0e4a03 2779 op = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vector_type), op);
13396b6e 2780 init_stmt
0d0e4a03 2781 = gimple_build_assign (new_temp, VIEW_CONVERT_EXPR, op);
13396b6e
JJ
2782 gimple_seq_add_stmt (&ctor_seq, init_stmt);
2783 op = new_temp;
2784 }
50eeef09 2785 }
d2a12ae7 2786 elts[number_of_places_left_in_vector] = op;
793d9a16
RB
2787 if (!CONSTANT_CLASS_P (op))
2788 constant_p = false;
90dd6e3d
RB
2789 if (TREE_CODE (orig_op) == SSA_NAME
2790 && !SSA_NAME_IS_DEFAULT_DEF (orig_op)
2791 && STMT_VINFO_BB_VINFO (stmt_vinfo)
2792 && (STMT_VINFO_BB_VINFO (stmt_vinfo)->bb
2793 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
2794 place_after_defs = true;
ebfd146a
IR
2795
2796 if (number_of_places_left_in_vector == 0)
2797 {
2798 number_of_places_left_in_vector = nunits;
2799
2800 if (constant_p)
d2a12ae7 2801 vec_cst = build_vector (vector_type, elts);
ebfd146a 2802 else
d2a12ae7 2803 {
9771b263 2804 vec<constructor_elt, va_gc> *v;
d2a12ae7 2805 unsigned k;
9771b263 2806 vec_alloc (v, nunits);
d2a12ae7
RG
2807 for (k = 0; k < nunits; ++k)
2808 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]);
2809 vec_cst = build_constructor (vector_type, v);
2810 }
90dd6e3d
RB
2811 tree init;
2812 gimple_stmt_iterator gsi;
2813 if (place_after_defs)
2814 {
2815 gsi = gsi_for_stmt
2816 (vect_find_last_scalar_stmt_in_slp (slp_node));
2817 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi);
2818 }
2819 else
2820 init = vect_init_vector (stmt, vec_cst, vector_type, NULL);
13396b6e
JJ
2821 if (ctor_seq != NULL)
2822 {
90dd6e3d 2823 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
13396b6e
JJ
2824 gsi_insert_seq_before_without_update (&gsi, ctor_seq,
2825 GSI_SAME_STMT);
2826 ctor_seq = NULL;
2827 }
90dd6e3d
RB
2828 voprnds.quick_push (init);
2829 place_after_defs = false;
ebfd146a
IR
2830 }
2831 }
2832 }
2833
b8698a0f 2834 /* Since the vectors are created in the reverse order, we should invert
ebfd146a 2835 them. */
9771b263 2836 vec_num = voprnds.length ();
d2a12ae7 2837 for (j = vec_num; j != 0; j--)
ebfd146a 2838 {
9771b263
DN
2839 vop = voprnds[j - 1];
2840 vec_oprnds->quick_push (vop);
ebfd146a
IR
2841 }
2842
9771b263 2843 voprnds.release ();
ebfd146a
IR
2844
2845 /* In case that VF is greater than the unrolling factor needed for the SLP
b8698a0f
L
2846 group of stmts, NUMBER_OF_VECTORS to be created is greater than
2847 NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have
ebfd146a 2848 to replicate the vectors. */
9771b263 2849 while (number_of_vectors > vec_oprnds->length ())
ebfd146a 2850 {
b5aeb3bb
IR
2851 tree neutral_vec = NULL;
2852
2853 if (neutral_op)
2854 {
2855 if (!neutral_vec)
b9acc9f1 2856 neutral_vec = build_vector_from_val (vector_type, neutral_op);
b5aeb3bb 2857
9771b263 2858 vec_oprnds->quick_push (neutral_vec);
b5aeb3bb
IR
2859 }
2860 else
2861 {
9771b263
DN
2862 for (i = 0; vec_oprnds->iterate (i, &vop) && i < vec_num; i++)
2863 vec_oprnds->quick_push (vop);
b5aeb3bb 2864 }
ebfd146a
IR
2865 }
2866}
2867
2868
2869/* Get vectorized definitions from SLP_NODE that contains corresponding
2870 vectorized def-stmts. */
2871
2872static void
9771b263 2873vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
ebfd146a
IR
2874{
2875 tree vec_oprnd;
2876 gimple vec_def_stmt;
2877 unsigned int i;
2878
9771b263 2879 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
ebfd146a 2880
9771b263 2881 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt)
ebfd146a
IR
2882 {
2883 gcc_assert (vec_def_stmt);
2884 vec_oprnd = gimple_get_lhs (vec_def_stmt);
9771b263 2885 vec_oprnds->quick_push (vec_oprnd);
ebfd146a
IR
2886 }
2887}
2888
2889
b8698a0f
L
2890/* Get vectorized definitions for SLP_NODE.
2891 If the scalar definitions are loop invariants or constants, collect them and
ebfd146a
IR
2892 call vect_get_constant_vectors() to create vector stmts.
2893 Otherwise, the def-stmts must be already vectorized and the vectorized stmts
d092494c
IR
2894 must be stored in the corresponding child of SLP_NODE, and we call
2895 vect_get_slp_vect_defs () to retrieve them. */
b8698a0f 2896
ebfd146a 2897void
9771b263 2898vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
37b5ec8f 2899 vec<vec<tree> > *vec_oprnds, int reduc_index)
ebfd146a 2900{
e44978dc 2901 gimple first_stmt;
d092494c
IR
2902 int number_of_vects = 0, i;
2903 unsigned int child_index = 0;
b8698a0f 2904 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
d092494c 2905 slp_tree child = NULL;
37b5ec8f 2906 vec<tree> vec_defs;
e44978dc 2907 tree oprnd;
d092494c 2908 bool vectorized_defs;
ebfd146a 2909
9771b263
DN
2910 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0];
2911 FOR_EACH_VEC_ELT (ops, i, oprnd)
ebfd146a 2912 {
d092494c
IR
2913 /* For each operand we check if it has vectorized definitions in a child
2914 node or we need to create them (for invariants and constants). We
2915 check if the LHS of the first stmt of the next child matches OPRND.
2916 If it does, we found the correct child. Otherwise, we call
2917 vect_get_constant_vectors (), and not advance CHILD_INDEX in order
2918 to check this child node for the next operand. */
2919 vectorized_defs = false;
9771b263 2920 if (SLP_TREE_CHILDREN (slp_node).length () > child_index)
ebfd146a 2921 {
01d8bf07 2922 child = SLP_TREE_CHILDREN (slp_node)[child_index];
d092494c 2923
e44978dc 2924 /* We have to check both pattern and original def, if available. */
90dd6e3d 2925 if (child)
e44978dc 2926 {
90dd6e3d
RB
2927 gimple first_def = SLP_TREE_SCALAR_STMTS (child)[0];
2928 gimple related
2929 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
2930
2931 if (operand_equal_p (oprnd, gimple_get_lhs (first_def), 0)
2932 || (related
2933 && operand_equal_p (oprnd, gimple_get_lhs (related), 0)))
2934 {
2935 /* The number of vector defs is determined by the number of
2936 vector statements in the node from which we get those
2937 statements. */
2938 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
2939 vectorized_defs = true;
2940 child_index++;
2941 }
e44978dc 2942 }
90dd6e3d
RB
2943 else
2944 child_index++;
d092494c 2945 }
ebfd146a 2946
d092494c
IR
2947 if (!vectorized_defs)
2948 {
2949 if (i == 0)
2950 {
2951 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
2952 /* Number of vector stmts was calculated according to LHS in
2953 vect_schedule_slp_instance (), fix it by replacing LHS with
2954 RHS, if necessary. See vect_get_smallest_scalar_type () for
2955 details. */
2956 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit,
2957 &rhs_size_unit);
2958 if (rhs_size_unit != lhs_size_unit)
2959 {
2960 number_of_vects *= rhs_size_unit;
2961 number_of_vects /= lhs_size_unit;
2962 }
2963 }
2964 }
b5aeb3bb 2965
d092494c 2966 /* Allocate memory for vectorized defs. */
37b5ec8f
JJ
2967 vec_defs = vNULL;
2968 vec_defs.create (number_of_vects);
ebfd146a 2969
d092494c
IR
2970 /* For reduction defs we call vect_get_constant_vectors (), since we are
2971 looking for initial loop invariant values. */
2972 if (vectorized_defs && reduc_index == -1)
2973 /* The defs are already vectorized. */
37b5ec8f 2974 vect_get_slp_vect_defs (child, &vec_defs);
d092494c
IR
2975 else
2976 /* Build vectors from scalar defs. */
37b5ec8f 2977 vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i,
d092494c 2978 number_of_vects, reduc_index);
ebfd146a 2979
37b5ec8f 2980 vec_oprnds->quick_push (vec_defs);
ebfd146a 2981
d092494c
IR
2982 /* For reductions, we only need initial values. */
2983 if (reduc_index != -1)
2984 return;
2985 }
ebfd146a
IR
2986}
2987
a70d6342 2988
b8698a0f 2989/* Create NCOPIES permutation statements using the mask MASK_BYTES (by
ebfd146a
IR
2990 building a vector of type MASK_TYPE from it) and two input vectors placed in
2991 DR_CHAIN at FIRST_VEC_INDX and SECOND_VEC_INDX for the first copy and
2992 shifting by STRIDE elements of DR_CHAIN for every copy.
2993 (STRIDE is the number of vectorized stmts for NODE divided by the number of
b8698a0f 2994 copies).
ebfd146a
IR
2995 VECT_STMTS_COUNTER specifies the index in the vectorized stmts of NODE, where
2996 the created stmts must be inserted. */
2997
2998static inline void
502f0263 2999vect_create_mask_and_perm (gimple stmt,
faf63e39 3000 tree mask, int first_vec_indx, int second_vec_indx,
b8698a0f 3001 gimple_stmt_iterator *gsi, slp_tree node,
9771b263 3002 tree vectype, vec<tree> dr_chain,
ebfd146a
IR
3003 int ncopies, int vect_stmts_counter)
3004{
faf63e39 3005 tree perm_dest;
ebfd146a 3006 gimple perm_stmt = NULL;
0f900dfa 3007 int i, stride;
ebfd146a 3008 tree first_vec, second_vec, data_ref;
ebfd146a 3009
ebfd146a 3010 stride = SLP_TREE_NUMBER_OF_VEC_STMTS (node) / ncopies;
ebfd146a 3011
b8698a0f 3012 /* Initialize the vect stmts of NODE to properly insert the generated
ebfd146a 3013 stmts later. */
9771b263 3014 for (i = SLP_TREE_VEC_STMTS (node).length ();
ebfd146a 3015 i < (int) SLP_TREE_NUMBER_OF_VEC_STMTS (node); i++)
9771b263 3016 SLP_TREE_VEC_STMTS (node).quick_push (NULL);
ebfd146a
IR
3017
3018 perm_dest = vect_create_destination_var (gimple_assign_lhs (stmt), vectype);
3019 for (i = 0; i < ncopies; i++)
3020 {
9771b263
DN
3021 first_vec = dr_chain[first_vec_indx];
3022 second_vec = dr_chain[second_vec_indx];
ebfd146a 3023
ebfd146a 3024 /* Generate the permute statement. */
0d0e4a03
JJ
3025 perm_stmt = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3026 first_vec, second_vec, mask);
ebfd146a 3027 data_ref = make_ssa_name (perm_dest, perm_stmt);
2635892a 3028 gimple_set_lhs (perm_stmt, data_ref);
ebfd146a 3029 vect_finish_stmt_generation (stmt, perm_stmt, gsi);
ebfd146a 3030
b8698a0f 3031 /* Store the vector statement in NODE. */
9771b263 3032 SLP_TREE_VEC_STMTS (node)[stride * i + vect_stmts_counter] = perm_stmt;
ebfd146a
IR
3033
3034 first_vec_indx += stride;
3035 second_vec_indx += stride;
3036 }
ebfd146a
IR
3037}
3038
3039
b8698a0f 3040/* Given FIRST_MASK_ELEMENT - the mask element in element representation,
ebfd146a 3041 return in CURRENT_MASK_ELEMENT its equivalent in target specific
ff802fa1 3042 representation. Check that the mask is valid and return FALSE if not.
ebfd146a
IR
3043 Return TRUE in NEED_NEXT_VECTOR if the permutation requires to move to
3044 the next vector, i.e., the current first vector is not needed. */
b8698a0f 3045
ebfd146a 3046static bool
b8698a0f 3047vect_get_mask_element (gimple stmt, int first_mask_element, int m,
ebfd146a 3048 int mask_nunits, bool only_one_vec, int index,
22e4dee7 3049 unsigned char *mask, int *current_mask_element,
694a4f61
IR
3050 bool *need_next_vector, int *number_of_mask_fixes,
3051 bool *mask_fixed, bool *needs_first_vector)
ebfd146a
IR
3052{
3053 int i;
ebfd146a
IR
3054
3055 /* Convert to target specific representation. */
3056 *current_mask_element = first_mask_element + m;
3057 /* Adjust the value in case it's a mask for second and third vectors. */
694a4f61 3058 *current_mask_element -= mask_nunits * (*number_of_mask_fixes - 1);
ebfd146a 3059
c8047699
RB
3060 if (*current_mask_element < 0)
3061 {
3062 if (dump_enabled_p ())
3063 {
3064 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3065 "permutation requires past vector ");
3066 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
3067 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
3068 }
3069 return false;
3070 }
3071
ebfd146a 3072 if (*current_mask_element < mask_nunits)
694a4f61 3073 *needs_first_vector = true;
ebfd146a
IR
3074
3075 /* We have only one input vector to permute but the mask accesses values in
3076 the next vector as well. */
3077 if (only_one_vec && *current_mask_element >= mask_nunits)
3078 {
73fbfcad 3079 if (dump_enabled_p ())
ebfd146a 3080 {
e645e942 3081 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
78c60e3d
SS
3082 "permutation requires at least two vectors ");
3083 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3084 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
3085 }
3086
3087 return false;
3088 }
3089
3090 /* The mask requires the next vector. */
496d3346 3091 while (*current_mask_element >= mask_nunits * 2)
ebfd146a 3092 {
694a4f61 3093 if (*needs_first_vector || *mask_fixed)
ebfd146a
IR
3094 {
3095 /* We either need the first vector too or have already moved to the
b8698a0f 3096 next vector. In both cases, this permutation needs three
ebfd146a 3097 vectors. */
73fbfcad 3098 if (dump_enabled_p ())
ebfd146a 3099 {
78c60e3d
SS
3100 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3101 "permutation requires at "
3102 "least three vectors ");
3103 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3104 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a
IR
3105 }
3106
3107 return false;
3108 }
3109
3110 /* We move to the next vector, dropping the first one and working with
3111 the second and the third - we need to adjust the values of the mask
3112 accordingly. */
694a4f61 3113 *current_mask_element -= mask_nunits * *number_of_mask_fixes;
ebfd146a
IR
3114
3115 for (i = 0; i < index; i++)
694a4f61 3116 mask[i] -= mask_nunits * *number_of_mask_fixes;
ebfd146a 3117
694a4f61
IR
3118 (*number_of_mask_fixes)++;
3119 *mask_fixed = true;
ebfd146a
IR
3120 }
3121
694a4f61 3122 *need_next_vector = *mask_fixed;
ebfd146a
IR
3123
3124 /* This was the last element of this mask. Start a new one. */
3125 if (index == mask_nunits - 1)
3126 {
694a4f61
IR
3127 *number_of_mask_fixes = 1;
3128 *mask_fixed = false;
3129 *needs_first_vector = false;
ebfd146a
IR
3130 }
3131
3132 return true;
3133}
3134
3135
3136/* Generate vector permute statements from a list of loads in DR_CHAIN.
3137 If ANALYZE_ONLY is TRUE, only check that it is possible to create valid
01d8bf07
RB
3138 permute statements for the SLP node NODE of the SLP instance
3139 SLP_NODE_INSTANCE. */
3140
ebfd146a 3141bool
01d8bf07 3142vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
ebfd146a
IR
3143 gimple_stmt_iterator *gsi, int vf,
3144 slp_instance slp_node_instance, bool analyze_only)
3145{
01d8bf07 3146 gimple stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3147 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
3148 tree mask_element_type = NULL_TREE, mask_type;
502f0263 3149 int i, j, k, nunits, vec_index = 0;
2635892a 3150 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
3151 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3152 int first_mask_element;
22e4dee7
RH
3153 int index, unroll_factor, current_mask_element, ncopies;
3154 unsigned char *mask;
ebfd146a
IR
3155 bool only_one_vec = false, need_next_vector = false;
3156 int first_vec_index, second_vec_index, orig_vec_stmts_num, vect_stmts_counter;
694a4f61
IR
3157 int number_of_mask_fixes = 1;
3158 bool mask_fixed = false;
3159 bool needs_first_vector = false;
ef4bddc2 3160 machine_mode mode;
ebfd146a 3161
91ff1504
RB
3162 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3163 return false;
3164
3165 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info));
3166
22e4dee7
RH
3167 mode = TYPE_MODE (vectype);
3168
3169 if (!can_vec_perm_p (mode, false, NULL))
ebfd146a 3170 {
73fbfcad 3171 if (dump_enabled_p ())
ebfd146a 3172 {
78c60e3d
SS
3173 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3174 "no vect permute for ");
3175 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
e645e942 3176 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
ebfd146a 3177 }
2635892a 3178 return false;
ebfd146a
IR
3179 }
3180
2635892a
RH
3181 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3182 same size as the vector element being permuted. */
96f9265a
RG
3183 mask_element_type = lang_hooks.types.type_for_mode
3184 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))), 1);
ebfd146a 3185 mask_type = get_vectype_for_scalar_type (mask_element_type);
ebfd146a 3186 nunits = TYPE_VECTOR_SUBPARTS (vectype);
22e4dee7 3187 mask = XALLOCAVEC (unsigned char, nunits);
ebfd146a
IR
3188 unroll_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
3189
3190 /* The number of vector stmts to generate based only on SLP_NODE_INSTANCE
3191 unrolling factor. */
91ff1504
RB
3192 orig_vec_stmts_num
3193 = (STMT_VINFO_GROUP_SIZE (stmt_info)
3194 * SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance)
3195 + nunits - 1) / nunits;
ebfd146a
IR
3196 if (orig_vec_stmts_num == 1)
3197 only_one_vec = true;
3198
b8698a0f 3199 /* Number of copies is determined by the final vectorization factor
ebfd146a 3200 relatively to SLP_NODE_INSTANCE unrolling factor. */
b8698a0f 3201 ncopies = vf / SLP_INSTANCE_UNROLLING_FACTOR (slp_node_instance);
ebfd146a 3202
b8698a0f
L
3203 /* Generate permutation masks for every NODE. Number of masks for each NODE
3204 is equal to GROUP_SIZE.
3205 E.g., we have a group of three nodes with three loads from the same
3206 location in each node, and the vector size is 4. I.e., we have a
3207 a0b0c0a1b1c1... sequence and we need to create the following vectors:
ebfd146a
IR
3208 for a's: a0a0a0a1 a1a1a2a2 a2a3a3a3
3209 for b's: b0b0b0b1 b1b1b2b2 b2b3b3b3
3210 ...
3211
2635892a 3212 The masks for a's should be: {0,0,0,3} {3,3,6,6} {6,9,9,9}.
b8698a0f 3213 The last mask is illegal since we assume two operands for permute
ff802fa1
IR
3214 operation, and the mask element values can't be outside that range.
3215 Hence, the last mask must be converted into {2,5,5,5}.
b8698a0f 3216 For the first two permutations we need the first and the second input
ebfd146a 3217 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
b8698a0f 3218 we need the second and the third vectors: {b1,c1,a2,b2} and
ebfd146a
IR
3219 {c2,a3,b3,c3}. */
3220
21c0a521 3221 {
ebfd146a
IR
3222 index = 0;
3223 vect_stmts_counter = 0;
3224 vec_index = 0;
3225 first_vec_index = vec_index++;
3226 if (only_one_vec)
3227 second_vec_index = first_vec_index;
3228 else
3229 second_vec_index = vec_index++;
3230
3231 for (j = 0; j < unroll_factor; j++)
3232 {
3233 for (k = 0; k < group_size; k++)
3234 {
01d8bf07 3235 i = SLP_TREE_LOAD_PERMUTATION (node)[k];
9b999e8c 3236 first_mask_element = i + j * STMT_VINFO_GROUP_SIZE (stmt_info);
2635892a
RH
3237 if (!vect_get_mask_element (stmt, first_mask_element, 0,
3238 nunits, only_one_vec, index,
3239 mask, &current_mask_element,
3240 &need_next_vector,
3241 &number_of_mask_fixes, &mask_fixed,
3242 &needs_first_vector))
3243 return false;
c8047699
RB
3244 gcc_assert (current_mask_element >= 0
3245 && current_mask_element < 2 * nunits);
2635892a 3246 mask[index++] = current_mask_element;
ebfd146a 3247
2635892a 3248 if (index == nunits)
ebfd146a 3249 {
01d8bf07 3250 index = 0;
22e4dee7
RH
3251 if (!can_vec_perm_p (mode, false, mask))
3252 {
73fbfcad 3253 if (dump_enabled_p ())
22e4dee7 3254 {
78c60e3d
SS
3255 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3256 vect_location,
3257 "unsupported vect permute { ");
22e4dee7 3258 for (i = 0; i < nunits; ++i)
78c60e3d
SS
3259 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ",
3260 mask[i]);
3261 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
22e4dee7
RH
3262 }
3263 return false;
3264 }
3265
ebfd146a
IR
3266 if (!analyze_only)
3267 {
01d8bf07
RB
3268 int l;
3269 tree mask_vec, *mask_elts;
3270 mask_elts = XALLOCAVEC (tree, nunits);
3271 for (l = 0; l < nunits; ++l)
3272 mask_elts[l] = build_int_cst (mask_element_type,
3273 mask[l]);
3274 mask_vec = build_vector (mask_type, mask_elts);
3275
3276 if (need_next_vector)
ebfd146a
IR
3277 {
3278 first_vec_index = second_vec_index;
3279 second_vec_index = vec_index;
3280 }
3281
502f0263 3282 vect_create_mask_and_perm (stmt,
faf63e39 3283 mask_vec, first_vec_index, second_vec_index,
2635892a 3284 gsi, node, vectype, dr_chain,
faf63e39 3285 ncopies, vect_stmts_counter++);
ebfd146a 3286 }
b8698a0f
L
3287 }
3288 }
3289 }
3290 }
ebfd146a 3291
ebfd146a
IR
3292 return true;
3293}
3294
3295
3296
3297/* Vectorize SLP instance tree in postorder. */
3298
3299static bool
3300vect_schedule_slp_instance (slp_tree node, slp_instance instance,
a70d6342 3301 unsigned int vectorization_factor)
ebfd146a
IR
3302{
3303 gimple stmt;
0d0293ac 3304 bool grouped_store, is_store;
ebfd146a
IR
3305 gimple_stmt_iterator si;
3306 stmt_vec_info stmt_info;
3307 unsigned int vec_stmts_size, nunits, group_size;
3308 tree vectype;
3309 int i;
d755c7ef 3310 slp_tree child;
ebfd146a
IR
3311
3312 if (!node)
3313 return false;
3314
9771b263 3315 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3316 vect_schedule_slp_instance (child, instance, vectorization_factor);
b8698a0f 3317
9771b263 3318 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
ebfd146a
IR
3319 stmt_info = vinfo_for_stmt (stmt);
3320
3321 /* VECTYPE is the type of the destination. */
b690cc0f 3322 vectype = STMT_VINFO_VECTYPE (stmt_info);
ebfd146a
IR
3323 nunits = (unsigned int) TYPE_VECTOR_SUBPARTS (vectype);
3324 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3325
3326 /* For each SLP instance calculate number of vector stmts to be created
ff802fa1 3327 for the scalar stmts in each node of the SLP tree. Number of vector
ebfd146a
IR
3328 elements in one vector iteration is the number of scalar elements in
3329 one scalar iteration (GROUP_SIZE) multiplied by VF divided by vector
14a61437
RB
3330 size.
3331 Unless this is a SLP reduction in which case the number of vector
3332 stmts is equal to the number of vector stmts of the children. */
3333 if (GROUP_FIRST_ELEMENT (stmt_info)
3334 && !STMT_VINFO_GROUPED_ACCESS (stmt_info))
3335 vec_stmts_size = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
3336 else
3337 vec_stmts_size = (vectorization_factor * group_size) / nunits;
ebfd146a 3338
9771b263 3339 if (!SLP_TREE_VEC_STMTS (node).exists ())
ebfd146a 3340 {
9771b263 3341 SLP_TREE_VEC_STMTS (node).create (vec_stmts_size);
ebfd146a
IR
3342 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = vec_stmts_size;
3343 }
3344
73fbfcad 3345 if (dump_enabled_p ())
ebfd146a 3346 {
78c60e3d
SS
3347 dump_printf_loc (MSG_NOTE,vect_location,
3348 "------>vectorizing SLP node starting from: ");
3349 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 3350 dump_printf (MSG_NOTE, "\n");
b8698a0f 3351 }
ebfd146a 3352
2e8ab70c
RB
3353 /* Vectorized stmts go before the last scalar stmt which is where
3354 all uses are ready. */
3355 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node));
e4a707c4 3356
b010117a
IR
3357 /* Mark the first element of the reduction chain as reduction to properly
3358 transform the node. In the analysis phase only the last element of the
3359 chain is marked as reduction. */
0d0293ac 3360 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info)
b010117a
IR
3361 && GROUP_FIRST_ELEMENT (stmt_info) == stmt)
3362 {
3363 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3364 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3365 }
3366
6876e5bc
RB
3367 /* Handle two-operation SLP nodes by vectorizing the group with
3368 both operations and then performing a merge. */
3369 if (SLP_TREE_TWO_OPERATORS (node))
3370 {
3371 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3372 enum tree_code ocode;
3373 gimple ostmt;
3374 unsigned char *mask = XALLOCAVEC (unsigned char, group_size);
3375 bool allsame = true;
3376 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt)
3377 if (gimple_assign_rhs_code (ostmt) != code0)
3378 {
3379 mask[i] = 1;
3380 allsame = false;
3381 ocode = gimple_assign_rhs_code (ostmt);
3382 }
3383 else
3384 mask[i] = 0;
3385 if (!allsame)
3386 {
3387 vec<gimple> v0;
3388 vec<gimple> v1;
3389 unsigned j;
3390 tree tmask = NULL_TREE;
3391 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3392 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3393 SLP_TREE_VEC_STMTS (node).truncate (0);
3394 gimple_assign_set_rhs_code (stmt, ocode);
3395 vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
3396 gimple_assign_set_rhs_code (stmt, code0);
3397 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3398 SLP_TREE_VEC_STMTS (node).truncate (0);
3399 tree meltype = build_nonstandard_integer_type
3400 (GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (vectype))), 1);
3401 tree mvectype = get_same_sized_vectype (meltype, vectype);
3402 unsigned k = 0, l;
3403 for (j = 0; j < v0.length (); ++j)
3404 {
3405 tree *melts = XALLOCAVEC (tree, TYPE_VECTOR_SUBPARTS (vectype));
3406 for (l = 0; l < TYPE_VECTOR_SUBPARTS (vectype); ++l)
3407 {
1ece8d4c 3408 if (k >= group_size)
6876e5bc
RB
3409 k = 0;
3410 melts[l] = build_int_cst
3411 (meltype, mask[k++] * TYPE_VECTOR_SUBPARTS (vectype) + l);
3412 }
3413 tmask = build_vector (mvectype, melts);
3414
3415 /* ??? Not all targets support a VEC_PERM_EXPR with a
3416 constant mask that would translate to a vec_merge RTX
3417 (with their vec_perm_const_ok). We can either not
3418 vectorize in that case or let veclower do its job.
3419 Unfortunately that isn't too great and at least for
3420 plus/minus we'd eventually like to match targets
3421 vector addsub instructions. */
3422 gimple vstmt;
3423 vstmt = gimple_build_assign (make_ssa_name (vectype),
3424 VEC_PERM_EXPR,
3425 gimple_assign_lhs (v0[j]),
3426 gimple_assign_lhs (v1[j]), tmask);
3427 vect_finish_stmt_generation (stmt, vstmt, &si);
3428 SLP_TREE_VEC_STMTS (node).quick_push (vstmt);
3429 }
3430 v0.release ();
3431 v1.release ();
3432 return false;
3433 }
3434 }
0d0293ac 3435 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance);
b5aeb3bb 3436 return is_store;
ebfd146a
IR
3437}
3438
dd34c087
JJ
3439/* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3440 For loop vectorization this is done in vectorizable_call, but for SLP
3441 it needs to be deferred until end of vect_schedule_slp, because multiple
3442 SLP instances may refer to the same scalar stmt. */
3443
3444static void
3445vect_remove_slp_scalar_calls (slp_tree node)
3446{
3447 gimple stmt, new_stmt;
3448 gimple_stmt_iterator gsi;
3449 int i;
d755c7ef 3450 slp_tree child;
dd34c087
JJ
3451 tree lhs;
3452 stmt_vec_info stmt_info;
3453
3454 if (!node)
3455 return;
3456
9771b263 3457 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
d755c7ef 3458 vect_remove_slp_scalar_calls (child);
dd34c087 3459
9771b263 3460 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt)
dd34c087
JJ
3461 {
3462 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL)
3463 continue;
3464 stmt_info = vinfo_for_stmt (stmt);
3465 if (stmt_info == NULL
3466 || is_pattern_stmt_p (stmt_info)
3467 || !PURE_SLP_STMT (stmt_info))
3468 continue;
3469 lhs = gimple_call_lhs (stmt);
3470 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3471 set_vinfo_for_stmt (new_stmt, stmt_info);
3472 set_vinfo_for_stmt (stmt, NULL);
3473 STMT_VINFO_STMT (stmt_info) = new_stmt;
3474 gsi = gsi_for_stmt (stmt);
3475 gsi_replace (&gsi, new_stmt, false);
3476 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3477 }
3478}
ebfd146a 3479
ff802fa1
IR
3480/* Generate vector code for all SLP instances in the loop/basic block. */
3481
ebfd146a 3482bool
a70d6342 3483vect_schedule_slp (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
ebfd146a 3484{
9771b263 3485 vec<slp_instance> slp_instances;
ebfd146a 3486 slp_instance instance;
01d8bf07 3487 unsigned int i, vf;
ebfd146a
IR
3488 bool is_store = false;
3489
a70d6342
IR
3490 if (loop_vinfo)
3491 {
3492 slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
3493 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
b8698a0f 3494 }
a70d6342
IR
3495 else
3496 {
3497 slp_instances = BB_VINFO_SLP_INSTANCES (bb_vinfo);
3498 vf = 1;
b8698a0f 3499 }
a70d6342 3500
9771b263 3501 FOR_EACH_VEC_ELT (slp_instances, i, instance)
ebfd146a
IR
3502 {
3503 /* Schedule the tree of INSTANCE. */
3504 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
a70d6342 3505 instance, vf);
73fbfcad 3506 if (dump_enabled_p ())
78c60e3d 3507 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3508 "vectorizing stmts using SLP.\n");
ebfd146a
IR
3509 }
3510
9771b263 3511 FOR_EACH_VEC_ELT (slp_instances, i, instance)
b5aeb3bb
IR
3512 {
3513 slp_tree root = SLP_INSTANCE_TREE (instance);
3514 gimple store;
3515 unsigned int j;
3516 gimple_stmt_iterator gsi;
3517
c40eced0
RB
3518 /* Remove scalar call stmts. Do not do this for basic-block
3519 vectorization as not all uses may be vectorized.
3520 ??? Why should this be necessary? DCE should be able to
3521 remove the stmts itself.
3522 ??? For BB vectorization we can as well remove scalar
3523 stmts starting from the SLP tree root if they have no
3524 uses. */
3525 if (loop_vinfo)
3526 vect_remove_slp_scalar_calls (root);
dd34c087 3527
9771b263 3528 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store)
b5aeb3bb
IR
3529 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3530 {
3531 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store)))
3532 break;
3533
a024e70e
IR
3534 if (is_pattern_stmt_p (vinfo_for_stmt (store)))
3535 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store));
b5aeb3bb
IR
3536 /* Free the attached stmt_vec_info and remove the stmt. */
3537 gsi = gsi_for_stmt (store);
3d3f2249 3538 unlink_stmt_vdef (store);
b5aeb3bb 3539 gsi_remove (&gsi, true);
3d3f2249 3540 release_defs (store);
b5aeb3bb
IR
3541 free_stmt_vec_info (store);
3542 }
3543 }
3544
ebfd146a
IR
3545 return is_store;
3546}
a70d6342
IR
3547
3548
3549/* Vectorize the basic block. */
3550
3551void
3552vect_slp_transform_bb (basic_block bb)
3553{
3554 bb_vec_info bb_vinfo = vec_info_for_bb (bb);
3555 gimple_stmt_iterator si;
3556
3557 gcc_assert (bb_vinfo);
3558
73fbfcad 3559 if (dump_enabled_p ())
78c60e3d 3560 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB\n");
a70d6342
IR
3561
3562 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3563 {
3564 gimple stmt = gsi_stmt (si);
3565 stmt_vec_info stmt_info;
3566
73fbfcad 3567 if (dump_enabled_p ())
a70d6342 3568 {
78c60e3d
SS
3569 dump_printf_loc (MSG_NOTE, vect_location,
3570 "------>SLPing statement: ");
3571 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
e645e942 3572 dump_printf (MSG_NOTE, "\n");
a70d6342
IR
3573 }
3574
3575 stmt_info = vinfo_for_stmt (stmt);
3576 gcc_assert (stmt_info);
3577
3578 /* Schedule all the SLP instances when the first SLP stmt is reached. */
3579 if (STMT_SLP_TYPE (stmt_info))
3580 {
3581 vect_schedule_slp (NULL, bb_vinfo);
3582 break;
3583 }
3584 }
3585
73fbfcad 3586 if (dump_enabled_p ())
5d318fd4 3587 dump_printf_loc (MSG_NOTE, vect_location,
ccb3ad87 3588 "BASIC BLOCK VECTORIZED\n");
a70d6342 3589
12aaf609
IR
3590 destroy_bb_vec_info (bb_vinfo);
3591}