/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_type vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }
  else
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
	}
      return false;
    }
}

/* Report that we've found an instance of pattern PATTERN in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: ", name);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
			tree vectype)
{
  vec_info *vinfo = orig_stmt_info->vinfo;
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL_STMT_VEC_INFO)
    pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
		       tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE)
{
  vec_info *vinfo = stmt_info->vinfo;
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}

/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different types
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}

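/* As an illustrative sketch of how this helper is used, the dot-product
   pattern below asks whether the target has a DOT_PROD_EXPR instruction
   taking HALF_TYPE inputs and producing a TYPE result:

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
					   type_out, &half_vectype))
       return NULL;  */
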
/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}

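/* For example, vect_element_precision (17) yields 32 (the next power
   of two), and vect_element_precision (1) yields BITS_PER_UNIT, since
   no element is narrower than a byte.  */
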
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
			   def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
    return false;

  return true;
}

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
struct vect_unpromoted_value {
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= orig_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    unprom->set_op (op, dt, caster);
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}

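/* For example, jousting a signed char NEW_TYPE against an unsigned char
   *COMMON_TYPE under a 32-bit result type takes the mismatched-sign path
   above: the combined precision is MAX (8, 8) * 2 = 16, and since 16 * 2
   does not exceed 32, *COMMON_TYPE becomes a signed 16-bit type that can
   hold all values of both inputs.  */
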
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  vec_info *vinfo = stmt_info->vinfo;
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
						     this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
					   shift_p, max_nops, this_unprom,
					   common_type);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		return 0;
	    }
	}
      next_op += nops;
    }
  return next_op;
}

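/* As an illustrative sketch, the dot-product pattern below calls

     vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			   false, 2, unprom0, &half_type)

   to match both "(int) x * (int) y" with narrower x and y and a
   pre-existing WIDEN_MULT_EXPR, recording the unpromoted inputs
   x and y in unprom0[].  */
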
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Splitting pattern statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
	}

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "Splitting statement: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt2_info->stmt, 0);
	}

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: ");
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt2, 0);
	}

      return true;
    }
}

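/* For example (a sketch): if a pattern needs the short form of a char
   value C whose only use is the cast "int a = (int) C", the cast can be
   split into the pattern statement "patt = (short) C" (STMT1) followed
   by "a = (int) patt", so that the intermediate short value can be
   reused instead of adding a separate conversion.  */
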
/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  /* See if we can reuse an existing result.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (types_compatible_p (TREE_TYPE (lhs), type))
	return lhs;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);

  /* If the operation is the input to a vectorizable cast, try splitting
     that cast into two, taking the required result as a mid-way point.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
	  && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
	  && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
	  && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
	return new_op;
    }

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;
      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
		     tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
	  : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
}

/* As above, but also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, assign))
    return false;

  if (!vect_reassociating_reduction_p (stmt_info))
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  return true;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}

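/* A typical source loop that matches the dot-product pattern above
   (an illustrative sketch; N and the array names are placeholders):

     short x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i] * y[i];

   Here 'type' is short, TYPE1 and TYPE2 are int, and the reduction
   becomes DOT_PROD_EXPR <x_t, y_t, sum_0>.  */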

/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  DAD = (TYPE2) DAD;
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in
     the loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}

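/* A typical source loop that matches the SAD pattern above
   (an illustrative sketch; the array names are placeholders):

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += abs (x[i] - y[i]);

   Here the subtraction is done in int (at least double the width of
   unsigned char) and the reduction becomes SAD_EXPR <x_t, y_t, sum_0>.  */
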
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support  */
  tree vectype = get_vectype_for_scalar_type (half_type);
  tree vecitype = get_vectype_for_scalar_type (itype);
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (wide_code, last_stmt,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
				      WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}

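/* A typical source loop that matches the widening-multiplication pattern
   above (an illustrative sketch):

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; ++i)
       c[i] = a[i] * b[i];

   Each multiplication is done on ints promoted from short, so it can be
   replaced by WIDEN_MULT_EXPR <a_t, b_t>.  */
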
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	x = x * x
   or
	x = sqrt (x)
*/

static gimple *
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && !gimple_call_internal_p (last_stmt))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}

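/* For example, "y = pow (x, 2.0)" is rewritten as "y = x * x" and
   "y = pow (x, 0.5)" as an IFN_SQRT call when the target supports the
   vector forms; with -funsafe-math-optimizations, "pow (10.0, x)" can
   become "exp (log (10.0) * x)" so that a SIMD clone of exp is usable
   (a sketch of the cases handled above).  */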

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will
   be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
					type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}

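/* A typical source loop that matches the widening-summation pattern above
   (an illustrative sketch):

     unsigned short x[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i];

   Here 'type' is unsigned short and 'TYPE' is unsigned int, so the
   reduction becomes WIDEN_SUM <x_t, sum_0>.  */
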
370c2ebe
RS
1466/* Recognize cases in which an operation is performed in one type WTYPE
1467 but could be done more efficiently in a narrower type NTYPE. For example,
1468 if we have:
20f06221 1469
370c2ebe
RS
1470 ATYPE a; // narrower than NTYPE
1471 BTYPE b; // narrower than NTYPE
1472 WTYPE aw = (WTYPE) a;
1473 WTYPE bw = (WTYPE) b;
1474 WTYPE res = aw + bw; // only uses of aw and bw
1107f3ae 1475
370c2ebe 1476 then it would be more efficient to do:
1107f3ae 1477
370c2ebe
RS
1478 NTYPE an = (NTYPE) a;
1479 NTYPE bn = (NTYPE) b;
1480 NTYPE resn = an + bn;
1481 WTYPE res = (WTYPE) resn;
1107f3ae 1482
370c2ebe 1483 Other situations include things like:
f5709183 1484
370c2ebe
RS
1485 ATYPE a; // NTYPE or narrower
1486 WTYPE aw = (WTYPE) a;
1487 WTYPE res = aw + b;
1107f3ae 1488
370c2ebe
RS
1489 when only "(NTYPE) res" is significant. In that case it's more efficient
1490 to truncate "b" and do the operation on NTYPE instead:
1107f3ae 1491
370c2ebe
RS
1492 NTYPE an = (NTYPE) a;
1493 NTYPE bn = (NTYPE) b; // truncation
1494 NTYPE resn = an + bn;
1495 WTYPE res = (WTYPE) resn;
1107f3ae 1496
370c2ebe
RS
1497 All users of "res" should then use "resn" instead, making the final
1498 statement dead (not marked as relevant). The final statement is still
1499 needed to maintain the type correctness of the IR.
1107f3ae 1500
370c2ebe
RS
1501 vect_determine_precisions has already determined the minimum
1502 precison of the operation and the minimum precision required
1503 by users of the result. */
1107f3ae 1504
370c2ebe 1505static gimple *
ba9728b0 1506vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
370c2ebe 1507{
ba9728b0 1508 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
370c2ebe
RS
1509 if (!last_stmt)
1510 return NULL;
9ef7adc0 1511
370c2ebe
RS
1512 /* See whether we have found that this operation can be done on a
1513 narrower type without changing its semantics. */
370c2ebe
RS
1514 unsigned int new_precision = last_stmt_info->operation_precision;
1515 if (!new_precision)
1516 return NULL;
1107f3ae 1517
370c2ebe
RS
1518 vec_info *vinfo = last_stmt_info->vinfo;
1519 tree lhs = gimple_assign_lhs (last_stmt);
1520 tree type = TREE_TYPE (lhs);
1521 tree_code code = gimple_assign_rhs_code (last_stmt);
1522
1523 /* Keep the first operand of a COND_EXPR as-is: only the other two
1524 operands are interesting. */
1525 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1526
1527 /* Check the operands. */
1528 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1529 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1530 unprom.quick_grow (nops);
1531 unsigned int min_precision = 0;
1532 bool single_use_p = false;
1533 for (unsigned int i = 0; i < nops; ++i)
1107f3ae 1534 {
370c2ebe
RS
1535 tree op = gimple_op (last_stmt, first_op + i);
1536 if (TREE_CODE (op) == INTEGER_CST)
1537 unprom[i].set_op (op, vect_constant_def);
1538 else if (TREE_CODE (op) == SSA_NAME)
1539 {
1540 bool op_single_use_p = true;
1541 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1542 &op_single_use_p))
1543 return NULL;
1544 /* If:
1107f3ae 1545
370c2ebe
RS
1546 (1) N bits of the result are needed;
1547 (2) all inputs are widened from M<N bits; and
1548 (3) one operand OP is a single-use SSA name
1107f3ae 1549
370c2ebe
RS
1550 we can shift the M->N widening from OP to the output
1551 without changing the number or type of extensions involved.
1552 This then reduces the number of copies of STMT_INFO.
1107f3ae 1553
370c2ebe
RS
1554 If instead of (3) more than one operand is a single-use SSA name,
1555 shifting the extension to the output is even more of a win.
1107f3ae 1556
370c2ebe 1557 If instead:
1107f3ae 1558
370c2ebe
RS
1559 (1) N bits of the result are needed;
1560 (2) one operand OP2 is widened from M2<N bits;
1561 (3) another operand OP1 is widened from M1<M2 bits; and
1562 (4) both OP1 and OP2 are single-use
1107f3ae 1563
370c2ebe 1564 the choice is between:
1107f3ae 1565
370c2ebe
RS
1566 (a) truncating OP2 to M1, doing the operation on M1,
1567 and then widening the result to N
1107f3ae 1568
370c2ebe
RS
1569 (b) widening OP1 to M2, doing the operation on M2, and then
1570 widening the result to N
1107f3ae 1571
370c2ebe
RS
1572 Both shift the M2->N widening of the inputs to the output.
1573 (a) additionally shifts the M1->M2 widening to the output;
1574 it requires fewer copies of STMT_INFO but requires an extra
1575 M2->M1 truncation.
1107f3ae 1576
370c2ebe
RS
1577 Which is better will depend on the complexity and cost of
1578 STMT_INFO, which is hard to predict at this stage. However,
1579 a clear tie-breaker in favor of (b) is the fact that the
1580 truncation in (a) increases the length of the operation chain.
1107f3ae 1581
370c2ebe
RS
1582 If instead of (4) only one of OP1 or OP2 is single-use,
1583 (b) is still a win over doing the operation in N bits:
1584 it still shifts the M2->N widening on the single-use operand
1585 to the output and reduces the number of STMT_INFO copies.
1107f3ae 1586
370c2ebe
RS
1587 If neither operand is single-use then operating on fewer than
1588 N bits might lead to more extensions overall. Whether it does
1589 or not depends on global information about the vectorization
1590 region, and whether that's a good trade-off would again
1591 depend on the complexity and cost of the statements involved,
1592 as well as things like register pressure that are not normally
1593 modelled at this stage. We therefore ignore these cases
1594 and just optimize the clear single-use wins above.
1107f3ae 1595
370c2ebe
RS
1596 Thus we take the maximum precision of the unpromoted operands
1597 and record whether any operand is single-use. */
1598 if (unprom[i].dt == vect_internal_def)
1599 {
1600 min_precision = MAX (min_precision,
1601 TYPE_PRECISION (unprom[i].type));
1602 single_use_p |= op_single_use_p;
1603 }
1604 }
1107f3ae
IR
1605 }
1606
370c2ebe
RS
1607 /* Although the operation could be done in operation_precision, we have
1608 to balance that against introducing extra truncations or extensions.
1609 Calculate the minimum precision that can be handled efficiently.
1610
1611 The loop above determined that the operation could be handled
1612 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1613 extension from the inputs to the output without introducing more
1614 instructions, and would reduce the number of instructions required
1615 for STMT_INFO itself.
1616
1617 vect_determine_precisions has also determined that the result only
 1618	     needs min_output_precision bits.  Truncating by a factor of N
1619 requires a tree of N - 1 instructions, so if TYPE is N times wider
1620 than min_output_precision, doing the operation in TYPE and truncating
1621 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1622 In contrast:
1623
1624 - truncating the input to a unary operation and doing the operation
1625 in the new type requires at most N - 1 + 1 = N instructions per
1626 output vector
1627
1628 - doing the same for a binary operation requires at most
1629 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1630
1631 Both unary and binary operations require fewer instructions than
1632 this if the operands were extended from a suitable truncated form.
1633 Thus there is usually nothing to lose by doing operations in
1634 min_output_precision bits, but there can be something to gain. */
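  /* A worked instance, purely for illustration: if TYPE is 32 bits and
     min_output_precision is 16 then N == 2, so operating in TYPE and
     truncating costs 2 + 1 = 3 statements per output vector, while
     truncating the inputs first costs at most 2 for a unary operation
     and at most 3 for a binary one.  */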
1635 if (!single_use_p)
1636 min_precision = last_stmt_info->min_output_precision;
1107f3ae 1637 else
370c2ebe
RS
1638 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1639
1640 /* Apply the minimum efficient precision we just calculated. */
1641 if (new_precision < min_precision)
1642 new_precision = min_precision;
1643 if (new_precision >= TYPE_PRECISION (type))
1644 return NULL;
1645
1646 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1647
1648 *type_out = get_vectype_for_scalar_type (type);
1649 if (!*type_out)
1650 return NULL;
1651
1652 /* We've found a viable pattern. Get the new type of the operation. */
1653 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1654 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1655
1656 /* We specifically don't check here whether the target supports the
1657 new operation, since it might be something that a later pattern
1658 wants to rewrite anyway. If targets have a minimum element size
1659 for some optabs, we should pattern-match smaller ops to larger ops
1660 where beneficial. */
1661 tree new_vectype = get_vectype_for_scalar_type (new_type);
1662 if (!new_vectype)
1663 return NULL;
1107f3ae 1664
370c2ebe
RS
1665 if (dump_enabled_p ())
1666 {
1667 dump_printf_loc (MSG_NOTE, vect_location, "demoting ");
1668 dump_generic_expr (MSG_NOTE, TDF_SLIM, type);
1669 dump_printf (MSG_NOTE, " to ");
1670 dump_generic_expr (MSG_NOTE, TDF_SLIM, new_type);
1671 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1672 }
1673
370c2ebe 1674 /* Calculate the rhs operands for an operation on NEW_TYPE. */
370c2ebe
RS
1675 tree ops[3] = {};
1676 for (unsigned int i = 1; i < first_op; ++i)
1677 ops[i - 1] = gimple_op (last_stmt, i);
1678 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1679 new_type, &unprom[0], new_vectype);
1680
1681 /* Use the operation to produce a result of type NEW_TYPE. */
1682 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1683 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1684 ops[0], ops[1], ops[2]);
1685 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1686
1687 if (dump_enabled_p ())
1688 {
1689 dump_printf_loc (MSG_NOTE, vect_location,
1690 "created pattern stmt: ");
1691 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
1692 }
1107f3ae 1693
370c2ebe
RS
1694 pattern_stmt = vect_convert_output (last_stmt_info, type,
1695 pattern_stmt, new_vectype);
1107f3ae 1696
370c2ebe 1697 return pattern_stmt;
1107f3ae
IR
1698}
1699
0267732b
RS
1700/* Recognize the patterns:
1701
1702 ATYPE a; // narrower than TYPE
1703 BTYPE b; // narrower than TYPE
1704 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1705 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1706
1707 where only the bottom half of avg is used. Try to transform them into:
1708
1709 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1710 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1711
1712 followed by:
1713
1714 TYPE avg = (TYPE) avg';
1715
1716 where NTYPE is no wider than half of TYPE. Since only the bottom half
1717 of avg is used, all or part of the cast of avg' should become redundant. */
1718
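/* A hypothetical source fragment (for illustration only) matching
   form (2) above:

     unsigned char a[N], b[N], avg[N];
     for (int i = 0; i < N; ++i)
       avg[i] = (a[i] + b[i] + 1) >> 1;	(arithmetic promoted to int)

   Only the bottom 8 bits of each shift result are stored, so the whole
   computation can become an IFN_AVG_CEIL on unsigned char elements.  */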
1719static gimple *
ba9728b0 1720vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
0267732b
RS
1721{
1722 /* Check for a shift right by one bit. */
ba9728b0
RS
1723 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1724 vec_info *vinfo = last_stmt_info->vinfo;
0267732b
RS
1725 if (!last_stmt
1726 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1727 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1728 return NULL;
1729
0267732b
RS
1730 /* Check that the shift result is wider than the users of the
1731 result need (i.e. that narrowing would be a natural choice). */
1732 tree lhs = gimple_assign_lhs (last_stmt);
1733 tree type = TREE_TYPE (lhs);
1734 unsigned int target_precision
1735 = vect_element_precision (last_stmt_info->min_output_precision);
1736 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1737 return NULL;
1738
1739 /* Get the definition of the shift input. */
1740 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1741 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1742 if (!plus_stmt_info)
1743 return NULL;
1744
1745 /* Check whether the shift input can be seen as a tree of additions on
1746 2 or 3 widened inputs.
1747
1748 Note that the pattern should be a win even if the result of one or
1749 more additions is reused elsewhere: if the pattern matches, we'd be
1750 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1751 internal_fn ifn = IFN_AVG_FLOOR;
1752 vect_unpromoted_value unprom[3];
1753 tree new_type;
1754 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1755 PLUS_EXPR, false, 3,
1756 unprom, &new_type);
1757 if (nops == 0)
1758 return NULL;
1759 if (nops == 3)
1760 {
1761 /* Check that one operand is 1. */
1762 unsigned int i;
1763 for (i = 0; i < 3; ++i)
1764 if (integer_onep (unprom[i].op))
1765 break;
1766 if (i == 3)
1767 return NULL;
1768 /* Throw away the 1 operand and keep the other two. */
1769 if (i < 2)
1770 unprom[i] = unprom[2];
1771 ifn = IFN_AVG_CEIL;
1772 }
1773
1774 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1775
1776 /* We know that:
1777
1778 (a) the operation can be viewed as:
1779
1780 TYPE widened0 = (TYPE) UNPROM[0];
1781 TYPE widened1 = (TYPE) UNPROM[1];
1782 TYPE tmp1 = widened0 + widened1 {+ 1};
1783 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1784
1785 (b) the first two statements are equivalent to:
1786
1787 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1788 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1789
1790 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1791 where sensible;
1792
1793 (d) all the operations can be performed correctly at twice the width of
1794 NEW_TYPE, due to the nature of the average operation; and
1795
1796 (e) users of the result of the right shift need only TARGET_PRECISION
1797 bits, where TARGET_PRECISION is no more than half of TYPE's
1798 precision.
1799
1800 Under these circumstances, the only situation in which NEW_TYPE
1801 could be narrower than TARGET_PRECISION is if widened0, widened1
1802 and an addition result are all used more than once. Thus we can
1803 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1804 as "free", whereas widening the result of the average instruction
1805 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1806 therefore better not to go narrower than TARGET_PRECISION. */
1807 if (TYPE_PRECISION (new_type) < target_precision)
1808 new_type = build_nonstandard_integer_type (target_precision,
1809 TYPE_UNSIGNED (new_type));
1810
1811 /* Check for target support. */
1812 tree new_vectype = get_vectype_for_scalar_type (new_type);
1813 if (!new_vectype
1814 || !direct_internal_fn_supported_p (ifn, new_vectype,
1815 OPTIMIZE_FOR_SPEED))
1816 return NULL;
1817
1818 /* The IR requires a valid vector type for the cast result, even though
1819 it's likely to be discarded. */
1820 *type_out = get_vectype_for_scalar_type (type);
1821 if (!*type_out)
1822 return NULL;
1823
1824 /* Generate the IFN_AVG* call. */
1825 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1826 tree new_ops[2];
1827 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1828 unprom, new_vectype);
1829 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1830 new_ops[1]);
1831 gimple_call_set_lhs (average_stmt, new_var);
1832 gimple_set_location (average_stmt, gimple_location (last_stmt));
1833
1834 if (dump_enabled_p ())
1835 {
1836 dump_printf_loc (MSG_NOTE, vect_location,
1837 "created pattern stmt: ");
1838 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, average_stmt, 0);
1839 }
1840
0267732b
RS
1841 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1842}
1843
370c2ebe
RS
1844/* Recognize cases in which the input to a cast is wider than its
1845 output, and the input is fed by a widening operation. Fold this
1846 by removing the unnecessary intermediate widening. E.g.:
1107f3ae 1847
370c2ebe
RS
1848 unsigned char a;
1849 unsigned int b = (unsigned int) a;
1850 unsigned short c = (unsigned short) b;
1107f3ae 1851
370c2ebe 1852 -->
1107f3ae 1853
370c2ebe 1854 unsigned short c = (unsigned short) a;
1107f3ae 1855
370c2ebe
RS
1856 Although this is rare in input IR, it is an expected side-effect
1857 of the over-widening pattern above.
1107f3ae 1858
370c2ebe
RS
1859 This is beneficial also for integer-to-float conversions, if the
1860 widened integer has more bits than the float, and if the unwidened
1861 input doesn't. */
1107f3ae 1862
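/* The integer-to-float case mentioned above, as a purely illustrative
   example:

     unsigned short a;
     unsigned long long b = (unsigned long long) a;
     float f = (float) b;

   The 64-bit intermediate is wider than float, but every 16-bit value
   of "a" converts to float exactly, so the FLOAT_EXPR can take "a"
   directly.  */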
370c2ebe 1863static gimple *
ba9728b0 1864vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
370c2ebe
RS
1865{
1866 /* Check for a cast, including an integer-to-float conversion. */
ba9728b0 1867 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
370c2ebe
RS
1868 if (!last_stmt)
1869 return NULL;
1870 tree_code code = gimple_assign_rhs_code (last_stmt);
1871 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1872 return NULL;
1107f3ae 1873
370c2ebe
RS
1874 /* Make sure that the rhs is a scalar with a natural bitsize. */
1875 tree lhs = gimple_assign_lhs (last_stmt);
1876 if (!lhs)
1877 return NULL;
1878 tree lhs_type = TREE_TYPE (lhs);
1879 scalar_mode lhs_mode;
1880 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1881 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1882 return NULL;
1107f3ae 1883
370c2ebe
RS
1884 /* Check for a narrowing operation (from a vector point of view). */
1885 tree rhs = gimple_assign_rhs1 (last_stmt);
1886 tree rhs_type = TREE_TYPE (rhs);
1887 if (!INTEGRAL_TYPE_P (rhs_type)
1888 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1889 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1890 return NULL;
82db3d43 1891
370c2ebe 1892 /* Try to find an unpromoted input. */
370c2ebe
RS
1893 vec_info *vinfo = last_stmt_info->vinfo;
1894 vect_unpromoted_value unprom;
1895 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1896 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1897 return NULL;
1cbfeccc 1898
370c2ebe
RS
1899 /* If the bits above RHS_TYPE matter, make sure that they're the
1900 same when extending from UNPROM as they are when extending from RHS. */
1901 if (!INTEGRAL_TYPE_P (lhs_type)
1902 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1903 return NULL;
1107f3ae 1904
370c2ebe
RS
1905 /* We can get the same result by casting UNPROM directly, to avoid
1906 the unnecessary widening and narrowing. */
1907 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1107f3ae 1908
370c2ebe
RS
1909 *type_out = get_vectype_for_scalar_type (lhs_type);
1910 if (!*type_out)
1107f3ae
IR
1911 return NULL;
1912
370c2ebe
RS
1913 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1914 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1915 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1107f3ae
IR
1916
1917 return pattern_stmt;
1918}
1919
00347934
RS
1920/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1921 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
36ba4aae 1922
355fe088 1923static gimple *
ba9728b0 1924vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
36ba4aae 1925{
ba9728b0 1926 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
00347934 1927 WIDEN_LSHIFT_EXPR, true,
d379ac22 1928 "vect_recog_widen_shift_pattern");
7e9a3abb
JJ
1929}
1930
 1931/* Detect a rotate pattern that wouldn't otherwise be vectorized:
1932
1933 type a_t, b_t, c_t;
1934
1935 S0 a_t = b_t r<< c_t;
1936
1937 Input/Output:
1938
ba9728b0 1939 * STMT_VINFO: The stmt from which the pattern search begins,
7e9a3abb
JJ
1940 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1941 with a sequence:
1942
1943 S1 d_t = -c_t;
1944 S2 e_t = d_t & (B - 1);
1945 S3 f_t = b_t << c_t;
1946 S4 g_t = b_t >> e_t;
1947 S0 a_t = f_t | g_t;
1948
 1949	 where B is the element bitsize of type.
1950
1951 Output:
1952
7e9a3abb
JJ
1953 * TYPE_OUT: The type of the output of this pattern.
1954
1955 * Return value: A new stmt that will be used to replace the rotate
1956 S0 stmt. */
1957
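/* A concrete (purely illustrative) instance with B == 32:

     unsigned int x, n;
     unsigned int r = (x << n) | (x >> ((-n) & 31));   (this is x r<< n)

   matches the S1..S0 sequence above: negate the rotate count, mask it
   with B - 1, do the two opposite shifts and OR the halves together.  */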
355fe088 1958static gimple *
ba9728b0 1959vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
7e9a3abb 1960{
ba9728b0 1961 gimple *last_stmt = stmt_vinfo->stmt;
7e9a3abb 1962 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
355fe088 1963 gimple *pattern_stmt, *def_stmt;
7e9a3abb 1964 enum tree_code rhs_code;
310213d4 1965 vec_info *vinfo = stmt_vinfo->vinfo;
7e9a3abb
JJ
1966 enum vect_def_type dt;
1967 optab optab1, optab2;
68119618 1968 edge ext_def = NULL;
7e9a3abb
JJ
1969
1970 if (!is_gimple_assign (last_stmt))
1971 return NULL;
1972
1973 rhs_code = gimple_assign_rhs_code (last_stmt);
1974 switch (rhs_code)
1975 {
1976 case LROTATE_EXPR:
1977 case RROTATE_EXPR:
1978 break;
1979 default:
1980 return NULL;
1981 }
1982
7e9a3abb
JJ
1983 lhs = gimple_assign_lhs (last_stmt);
1984 oprnd0 = gimple_assign_rhs1 (last_stmt);
1985 type = TREE_TYPE (oprnd0);
1986 oprnd1 = gimple_assign_rhs2 (last_stmt);
1987 if (TREE_CODE (oprnd0) != SSA_NAME
1988 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1989 || !INTEGRAL_TYPE_P (type)
1990 || !TYPE_UNSIGNED (type))
1991 return NULL;
1992
fef96d8e
RS
1993 stmt_vec_info def_stmt_info;
1994 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
7e9a3abb
JJ
1995 return NULL;
1996
1997 if (dt != vect_internal_def
1998 && dt != vect_constant_def
1999 && dt != vect_external_def)
2000 return NULL;
2001
2002 vectype = get_vectype_for_scalar_type (type);
2003 if (vectype == NULL_TREE)
2004 return NULL;
2005
2006 /* If vector/vector or vector/scalar rotate is supported by the target,
2007 don't do anything here. */
2008 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2009 if (optab1
2010 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2011 return NULL;
2012
310213d4 2013 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
7e9a3abb
JJ
2014 {
2015 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2016 if (optab2
2017 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2018 return NULL;
2019 }
2020
2021 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2022 don't do anything here either. */
2023 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2024 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2025 if (!optab1
2026 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2027 || !optab2
2028 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2029 {
310213d4 2030 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
7e9a3abb
JJ
2031 return NULL;
2032 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2033 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2034 if (!optab1
2035 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2036 || !optab2
2037 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2038 return NULL;
2039 }
2040
7e9a3abb 2041 *type_out = vectype;
7e9a3abb 2042
68119618 2043 if (dt == vect_external_def
3330053e
RS
2044 && TREE_CODE (oprnd1) == SSA_NAME)
2045 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
68119618 2046
7e9a3abb 2047 def = NULL_TREE;
7a504f33 2048 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
7e9a3abb 2049 if (TREE_CODE (oprnd1) == INTEGER_CST
7a504f33 2050 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
7e9a3abb
JJ
2051 def = oprnd1;
2052 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2053 {
2054 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7a504f33 2055 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
7e9a3abb
JJ
2056 && TYPE_PRECISION (TREE_TYPE (rhs1))
2057 == TYPE_PRECISION (type))
2058 def = rhs1;
2059 }
2060
7e9a3abb
JJ
2061 if (def == NULL_TREE)
2062 {
2063 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2064 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
2065 if (ext_def)
2066 {
2067 basic_block new_bb
2068 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2069 gcc_assert (!new_bb);
2070 }
2071 else
2072 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
2073 }
2074 stype = TREE_TYPE (def);
7a504f33 2075 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
7e9a3abb
JJ
2076
2077 if (TREE_CODE (def) == INTEGER_CST)
2078 {
cc269bb6 2079 if (!tree_fits_uhwi_p (def)
7a504f33 2080 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
7e9a3abb
JJ
2081 || integer_zerop (def))
2082 return NULL;
2083 def2 = build_int_cst (stype,
7a504f33 2084 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
7e9a3abb
JJ
2085 }
2086 else
2087 {
2088 tree vecstype = get_vectype_for_scalar_type (stype);
7e9a3abb
JJ
2089
2090 if (vecstype == NULL_TREE)
2091 return NULL;
2092 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 2093 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
2094 if (ext_def)
2095 {
2096 basic_block new_bb
2097 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2098 gcc_assert (!new_bb);
2099 }
2100 else
776bfcea 2101 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e9a3abb
JJ
2102
2103 def2 = vect_recog_temp_ssa_var (stype, NULL);
7a504f33 2104 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
0d0e4a03
JJ
2105 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2106 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
2107 if (ext_def)
2108 {
2109 basic_block new_bb
2110 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2111 gcc_assert (!new_bb);
2112 }
2113 else
776bfcea 2114 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e9a3abb
JJ
2115 }
2116
2117 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2118 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2119 ? LSHIFT_EXPR : RSHIFT_EXPR,
2120 oprnd0, def);
7e9a3abb
JJ
2121 append_pattern_def_seq (stmt_vinfo, def_stmt);
2122
2123 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2124 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2125 ? RSHIFT_EXPR : LSHIFT_EXPR,
2126 oprnd0, def2);
7e9a3abb
JJ
2127 append_pattern_def_seq (stmt_vinfo, def_stmt);
2128
2129 /* Pattern detected. */
49d8df1b 2130 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
7e9a3abb
JJ
2131
2132 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2133 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2134 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb 2135
36ba4aae
IR
2136 return pattern_stmt;
2137}
1107f3ae 2138
732a0ad3
JJ
 2139/* Detect a vector by vector shift pattern that wouldn't otherwise be
2140 vectorized:
2141
2142 type a_t;
2143 TYPE b_T, res_T;
2144
2145 S1 a_t = ;
2146 S2 b_T = ;
2147 S3 res_T = b_T op a_t;
2148
 2149	 where 'TYPE' is a type with a different size from 'type',
2150 and op is <<, >> or rotate.
2151
2152 Also detect cases:
2153
2154 type a_t;
2155 TYPE b_T, c_T, res_T;
2156
2157 S0 c_T = ;
2158 S1 a_t = (type) c_T;
2159 S2 b_T = ;
2160 S3 res_T = b_T op a_t;
2161
2162 Input/Output:
2163
ba9728b0 2164 * STMT_VINFO: The stmt from which the pattern search begins,
732a0ad3
JJ
2165 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2166 with a shift/rotate which has same type on both operands, in the
2167 second case just b_T op c_T, in the first case with added cast
363477c0 2168 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2169
2170 Output:
2171
732a0ad3
JJ
2172 * TYPE_OUT: The type of the output of this pattern.
2173
2174 * Return value: A new stmt that will be used to replace the shift/rotate
2175 S3 stmt. */
2176
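/* A hypothetical source fragment for the first form above (GIMPLE,
   unlike C, lets the shift amount keep its narrower type):

     unsigned char a_t;		("type")
     unsigned int b_T, res_T;	("TYPE")
     res_T = b_T << a_t;	(S3)

   The pattern widens a_t so that both vector operands of the shift
   have the same element width.  */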
355fe088 2177static gimple *
ba9728b0
RS
2178vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2179 tree *type_out)
732a0ad3 2180{
ba9728b0 2181 gimple *last_stmt = stmt_vinfo->stmt;
732a0ad3 2182 tree oprnd0, oprnd1, lhs, var;
25927307 2183 gimple *pattern_stmt;
732a0ad3 2184 enum tree_code rhs_code;
310213d4 2185 vec_info *vinfo = stmt_vinfo->vinfo;
732a0ad3
JJ
2186
2187 if (!is_gimple_assign (last_stmt))
2188 return NULL;
2189
2190 rhs_code = gimple_assign_rhs_code (last_stmt);
2191 switch (rhs_code)
2192 {
2193 case LSHIFT_EXPR:
2194 case RSHIFT_EXPR:
2195 case LROTATE_EXPR:
2196 case RROTATE_EXPR:
2197 break;
2198 default:
2199 return NULL;
2200 }
2201
732a0ad3
JJ
2202 lhs = gimple_assign_lhs (last_stmt);
2203 oprnd0 = gimple_assign_rhs1 (last_stmt);
2204 oprnd1 = gimple_assign_rhs2 (last_stmt);
2205 if (TREE_CODE (oprnd0) != SSA_NAME
2206 || TREE_CODE (oprnd1) != SSA_NAME
2207 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2be65d9e 2208 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
732a0ad3
JJ
2209 || TYPE_PRECISION (TREE_TYPE (lhs))
2210 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2211 return NULL;
2212
25927307
RS
2213 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2214 if (!def_vinfo)
732a0ad3
JJ
2215 return NULL;
2216
1cbfeccc
RS
2217 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2218 if (*type_out == NULL_TREE)
732a0ad3
JJ
2219 return NULL;
2220
81c40241 2221 tree def = NULL_TREE;
25927307 2222 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
0f8c840c 2223 if (def_stmt && gimple_assign_cast_p (def_stmt))
732a0ad3
JJ
2224 {
2225 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2226 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2227 && TYPE_PRECISION (TREE_TYPE (rhs1))
2228 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
0179520a
JJ
2229 {
2230 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2231 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2232 def = rhs1;
2233 else
2234 {
2235 tree mask
2236 = build_low_bits_mask (TREE_TYPE (rhs1),
2237 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2238 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2239 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
776bfcea
RS
2240 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2241 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
0179520a
JJ
2242 }
2243 }
732a0ad3
JJ
2244 }
2245
2246 if (def == NULL_TREE)
2247 {
2248 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2249 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
9c58fb7a 2250 append_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2251 }
2252
2253 /* Pattern detected. */
49d8df1b 2254 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
732a0ad3
JJ
2255
2256 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2257 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2258 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2259
732a0ad3
JJ
2260 return pattern_stmt;
2261}
2262
53109ba8
KT
2263/* Return true iff the target has a vector optab implementing the operation
2264 CODE on type VECTYPE. */
47486460 2265
53109ba8
KT
2266static bool
2267target_has_vecop_for_code (tree_code code, tree vectype)
2268{
2269 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2270 return voptab
2271 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2272}
47486460 2273
53109ba8
KT
2274/* Verify that the target has optabs of VECTYPE to perform all the steps
2275 needed by the multiplication-by-immediate synthesis algorithm described by
2276 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2277 present. Return true iff the target supports all the steps. */
2278
2279static bool
2280target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2281 tree vectype, bool synth_shift_p)
2282{
2283 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2284 return false;
2285
2286 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2287 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2288
2289 if (var == negate_variant
2290 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2291 return false;
2292
2293 /* If we must synthesize shifts with additions make sure that vector
2294 addition is available. */
2295 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2296 return false;
2297
2298 for (int i = 1; i < alg->ops; i++)
2299 {
2300 switch (alg->op[i])
2301 {
2302 case alg_shift:
2303 break;
2304 case alg_add_t_m2:
2305 case alg_add_t2_m:
2306 case alg_add_factor:
2307 if (!supports_vplus)
2308 return false;
2309 break;
2310 case alg_sub_t_m2:
2311 case alg_sub_t2_m:
2312 case alg_sub_factor:
2313 if (!supports_vminus)
2314 return false;
2315 break;
2316 case alg_unknown:
2317 case alg_m:
2318 case alg_zero:
2319 case alg_impossible:
2320 return false;
2321 default:
2322 gcc_unreachable ();
2323 }
2324 }
2325
2326 return true;
2327}
2328
2329/* Synthesize a left shift of OP by AMNT bits using a series of additions and
2330 putting the final result in DEST. Append all statements but the last into
2331 VINFO. Return the last statement. */
2332
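/* E.g. (illustratively) a left shift by 3 becomes three doublings:

     t1 = op + op;      (op << 1)
     t2 = t1 + t1;      (op << 2)
     dest = t2 + t2;    (op << 3)  */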
2333static gimple *
2334synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2335 stmt_vec_info vinfo)
2336{
2337 HOST_WIDE_INT i;
2338 tree itype = TREE_TYPE (op);
2339 tree prev_res = op;
2340 gcc_assert (amnt >= 0);
2341 for (i = 0; i < amnt; i++)
2342 {
2343 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2344 : dest;
2345 gimple *stmt
2346 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2347 prev_res = tmp_var;
2348 if (i < amnt - 1)
2349 append_pattern_def_seq (vinfo, stmt);
2350 else
2351 return stmt;
2352 }
2353 gcc_unreachable ();
2354 return NULL;
2355}
2356
2357/* Helper for vect_synth_mult_by_constant. Apply a binary operation
2358 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2359 the process if necessary. Append the resulting assignment statements
2360 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2361 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2362 left shifts using additions. */
2363
2364static tree
2365apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2366 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2367{
2368 if (integer_zerop (op2)
2369 && (code == LSHIFT_EXPR
2370 || code == PLUS_EXPR))
2371 {
2372 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2373 return op1;
2374 }
2375
2376 gimple *stmt;
2377 tree itype = TREE_TYPE (op1);
2378 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2379
2380 if (code == LSHIFT_EXPR
2381 && synth_shift_p)
2382 {
2383 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2384 stmt_vinfo);
2385 append_pattern_def_seq (stmt_vinfo, stmt);
2386 return tmp_var;
2387 }
2388
2389 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2390 append_pattern_def_seq (stmt_vinfo, stmt);
2391 return tmp_var;
2392}
2393
2394/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2395 and simple arithmetic operations to be vectorized. Record the statements
2396 produced in STMT_VINFO and return the last statement in the sequence or
2397 NULL if it's not possible to synthesize such a multiplication.
2398 This function mirrors the behavior of expand_mult_const in expmed.c but
2399 works on tree-ssa form. */
2400
2401static gimple *
2402vect_synth_mult_by_constant (tree op, tree val,
2403 stmt_vec_info stmt_vinfo)
2404{
2405 tree itype = TREE_TYPE (op);
2406 machine_mode mode = TYPE_MODE (itype);
2407 struct algorithm alg;
2408 mult_variant variant;
2409 if (!tree_fits_shwi_p (val))
2410 return NULL;
2411
2412 /* Multiplication synthesis by shifts, adds and subs can introduce
2413 signed overflow where the original operation didn't. Perform the
2414 operations on an unsigned type and cast back to avoid this.
2415 In the future we may want to relax this for synthesis algorithms
2416 that we can prove do not cause unexpected overflow. */
2417 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2418
2419 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
47486460 2420
53109ba8
KT
2421 /* Targets that don't support vector shifts but support vector additions
2422 can synthesize shifts that way. */
2423 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2424
2425 HOST_WIDE_INT hwval = tree_to_shwi (val);
2426 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2427 The vectorizer's benefit analysis will decide whether it's beneficial
2428 to do this. */
2429 bool possible = choose_mult_variant (mode, hwval, &alg,
2430 &variant, MAX_COST);
2431 if (!possible)
2432 return NULL;
2433
2434 tree vectype = get_vectype_for_scalar_type (multtype);
2435
2436 if (!vectype
2437 || !target_supports_mult_synth_alg (&alg, variant,
2438 vectype, synth_shift_p))
2439 return NULL;
2440
2441 tree accumulator;
2442
2443 /* Clear out the sequence of statements so we can populate it below. */
53109ba8
KT
2444 gimple *stmt = NULL;
2445
2446 if (cast_to_unsigned_p)
2447 {
2448 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2449 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2450 append_pattern_def_seq (stmt_vinfo, stmt);
2451 op = tmp_op;
2452 }
2453
2454 if (alg.op[0] == alg_zero)
2455 accumulator = build_int_cst (multtype, 0);
2456 else
2457 accumulator = op;
2458
2459 bool needs_fixup = (variant == negate_variant)
2460 || (variant == add_variant);
2461
2462 for (int i = 1; i < alg.ops; i++)
2463 {
2464 tree shft_log = build_int_cst (multtype, alg.log[i]);
2465 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2466 tree tmp_var = NULL_TREE;
2467
2468 switch (alg.op[i])
2469 {
2470 case alg_shift:
2471 if (synth_shift_p)
2472 stmt
2473 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2474 stmt_vinfo);
2475 else
2476 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2477 shft_log);
2478 break;
2479 case alg_add_t_m2:
2480 tmp_var
2481 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2482 stmt_vinfo, synth_shift_p);
2483 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2484 tmp_var);
2485 break;
2486 case alg_sub_t_m2:
2487 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2488 shft_log, stmt_vinfo,
2489 synth_shift_p);
2490 /* In some algorithms the first step involves zeroing the
2491 accumulator. If subtracting from such an accumulator
2492 just emit the negation directly. */
2493 if (integer_zerop (accumulator))
2494 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2495 else
2496 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2497 tmp_var);
2498 break;
2499 case alg_add_t2_m:
2500 tmp_var
2501 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2502 stmt_vinfo, synth_shift_p);
2503 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2504 break;
2505 case alg_sub_t2_m:
2506 tmp_var
2507 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2508 stmt_vinfo, synth_shift_p);
2509 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2510 break;
2511 case alg_add_factor:
2512 tmp_var
2513 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2514 stmt_vinfo, synth_shift_p);
2515 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2516 tmp_var);
2517 break;
2518 case alg_sub_factor:
2519 tmp_var
2520 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2521 stmt_vinfo, synth_shift_p);
2522 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2523 accumulator);
2524 break;
2525 default:
2526 gcc_unreachable ();
2527 }
2528 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2529 but rather return it directly. */
2530
2531 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2532 append_pattern_def_seq (stmt_vinfo, stmt);
2533 accumulator = accum_tmp;
2534 }
2535 if (variant == negate_variant)
2536 {
2537 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2538 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2539 accumulator = accum_tmp;
2540 if (cast_to_unsigned_p)
2541 append_pattern_def_seq (stmt_vinfo, stmt);
2542 }
2543 else if (variant == add_variant)
2544 {
2545 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2546 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2547 accumulator = accum_tmp;
2548 if (cast_to_unsigned_p)
2549 append_pattern_def_seq (stmt_vinfo, stmt);
2550 }
 2551  /* Move back to a signed type if needed.  */
2552 if (cast_to_unsigned_p)
2553 {
2554 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2555 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2556 }
2557
2558 return stmt;
2559}
2560
 2561/* Detect multiplication by a constant and convert it into a sequence of
 2562   shifts, additions, subtractions and negations.  We reuse the
2563 choose_mult_variant algorithms from expmed.c
47486460
VK
2564
2565 Input/Output:
2566
ba9728b0 2567 STMT_VINFO: The stmt from which the pattern search begins,
53109ba8 2568 i.e. the mult stmt.
47486460
VK
2569
2570 Output:
2571
47486460
VK
2572 * TYPE_OUT: The type of the output of this pattern.
2573
53109ba8
KT
2574 * Return value: A new stmt that will be used to replace
2575 the multiplication. */
47486460 2576
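/* As an illustrative sketch, multiplying by the constant 9 might be
   synthesized (one possible choose_mult_variant result) as:

     t1 = x << 3;	(8 * x)
     r  = t1 + x;	(9 * x, the alg_add_t_m2 step with log 3)

   so that only vector shift (or addition) and vector add support is
   needed rather than a vector multiply.  */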
355fe088 2577static gimple *
ba9728b0 2578vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
47486460 2579{
ba9728b0 2580 gimple *last_stmt = stmt_vinfo->stmt;
47486460 2581 tree oprnd0, oprnd1, vectype, itype;
53109ba8 2582 gimple *pattern_stmt;
47486460
VK
2583
2584 if (!is_gimple_assign (last_stmt))
2585 return NULL;
2586
2587 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2588 return NULL;
2589
2590 oprnd0 = gimple_assign_rhs1 (last_stmt);
2591 oprnd1 = gimple_assign_rhs2 (last_stmt);
2592 itype = TREE_TYPE (oprnd0);
2593
2594 if (TREE_CODE (oprnd0) != SSA_NAME
2595 || TREE_CODE (oprnd1) != INTEGER_CST
2596 || !INTEGRAL_TYPE_P (itype)
2be65d9e 2597 || !type_has_mode_precision_p (itype))
47486460
VK
2598 return NULL;
2599
2600 vectype = get_vectype_for_scalar_type (itype);
2601 if (vectype == NULL_TREE)
2602 return NULL;
2603
2604 /* If the target can handle vectorized multiplication natively,
2605 don't attempt to optimize this. */
53109ba8
KT
2606 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2607 if (mul_optab != unknown_optab)
47486460
VK
2608 {
2609 machine_mode vec_mode = TYPE_MODE (vectype);
53109ba8 2610 int icode = (int) optab_handler (mul_optab, vec_mode);
47486460 2611 if (icode != CODE_FOR_nothing)
53109ba8 2612 return NULL;
47486460
VK
2613 }
2614
53109ba8
KT
2615 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2616 if (!pattern_stmt)
47486460
VK
2617 return NULL;
2618
2619 /* Pattern detected. */
49d8df1b 2620 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
47486460 2621
47486460
VK
2622 *type_out = vectype;
2623
2624 return pattern_stmt;
2625}
2626
079c527f 2627/* Detect a signed division by a constant that wouldn't otherwise be
363477c0
JJ
 2628   vectorized:
2629
2630 type a_t, b_t;
2631
2632 S1 a_t = b_t / N;
2633
079c527f 2634 where type 'type' is an integral type and N is a constant.
363477c0 2635
079c527f 2636 Similarly handle modulo by a constant:
363477c0
JJ
2637
2638 S4 a_t = b_t % N;
2639
2640 Input/Output:
2641
ba9728b0 2642 * STMT_VINFO: The stmt from which the pattern search begins,
079c527f
JJ
 2643	 i.e. the division stmt.  S1 is replaced by the following if N is a
 2644	 power-of-two constant and the type is signed:
363477c0
JJ
2645 S3 y_t = b_t < 0 ? N - 1 : 0;
2646 S2 x_t = b_t + y_t;
2647 S1' a_t = x_t >> log2 (N);
2648
079c527f
JJ
 2649	 S4 is replaced, if N is a power-of-two constant and the type is
 2650	 signed, by the following (where *_T temporaries have unsigned type):
363477c0
JJ
2651 S9 y_T = b_t < 0 ? -1U : 0U;
2652 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2653 S7 z_t = (type) z_T;
2654 S6 w_t = b_t + z_t;
2655 S5 x_t = w_t & (N - 1);
2656 S4' a_t = x_t - z_t;
2657
2658 Output:
2659
363477c0
JJ
2660 * TYPE_OUT: The type of the output of this pattern.
2661
2662 * Return value: A new stmt that will be used to replace the division
2663 S1 or modulo S4 stmt. */
2664
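/* A worked (purely illustrative) instance of the power-of-two case,
   with N == 4 and a 32-bit signed type:

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   For b_t == -7 this gives x_t == -4 and a_t == -1, matching C's
   truncating division -7 / 4 == -1, where a plain arithmetic shift
   would have produced -2.  */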
355fe088 2665static gimple *
ba9728b0 2666vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
363477c0 2667{
ba9728b0 2668 gimple *last_stmt = stmt_vinfo->stmt;
5deb57cb 2669 tree oprnd0, oprnd1, vectype, itype, cond;
355fe088 2670 gimple *pattern_stmt, *def_stmt;
363477c0 2671 enum tree_code rhs_code;
363477c0 2672 optab optab;
00f07b86 2673 tree q;
079c527f 2674 int dummy_int, prec;
363477c0
JJ
2675
2676 if (!is_gimple_assign (last_stmt))
2677 return NULL;
2678
2679 rhs_code = gimple_assign_rhs_code (last_stmt);
2680 switch (rhs_code)
2681 {
2682 case TRUNC_DIV_EXPR:
eb592645 2683 case EXACT_DIV_EXPR:
363477c0
JJ
2684 case TRUNC_MOD_EXPR:
2685 break;
2686 default:
2687 return NULL;
2688 }
2689
363477c0
JJ
2690 oprnd0 = gimple_assign_rhs1 (last_stmt);
2691 oprnd1 = gimple_assign_rhs2 (last_stmt);
2692 itype = TREE_TYPE (oprnd0);
2693 if (TREE_CODE (oprnd0) != SSA_NAME
2694 || TREE_CODE (oprnd1) != INTEGER_CST
2695 || TREE_CODE (itype) != INTEGER_TYPE
2be65d9e 2696 || !type_has_mode_precision_p (itype))
363477c0
JJ
2697 return NULL;
2698
7a504f33 2699 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
363477c0
JJ
2700 vectype = get_vectype_for_scalar_type (itype);
2701 if (vectype == NULL_TREE)
2702 return NULL;
2703
8f76f377 2704 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
363477c0 2705 {
8f76f377
RS
2706 /* If the target can handle vectorized division or modulo natively,
2707 don't attempt to optimize this, since native division is likely
2708 to give smaller code. */
2709 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2710 if (optab != unknown_optab)
2711 {
2712 machine_mode vec_mode = TYPE_MODE (vectype);
2713 int icode = (int) optab_handler (optab, vec_mode);
2714 if (icode != CODE_FOR_nothing)
2715 return NULL;
2716 }
363477c0
JJ
2717 }
2718
079c527f
JJ
2719 prec = TYPE_PRECISION (itype);
2720 if (integer_pow2p (oprnd1))
363477c0 2721 {
079c527f
JJ
2722 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2723 return NULL;
363477c0 2724
079c527f 2725 /* Pattern detected. */
49d8df1b 2726 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
079c527f
JJ
2727
2728 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2729 build_int_cst (itype, 0));
eb592645
RB
2730 if (rhs_code == TRUNC_DIV_EXPR
2731 || rhs_code == EXACT_DIV_EXPR)
079c527f
JJ
2732 {
2733 tree var = vect_recog_temp_ssa_var (itype, NULL);
2734 tree shift;
2735 def_stmt
0d0e4a03
JJ
2736 = gimple_build_assign (var, COND_EXPR, cond,
2737 fold_build2 (MINUS_EXPR, itype, oprnd1,
2738 build_int_cst (itype, 1)),
2739 build_int_cst (itype, 0));
9c58fb7a 2740 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2741 var = vect_recog_temp_ssa_var (itype, NULL);
2742 def_stmt
0d0e4a03
JJ
2743 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2744 gimple_assign_lhs (def_stmt));
079c527f
JJ
2745 append_pattern_def_seq (stmt_vinfo, def_stmt);
2746
2747 shift = build_int_cst (itype, tree_log2 (oprnd1));
2748 pattern_stmt
0d0e4a03
JJ
2749 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2750 RSHIFT_EXPR, var, shift);
079c527f
JJ
2751 }
2752 else
2753 {
2754 tree signmask;
079c527f
JJ
2755 if (compare_tree_int (oprnd1, 2) == 0)
2756 {
2757 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2758 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2759 build_int_cst (itype, 1),
2760 build_int_cst (itype, 0));
079c527f
JJ
2761 append_pattern_def_seq (stmt_vinfo, def_stmt);
2762 }
2763 else
2764 {
2765 tree utype
2766 = build_nonstandard_integer_type (prec, 1);
2767 tree vecutype = get_vectype_for_scalar_type (utype);
2768 tree shift
7a504f33 2769 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
079c527f
JJ
2770 - tree_log2 (oprnd1));
2771 tree var = vect_recog_temp_ssa_var (utype, NULL);
2772
0d0e4a03
JJ
2773 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2774 build_int_cst (utype, -1),
2775 build_int_cst (utype, 0));
776bfcea 2776 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
079c527f 2777 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2778 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2779 gimple_assign_lhs (def_stmt),
2780 shift);
776bfcea 2781 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
079c527f
JJ
2782 signmask = vect_recog_temp_ssa_var (itype, NULL);
2783 def_stmt
0d0e4a03 2784 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2785 append_pattern_def_seq (stmt_vinfo, def_stmt);
2786 }
2787 def_stmt
0d0e4a03
JJ
2788 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2789 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2790 append_pattern_def_seq (stmt_vinfo, def_stmt);
2791 def_stmt
0d0e4a03
JJ
2792 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2793 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2794 fold_build2 (MINUS_EXPR, itype, oprnd1,
2795 build_int_cst (itype, 1)));
079c527f
JJ
2796 append_pattern_def_seq (stmt_vinfo, def_stmt);
2797
2798 pattern_stmt
0d0e4a03
JJ
2799 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2800 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2801 signmask);
079c527f
JJ
2802 }
2803
079c527f
JJ
2804 *type_out = vectype;
2805 return pattern_stmt;
363477c0 2806 }
079c527f 2807
6b58915b
RS
2808 if (prec > HOST_BITS_PER_WIDE_INT
2809 || integer_zerop (oprnd1))
079c527f
JJ
2810 return NULL;
2811
00f07b86
RH
2812 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2813 return NULL;
079c527f 2814
079c527f 2815 if (TYPE_UNSIGNED (itype))
363477c0 2816 {
079c527f
JJ
2817 unsigned HOST_WIDE_INT mh, ml;
2818 int pre_shift, post_shift;
6b58915b 2819 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
7a504f33 2820 & GET_MODE_MASK (itype_mode));
5deb57cb 2821 tree t1, t2, t3, t4;
079c527f 2822
fecfbfa4 2823 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
079c527f
JJ
2824 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2825 return NULL;
2826
2827 /* Find a suitable multiplier and right shift count
2828 instead of multiplying with D. */
2829 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2830
2831 /* If the suggested multiplier is more than SIZE bits, we can do better
2832 for even divisors, using an initial right shift. */
2833 if (mh != 0 && (d & 1) == 0)
363477c0 2834 {
146ec50f 2835 pre_shift = ctz_or_zero (d);
079c527f
JJ
2836 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2837 &ml, &post_shift, &dummy_int);
2838 gcc_assert (!mh);
2839 }
2840 else
2841 pre_shift = 0;
2842
2843 if (mh != 0)
2844 {
2845 if (post_shift - 1 >= prec)
2846 return NULL;
2847
5deb57cb
JJ
2848 /* t1 = oprnd0 h* ml;
2849 t2 = oprnd0 - t1;
2850 t3 = t2 >> 1;
2851 t4 = t1 + t3;
2852 q = t4 >> (post_shift - 1); */
2853 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2854 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2855 build_int_cst (itype, ml));
079c527f 2856 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2857
5deb57cb 2858 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2859 def_stmt
0d0e4a03 2860 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2861 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2862
2863 t3 = vect_recog_temp_ssa_var (itype, NULL);
2864 def_stmt
0d0e4a03 2865 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2866 append_pattern_def_seq (stmt_vinfo, def_stmt);
2867
5deb57cb 2868 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2869 def_stmt
0d0e4a03 2870 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2871
2872 if (post_shift != 1)
2873 {
2874 append_pattern_def_seq (stmt_vinfo, def_stmt);
2875
5deb57cb 2876 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2877 pattern_stmt
0d0e4a03
JJ
2878 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2879 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2880 }
2881 else
2882 {
5deb57cb 2883 q = t4;
079c527f
JJ
2884 pattern_stmt = def_stmt;
2885 }
363477c0
JJ
2886 }
2887 else
2888 {
079c527f
JJ
2889 if (pre_shift >= prec || post_shift >= prec)
2890 return NULL;
2891
2892 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2893 t2 = t1 h* ml;
2894 q = t2 >> post_shift; */
079c527f
JJ
2895 if (pre_shift)
2896 {
2897 t1 = vect_recog_temp_ssa_var (itype, NULL);
2898 def_stmt
0d0e4a03
JJ
2899 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2900 build_int_cst (NULL, pre_shift));
079c527f
JJ
2901 append_pattern_def_seq (stmt_vinfo, def_stmt);
2902 }
2903 else
2904 t1 = oprnd0;
363477c0 2905
5deb57cb 2906 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2907 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2908 build_int_cst (itype, ml));
079c527f 2909
5deb57cb
JJ
2910 if (post_shift)
2911 {
2912 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2913
5deb57cb
JJ
2914 q = vect_recog_temp_ssa_var (itype, NULL);
2915 def_stmt
0d0e4a03
JJ
2916 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2917 build_int_cst (itype, post_shift));
5deb57cb
JJ
2918 }
2919 else
2920 q = t2;
2921
2922 pattern_stmt = def_stmt;
079c527f
JJ
2923 }
2924 }
2925 else
2926 {
2927 unsigned HOST_WIDE_INT ml;
4ee4c52c 2928 int post_shift;
6b58915b 2929 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2930 unsigned HOST_WIDE_INT abs_d;
2931 bool add = false;
5deb57cb 2932 tree t1, t2, t3, t4;
079c527f
JJ
2933
2934 /* Give up for -1. */
2935 if (d == -1)
2936 return NULL;
2937
079c527f
JJ
2938 /* Since d might be INT_MIN, we have to cast to
2939 unsigned HOST_WIDE_INT before negating to avoid
2940 undefined signed overflow. */
2941 abs_d = (d >= 0
2942 ? (unsigned HOST_WIDE_INT) d
2943 : - (unsigned HOST_WIDE_INT) d);
2944
2945 /* n rem d = n rem -d */
2946 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2947 {
2948 d = abs_d;
2949 oprnd1 = build_int_cst (itype, abs_d);
2950 }
2951 else if (HOST_BITS_PER_WIDE_INT >= prec
fecfbfa4 2952 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
2953 /* This case is not handled correctly below. */
2954 return NULL;
2955
4ee4c52c 2956 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
fecfbfa4 2957 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
2958 {
2959 add = true;
dd4786fe 2960 ml |= HOST_WIDE_INT_M1U << (prec - 1);
079c527f
JJ
2961 }
2962 if (post_shift >= prec)
2963 return NULL;
2964
7abed779 2965 /* t1 = oprnd0 h* ml; */
5deb57cb 2966 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2967 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2968 build_int_cst (itype, ml));
079c527f
JJ
2969
2970 if (add)
2971 {
5deb57cb 2972 /* t2 = t1 + oprnd0; */
7abed779 2973 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2974 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2975 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
2976 }
2977 else
5deb57cb 2978 t2 = t1;
079c527f 2979
5deb57cb 2980 if (post_shift)
079c527f 2981 {
5deb57cb 2982 /* t3 = t2 >> post_shift; */
7abed779 2983 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2984 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2985 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2986 build_int_cst (itype, post_shift));
363477c0 2987 }
079c527f 2988 else
5deb57cb 2989 t3 = t2;
079c527f 2990
807e902e 2991 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
2992 int msb = 1;
2993 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2994 {
807e902e 2995 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 2996 msb = 0;
807e902e 2997 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
2998 msb = -1;
2999 }
079c527f 3000
7abed779
JJ
3001 if (msb == 0 && d >= 0)
3002 {
3003 /* q = t3; */
3004 q = t3;
3005 pattern_stmt = def_stmt;
3006 }
3007 else
3008 {
3009 /* t4 = oprnd0 >> (prec - 1);
3010 or if we know from VRP that oprnd0 >= 0
3011 t4 = 0;
3012 or if we know from VRP that oprnd0 < 0
3013 t4 = -1; */
3014 append_pattern_def_seq (stmt_vinfo, def_stmt);
3015 t4 = vect_recog_temp_ssa_var (itype, NULL);
3016 if (msb != 1)
0d0e4a03
JJ
3017 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3018 build_int_cst (itype, msb));
7abed779 3019 else
0d0e4a03
JJ
3020 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3021 build_int_cst (itype, prec - 1));
7abed779
JJ
3022 append_pattern_def_seq (stmt_vinfo, def_stmt);
3023
3024 /* q = t3 - t4; or q = t4 - t3; */
3025 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3026 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3027 d < 0 ? t3 : t4);
7abed779 3028 }
079c527f
JJ
3029 }
3030
3031 if (rhs_code == TRUNC_MOD_EXPR)
3032 {
3033 tree r, t1;
3034
3035 /* We divided. Now finish by:
3036 t1 = q * oprnd1;
3037 r = oprnd0 - t1; */
3038 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3039
3040 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3041 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 3042 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 3043
079c527f 3044 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3045 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
3046 }
3047
079c527f 3048 /* Pattern detected. */
49d8df1b 3049 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
363477c0 3050
363477c0
JJ
3051 *type_out = vectype;
3052 return pattern_stmt;
3053}
3054
69d2aade
JJ
3055/* Function vect_recog_mixed_size_cond_pattern
3056
3057 Try to find the following pattern:
3058
3059 type x_t, y_t;
3060 TYPE a_T, b_T, c_T;
3061 loop:
3062 S1 a_T = x_t CMP y_t ? b_T : c_T;
3063
 3064   where type 'TYPE' is an integral type which has a different size
bc4fb355 3065 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 3066 than 'type', the constants need to fit into an integer type
bc4fb355 3067 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
3068
3069 Input:
3070
ba9728b0 3071 * STMT_VINFO: The stmt from which the pattern search begins.
69d2aade
JJ
3072
3073 Output:
3074
69d2aade
JJ
3075 * TYPE_OUT: The type of the output of this pattern.
3076
3077 * Return value: A new stmt that will be used to replace the pattern.
3078 Additionally a def_stmt is added.
3079
3080 a_it = x_t CMP y_t ? b_it : c_it;
3081 a_T = (TYPE) a_it; */
3082
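/* A hypothetical source fragment matching S1 above:

     unsigned char x[N], y[N];
     int a[N];
     for (int i = 0; i < N; ++i)
       a[i] = x[i] < y[i] ? 1 : 2;

   The comparison works on 8-bit elements while the result is 32-bit,
   so the COND_EXPR is first evaluated in an 8-bit integer type and the
   result widened, as in the a_it/a_T sequence above.  */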
355fe088 3083static gimple *
ba9728b0 3084vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
69d2aade 3085{
ba9728b0 3086 gimple *last_stmt = stmt_vinfo->stmt;
69d2aade 3087 tree cond_expr, then_clause, else_clause;
bc4fb355 3088 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
355fe088 3089 gimple *pattern_stmt, *def_stmt;
bc4fb355 3090 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
355fe088 3091 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
bc4fb355
IR
3092 bool promotion;
3093 tree comp_scalar_type;
69d2aade
JJ
3094
3095 if (!is_gimple_assign (last_stmt)
3096 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3097 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3098 return NULL;
3099
3100 cond_expr = gimple_assign_rhs1 (last_stmt);
3101 then_clause = gimple_assign_rhs2 (last_stmt);
3102 else_clause = gimple_assign_rhs3 (last_stmt);
3103
87aab9b2
JJ
3104 if (!COMPARISON_CLASS_P (cond_expr))
3105 return NULL;
3106
bc4fb355
IR
3107 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3108 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 3109 if (comp_vectype == NULL_TREE)
69d2aade
JJ
3110 return NULL;
3111
3112 type = gimple_expr_type (last_stmt);
bc4fb355
IR
3113 if (types_compatible_p (type, comp_scalar_type)
3114 || ((TREE_CODE (then_clause) != INTEGER_CST
3115 || TREE_CODE (else_clause) != INTEGER_CST)
3116 && !INTEGRAL_TYPE_P (comp_scalar_type))
3117 || !INTEGRAL_TYPE_P (type))
3118 return NULL;
3119
3120 if ((TREE_CODE (then_clause) != INTEGER_CST
3121 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
3122 &def_stmt0, &promotion))
3123 || (TREE_CODE (else_clause) != INTEGER_CST
3124 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
3125 &def_stmt1, &promotion)))
3126 return NULL;
3127
3128 if (orig_type0 && orig_type1
3129 && !types_compatible_p (orig_type0, orig_type1))
3130 return NULL;
3131
3132 if (orig_type0)
3133 {
3134 if (!types_compatible_p (orig_type0, comp_scalar_type))
3135 return NULL;
3136 then_clause = gimple_assign_rhs1 (def_stmt0);
3137 itype = orig_type0;
3138 }
3139
3140 if (orig_type1)
3141 {
3142 if (!types_compatible_p (orig_type1, comp_scalar_type))
3143 return NULL;
3144 else_clause = gimple_assign_rhs1 (def_stmt1);
3145 itype = orig_type1;
3146 }
3147
69d2aade 3148
6c825cd4
DS
3149 HOST_WIDE_INT cmp_mode_size
3150 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3151
7a504f33
RS
3152 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3153 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
69d2aade
JJ
3154 return NULL;
3155
3156 vectype = get_vectype_for_scalar_type (type);
3157 if (vectype == NULL_TREE)
3158 return NULL;
3159
96592eed 3160 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
69d2aade
JJ
3161 return NULL;
3162
bc4fb355 3163 if (itype == NULL_TREE)
6c825cd4 3164 itype = build_nonstandard_integer_type (cmp_mode_size,
bc4fb355
IR
3165 TYPE_UNSIGNED (type));
3166
69d2aade 3167 if (itype == NULL_TREE
b397965c 3168 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
69d2aade
JJ
3169 return NULL;
3170
3171 vecitype = get_vectype_for_scalar_type (itype);
3172 if (vecitype == NULL_TREE)
3173 return NULL;
3174
96592eed 3175 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
69d2aade
JJ
3176 return NULL;
3177
7a504f33 3178 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
69d2aade 3179 {
bc4fb355
IR
3180 if ((TREE_CODE (then_clause) == INTEGER_CST
3181 && !int_fits_type_p (then_clause, itype))
3182 || (TREE_CODE (else_clause) == INTEGER_CST
3183 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
3184 return NULL;
3185 }
3186
0d0e4a03
JJ
3187 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3188 COND_EXPR, unshare_expr (cond_expr),
3189 fold_convert (itype, then_clause),
3190 fold_convert (itype, else_clause));
3191 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3192 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 3193
776bfcea 3194 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
69d2aade
JJ
3195 *type_out = vectype;
3196
49d8df1b 3197 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
f5709183 3198
69d2aade
JJ
3199 return pattern_stmt;
3200}
3201
3202
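/* Illustrative example of the transformation above (hypothetical types,
   not taken from any testcase): with 16-bit "short x_t, y_t" and 32-bit
   "int a_T", the statement

     a_T = x_t < y_t ? 5 : 31;

   compares 16-bit elements but produces 32-bit results.  The pattern
   instead builds

     a_it = x_t < y_t ? 5 : 31;      (a_it has the 16-bit type 'itype')
     a_T = (int) a_it;

   so the VEC_COND_EXPR operates at the comparison's width and only the
   final result is widened.  */
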
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can and should be optimized that way.  Assume it shouldn't
   in case it's a result of a comparison which can be directly vectorized into
   a vector comparison.  Fills in STMTS with all stmts visited during the
   walk.  */

static bool
check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
{
  tree rhs1;
  enum tree_code rhs_code;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info)
    return false;

  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
  if (!def_stmt)
    return false;

  if (stmts.contains (def_stmt))
    return true;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    CASE_CONVERT:
      if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	return false;
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    case BIT_NOT_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts)
	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
	return false;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
	  if (mask_type
	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
	    return false;
	}
      else
	return false;
      break;
    }

  bool res = stmts.add (def_stmt);
  /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
  gcc_assert (!res);

  return true;
}

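/* As an illustration (hypothetical GIMPLE, not from a testcase),
   check_bool_pattern would accept

     a_b = x_1 < y_2;
     b_b = x_3 != y_4;
     c_b = a_b & b_b;

   when the target can vectorize both comparisons as COND_EXPRs but has
   no direct vector comparison producing a mask; all three stmts are
   recorded in STMTS so that adjust_bool_stmts can rewrite them later.  */
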
/* Helper function of adjust_bool_pattern.  Cast VAR to TYPE, appending
   the cast stmt (whose rhs is the SSA_NAME_DEF_STMT result of VAR) to
   STMT_INFO's pattern definition sequence.  */

static tree
adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
{
  gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					   NOP_EXPR, var);
  append_pattern_def_seq (stmt_info, cast_stmt,
			  get_vectype_for_scalar_type (type));
  return gimple_assign_lhs (cast_stmt);
}

/* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   VAR is an SSA_NAME that should be transformed from bool to a wider integer
   type, OUT_TYPE is the desired final integer type of the whole pattern.
   STMT_INFO is the info of the pattern root and is where pattern stmts should
   be associated with.  DEFS is a map of pattern defs.  */

static void
adjust_bool_pattern (tree var, tree out_type,
		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;
  tree trueval = NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 the BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
	 computes the comparison mask and ANDs it, in one case with an
	 all-ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs1 = *defs.get (rhs1);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs1;
	      goto do_compare;
	    }
	  else
	    irhs2 = *defs.get (rhs2);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs2 = *defs.get (rhs2);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs2;
	      goto do_compare;
	    }
	  else
	    irhs1 = *defs.get (rhs1);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = *defs.get (rhs1);
      irhs2 = *defs.get (rhs2);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
					      stmt_info);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
					      stmt_info);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
	      irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       rhs_code, irhs1, irhs2);
      break;

    default:
    do_compare:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       COND_EXPR, cond_expr, trueval,
			       build_int_cst (itype, 0));
      break;
    }

  gimple_set_location (pattern_stmt, loc);
  append_pattern_def_seq (stmt_info, pattern_stmt,
			  get_vectype_for_scalar_type (itype));
  defs.put (var, gimple_assign_lhs (pattern_stmt));
}

/* Comparison function to qsort a vector of gimple stmts by UID.  */

static int
sort_after_uid (const void *p1, const void *p2)
{
  const gimple *stmt1 = *(const gimple * const *)p1;
  const gimple *stmt2 = *(const gimple * const *)p2;
  return gimple_uid (stmt1) - gimple_uid (stmt2);
}

/* Create pattern stmts for all stmts participating in the bool pattern
   specified by BOOL_STMT_SET and its root STMT with the desired type
   OUT_TYPE.  Return the def of the pattern root.  */

static tree
adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
		   tree out_type, gimple *stmt)
{
  /* Gather original stmts in the bool pattern in their order of appearance
     in the IL.  */
  auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
  for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
       i != bool_stmt_set.end (); ++i)
    bool_stmts.quick_push (*i);
  bool_stmts.qsort (sort_after_uid);

  /* Now process them in that order, producing pattern stmts.  */
  hash_map <tree, tree> defs;
  for (unsigned i = 0; i < bool_stmts.length (); ++i)
    adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
			 out_type, vinfo_for_stmt (stmt), defs);

  /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
  gimple *pattern_stmt
    = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (stmt)));
  return gimple_assign_lhs (pattern_stmt);
}

/* Helper for search_type_for_mask.  */

static tree
search_type_for_mask_1 (tree var, vec_info *vinfo,
			hash_map<gimple *, tree> &cache)
{
  tree rhs1;
  enum tree_code rhs_code;
  tree res = NULL_TREE, res2;

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL_TREE;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info)
    return NULL_TREE;

  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
  if (!def_stmt)
    return NULL_TREE;

  tree *c = cache.get (def_stmt);
  if (c)
    return *c;

  rhs_code = gimple_assign_rhs_code (def_stmt);
  rhs1 = gimple_assign_rhs1 (def_stmt);

  switch (rhs_code)
    {
    case SSA_NAME:
    case BIT_NOT_EXPR:
    CASE_CONVERT:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
				     cache);
      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
	res = res2;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree comp_vectype, mask_type;

	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    {
	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
					     vinfo, cache);
	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
		res = res2;
	      break;
	    }

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    {
	      res = NULL_TREE;
	      break;
	    }

	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
	  if (!mask_type
	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
	    {
	      res = NULL_TREE;
	      break;
	    }

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	    {
	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	    }
	  else
	    res = TREE_TYPE (rhs1);
	}
    }

  cache.put (def_stmt, res);
  return res;
}

/* Return the proper type for converting bool VAR into
   an integer value or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
search_type_for_mask (tree var, vec_info *vinfo)
{
  hash_map<gimple *, tree> cache;
  return search_type_for_mask_1 (var, vinfo, cache);
}

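/* Example (illustrative only): for a mask computed as

     m_b = (i_1 == i_2) & (c_3 == c_4);

   where i_* are 32-bit ints and c_* are 8-bit chars, and assuming the
   target supports both vector comparisons, the two comparisons yield
   candidate types of 32 and 8 bits and the BIT_AND_EXPR case keeps the
   narrower one, so search_type_for_mask returns an 8-bit unsigned
   type.  */
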
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
		 search begins, i.e. the cast of a bool to
		 an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */

static gimple *
vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL;

  hash_set<gimple *> bool_stmts;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	{
	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), last_stmt);
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
	  else
	    pattern_stmt
	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
	}
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, tmp;

	  if (!type)
	    return NULL;

	  /* We may directly use cond with narrowed type to avoid
	     multiple cond exprs with following result packing and
	     perform single cond with packed mask instead.  In case
	     of widening we better make cond first and then extract
	     results.  */
	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
	    type = TREE_TYPE (lhs);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  tmp = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
	    {
	      tree new_vectype = get_vectype_for_scalar_type (type);
	      append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);

	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
	    }
	}

      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
			       TYPE_VECTOR_SUBPARTS (vectype));

      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, vinfo, bool_stmts))
	return NULL;

      rhs = adjust_bool_stmts (bool_stmts, type, last_stmt);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	= gimple_build_assign (lhs, COND_EXPR,
			       build2 (NE_EXPR, boolean_type_node,
				       rhs, build_int_cst (type, 0)),
			       gimple_assign_rhs2 (last_stmt),
			       gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), last_stmt);
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, new_vectype;

	  if (!type)
	    return NULL;

	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
	    type = TREE_TYPE (vectype);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  new_vectype = get_vectype_for_scalar_type (type);

	  rhs = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
	}

      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
	= STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else
    return NULL;
}

/* A helper for vect_recog_mask_conversion_pattern.  Build a
   conversion of MASK to a type suitable for masking VECTYPE.
   The built statement gets the required vectype and is appended to
   STMT_VINFO's pattern definition sequence.

   Return the converted mask.  */

static tree
build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
{
  gimple *stmt;
  tree masktype, tmp;

  masktype = build_same_sized_truth_vector_type (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  append_pattern_def_seq (stmt_vinfo, stmt, masktype);

  return tmp;
}

/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   bool m_1, m_2, m_3;
   int i_4, i_5;
   double d_6, d_7;
   char c_1, c_2, c_3;

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */

static gimple *
vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info pattern_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt))
    {
      gcall *pattern_stmt;

      internal_fn ifn = gimple_call_internal_fn (last_stmt);
      int mask_argno = internal_fn_mask_index (ifn);
      if (mask_argno < 0)
	return NULL;

      bool store_p = internal_store_fn_p (ifn);
      if (store_p)
	{
	  int rhs_index = internal_fn_stored_value_index (ifn);
	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
	}
      else
	{
	  lhs = gimple_call_lhs (last_stmt);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
	}

      tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
      tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
      if (!mask_arg_type)
	return NULL;
      vectype2 = get_mask_type_for_scalar_type (mask_arg_type);

      if (!vectype1 || !vectype2
	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	return NULL;

      tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);

      auto_vec<tree, 8> args;
      unsigned int nargs = gimple_call_num_args (last_stmt);
      args.safe_grow (nargs);
      for (unsigned int i = 0; i < nargs; ++i)
	args[i] = ((int) i == mask_argno
		   ? tmp
		   : gimple_call_arg (last_stmt, i));
      pattern_stmt = gimple_build_call_internal_vec (ifn, args);

      if (!store_p)
	{
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple_call_set_lhs (pattern_stmt, lhs);
	}
      gimple_call_set_nothrow (pattern_stmt, true);

      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      if (STMT_VINFO_DATA_REF (stmt_vinfo))
	{
	  STMT_VINFO_DATA_REF (pattern_stmt_info)
	    = STMT_VINFO_DATA_REF (stmt_vinfo);
	  STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
	    = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_vinfo);
	  STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
	    = STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo);
	}

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  if (!is_gimple_assign (last_stmt))
    return NULL;

  gimple *pattern_stmt;
  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_type = search_type_for_mask (rhs1, vinfo);
	  if (!rhs1_type)
	    return NULL;
	}
      else if (COMPARISON_CLASS_P (rhs1))
	{
	  /* Check whether we're comparing scalar booleans and (if so)
	     whether a better mask type exists than the mask associated
	     with boolean-sized elements.  This avoids unnecessary packs
	     and unpacks if the booleans are set from comparisons of
	     wider types.  E.g. in:

	       int x1, x2, x3, x4, y1, y2;
	       ...
	       bool b1 = (x1 == x2);
	       bool b2 = (x3 == x4);
	       ... = b1 == b2 ? y1 : y2;

	     it is better for b1 and b2 to use the mask type associated
	     with int elements rather than bool (byte) elements.  */
	  rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
	  if (!rhs1_type)
	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
	}
      else
	return NULL;

      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2)
	return NULL;

      /* Continue if a conversion is needed.  Also continue if we have
	 a comparison whose vector type would normally be different from
	 VECTYPE2 when considered in isolation.  In that case we'll
	 replace the comparison with an SSA name (so that we can record
	 its vector type) and behave as though the comparison was an SSA
	 name from the outset.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2))
	  && (TREE_CODE (rhs1) == SSA_NAME
	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
	return NULL;

      /* If rhs1 is invariant and we can promote it leave the COND_EXPR
	 in place, we can handle it in vectorizable_condition.  This avoids
	 unnecessary promotion stmts and increased vectorization factor.  */
      if (COMPARISON_CLASS_P (rhs1)
	  && INTEGRAL_TYPE_P (rhs1_type)
	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	{
	  enum vect_def_type dt;
	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
	      && dt == vect_external_def
	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
	      && (dt == vect_external_def
		  || dt == vect_constant_def))
	    {
	      tree wide_scalar_type = build_nonstandard_integer_type
		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
		 TYPE_UNSIGNED (rhs1_type));
	      tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
		return NULL;
	    }
	}

      /* If rhs1 is a comparison we need to move it into a
	 separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
	{
	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	  pattern_stmt = gimple_build_assign (tmp, rhs1);
	  rhs1 = tmp;
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
	}

      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2)))
	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
      else
	tmp = rhs1;

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
					  gimple_assign_rhs2 (last_stmt),
					  gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion for
     one of the operands.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = search_type_for_mask (rhs1, vinfo);
  rhs2_type = search_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (rhs1_type);
      if (!vectype1)
	return NULL;
      rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (rhs2_type);
      if (!vectype1)
	return NULL;
      rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

  return pattern_stmt;
}

/* STMT is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.  */

static tree
vect_get_load_store_mask (gimple *stmt)
{
  if (gassign *def_assign = dyn_cast <gassign *> (stmt))
    {
      gcc_assert (gimple_assign_single_p (def_assign));
      return NULL_TREE;
    }

  if (gcall *def_call = dyn_cast <gcall *> (stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (def_call);
      int mask_index = internal_fn_mask_index (ifn);
      return gimple_call_arg (def_call, mask_index);
    }

  gcc_unreachable ();
}

/* Return the scalar offset type that an internal gather/scatter function
   should use.  GS_INFO describes the gather/scatter operation.  */

static tree
vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
{
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));

  /* Enforced by vect_check_gather_scatter.  */
  unsigned int offset_bits = TYPE_PRECISION (offset_type);
  gcc_assert (element_bits >= offset_bits);

  /* If the offset is narrower than the elements, extend it according
     to its sign.  */
  if (element_bits > offset_bits)
    return build_nonstandard_integer_type (element_bits,
					   TYPE_UNSIGNED (offset_type));

  return offset_type;
}

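/* For example (illustrative): a gather of 64-bit elements addressed by
   32-bit unsigned offsets has ELEMENT_BITS 64 and OFFSET_BITS 32, so the
   function returns a 64-bit unsigned type; the offsets are then
   zero-extended to element width by vect_add_conversion_to_pattern.  */
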
/* Return MASK if MASK is suitable for masking an operation on vectors
   of type VECTYPE, otherwise convert it into such a form and return
   the result.  Associate any conversion statements with STMT_INFO's
   pattern.  */

static tree
vect_convert_mask_for_vectype (tree mask, tree vectype,
			       stmt_vec_info stmt_info, vec_info *vinfo)
{
  tree mask_type = search_type_for_mask (mask, vinfo);
  if (mask_type)
    {
      tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
      if (mask_vectype
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
	mask = build_mask_conversion (mask, vectype, stmt_info);
    }
  return mask;
}

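/* E.g. (illustrative and target-dependent): if MASK was produced by
   comparing 32-bit ints, its natural mask vector has one element per
   int, but an operation on vectors of 64-bit doubles has half as many
   elements per vector, so the subparts differ and the mask is converted
   via build_mask_conversion.  */
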
/* Return the equivalent of:

     fold_convert (TYPE, VALUE)

   with the expectation that the operation will be vectorized.
   If new statements are needed, add them as pattern statements
   to STMT_INFO.  */

static tree
vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
{
  if (useless_type_conversion_p (type, TREE_TYPE (value)))
    return value;

  tree new_value = vect_recog_temp_ssa_var (type, NULL);
  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
  append_pattern_def_seq (stmt_info, conversion,
			  get_vectype_for_scalar_type (type));
  return new_value;
}

/* Try to convert STMT_INFO into a call to a gather load or scatter store
   internal function.  Return the final statement on success and set
   *TYPE_OUT to the vector type being loaded or stored.

   This function only handles gathers and scatters that were recognized
   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */

static gimple *
vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
{
  /* Currently we only support this for loop vectorization.  */
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
  if (!loop_vinfo)
    return NULL;

  /* Make sure that we're looking at a gather load or scatter store.  */
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return NULL;

  /* Get the boolean that controls whether the load or store happens.
     This is null if the operation is unconditional.  */
  gimple *stmt = stmt_info->stmt;
  tree mask = vect_get_load_store_mask (stmt);

  /* Make sure that the target supports an appropriate internal
     function for the gather/scatter operation.  */
  gather_scatter_info gs_info;
  if (!vect_check_gather_scatter (stmt, loop_vinfo, &gs_info)
      || gs_info.decl)
    return NULL;

  /* Convert the mask to the right form.  */
  tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
  if (mask)
    mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
					  loop_vinfo);

  /* Get the invariant base and non-invariant offset, converting the
     latter to the same width as the vector elements.  */
  tree base = gs_info.base;
  tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
  tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
						stmt_info);

  /* Build the new pattern statement.  */
  tree scale = size_int (gs_info.scale);
  gcall *pattern_stmt;
  if (DR_IS_READ (dr))
    {
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
						   offset, scale, mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
						   offset, scale);
      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
      gimple_call_set_lhs (pattern_stmt, load_lhs);
    }
  else
    {
      tree rhs = vect_get_store_rhs (stmt);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
						   base, offset, scale, rhs,
						   mask);
      else
	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
						   base, offset, scale, rhs);
    }
  gimple_call_set_nothrow (pattern_stmt, true);

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
  STMT_VINFO_DATA_REF (pattern_stmt_info) = dr;
  STMT_VINFO_DR_WRT_VEC_LOOP (pattern_stmt_info)
    = STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
  STMT_VINFO_GATHER_SCATTER_P (pattern_stmt_info)
    = STMT_VINFO_GATHER_SCATTER_P (stmt_info);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  *type_out = vectype;
  vect_pattern_detected ("gather/scatter pattern", stmt);

  return pattern_stmt;
}

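/* Illustrative result (hypothetical names, not from a testcase): a
   conditional gather load recognized above turns into something like

     load_1 = .MASK_GATHER_LOAD (base, offset', scale, mask')

   where offset' is the offset extended to element width and mask' is
   the mask converted by vect_convert_mask_for_vectype; gs_info.ifn
   supplies the internal function, e.g. IFN_MASK_GATHER_LOAD.  */
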
/* Return true if TYPE is a non-boolean integer type.  These are the types
   that we want to consider for narrowing.  */

static bool
vect_narrowable_type_p (tree type)
{
  return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
}

/* Return true if the operation given by CODE can be truncated to N bits
   when only N bits of the output are needed.  This is only true if bit N+1
   of the inputs has no effect on the low N bits of the result.  */

static bool
vect_truncatable_operation_p (tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case COND_EXPR:
      return true;

    default:
      return false;
    }
}

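/* For instance, PLUS_EXPR qualifies because carries in A + B propagate
   only upwards: the low N bits of the sum depend only on the low N bits
   of A and B.  RSHIFT_EXPR does not: the low N bits of A >> 4 depend on
   bits 4 to N+3 of A, which is why shifts get bespoke handling in
   vect_determine_precisions_from_users instead.  (Illustrative note.)  */
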
/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
			 unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
	  || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}

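/* E.g. a request for 12 bits of precision is recorded as a 16-bit
   operation, since vect_element_precision rounds up to a power-of-two
   number of bits (illustrative; see vect_element_precision for the
   exact rounding).  */
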
/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   is an arbitrary bit precision; it might not be a whole number of bytes.  */

static void
vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
			      unsigned int min_input_precision)
{
  /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
     that MIN_INPUT_PRECISION is a natural precision for the chain
     as a whole.  E.g. consider something like:

	 unsigned short *x, *y;
	 *y = ((*x & 0xf0) >> 4) | (*y << 4);

     The right shift can be done on unsigned chars, and only requires the
     result of "*x & 0xf0" to be done on unsigned chars.  But taking that
     approach would mean turning a natural chain of single-vector unsigned
     short operations into one that truncates "*x" and then extends
     "(*x & 0xf0) >> 4", with two vectors for each unsigned short
     operation and one vector for each unsigned char operation.
     This would be a significant pessimization.

     Instead only propagate the maximum of this precision and the precision
     required by the users of the result.  This means that we don't pessimize
     the case above but continue to optimize things like:

	 unsigned char *y;
	 unsigned short *x;
	 *y = ((*x & 0xf0) >> 4) | (*y << 4);

     Here we would truncate two vectors of *x to a single vector of
     unsigned chars and use single-vector unsigned char operations for
     everything else, rather than doing two unsigned short copies of
     "(*x & 0xf0) >> 4" and then truncating the result.  */
  min_input_precision = MAX (min_input_precision,
			     stmt_info->min_output_precision);

  if (min_input_precision < TYPE_PRECISION (type)
      && (!stmt_info->min_input_precision
	  || stmt_info->min_input_precision > min_input_precision))
    stmt_info->min_input_precision = min_input_precision;
}

/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  vec_info *vinfo = stmt_info->vinfo;
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      if (is_gimple_debug (use_stmt))
	continue;
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
	return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "only the low %d bits of ",
		       precision);
      dump_generic_expr (MSG_NOTE, TDF_SLIM, lhs);
      dump_printf (MSG_NOTE, " are significant\n");
    }
  stmt_info->min_output_precision = precision;
  return true;
}

/* Calculate min_output_precision for STMT_INFO.  */

static void
vect_determine_min_output_precision (stmt_vec_info stmt_info)
{
  /* We're only interested in statements with a narrowable result.  */
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    return;

  if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
    stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
}

/* Use range information to decide whether STMT (described by STMT_INFO)
   could be done in a narrower type.  This is effectively a forward
   propagation, since it uses context-independent information that applies
   to all users of an SSA name.  */

static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    /* Check that all relevant input operands are compatible, and update
       [MIN_VALUE, MAX_VALUE] to include their ranges.  */
    for (unsigned int i = 1; i < nops; ++i)
      {
	tree op = gimple_op (stmt, i);
	if (TREE_CODE (op) == INTEGER_CST)
	  {
	    /* Don't require the integer to have RHS_TYPE (which it might
	       not for things like shift amounts, etc.), but do require it
	       to fit the type.  */
	    if (!int_fits_type_p (op, type))
	      return;

	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
	  }
	else if (TREE_CODE (op) == SSA_NAME)
	  {
	    /* Ignore codes that don't take uniform arguments.  */
	    if (!types_compatible_p (TREE_TYPE (op), type))
	      return;

	    wide_int op_min_value, op_max_value;
	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
	      return;

	    min_value = wi::min (min_value, op_min_value, sign);
	    max_value = wi::max (max_value, op_max_value, sign);
	  }
	else
	  return;
      }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

	signed char c;
	int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

	signed char c;
	unsigned short res_1 = (unsigned short) c & 0xff00;
	int res = (int) res_1;

     where the intermediate result res_1 has an unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
		       " without loss of precision: ",
		       sign == SIGNED ? "signed" : "unsigned",
		       value_precision);
      dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
    }

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}

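/* Worked example (illustrative): given

     int res = x & 0xff;

   where range info shows res is in [0, 255], BIT_AND_EXPR is truncatable,
   the range needs only 8 unsigned bits, and the statement is recorded as
   being performable on unsigned 8-bit elements.  */
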
/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
	 the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
	tree shift = gimple_assign_rhs2 (stmt);
	if (TREE_CODE (shift) != INTEGER_CST
	    || !wi::ltu_p (wi::to_widest (shift), precision))
	  return;
	unsigned int const_shift = TREE_INT_CST_LOW (shift);
	if (code == LSHIFT_EXPR)
	  {
	    /* We need CONST_SHIFT fewer bits of the input.  */
	    operation_precision = stmt_info->min_output_precision;
	    min_input_precision = (MAX (operation_precision, const_shift)
				   - const_shift);
	  }
	else
	  {
	    /* We need CONST_SHIFT extra bits to do the operation.  */
	    operation_precision = (stmt_info->min_output_precision
				   + const_shift);
	    min_input_precision = operation_precision;
	  }
	break;
      }

    default:
      if (vect_truncatable_operation_p (code))
	{
	  /* Input bit N has no effect on output bits N-1 and lower.  */
	  operation_precision = stmt_info->min_output_precision;
	  min_input_precision = operation_precision;
	  break;
	}
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
			   " without affecting users: ",
			   TYPE_UNSIGNED (type) ? "unsigned" : "signed",
			   operation_precision);
	  dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
	}
      vect_set_operation_type (stmt_info, type, operation_precision,
			       TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}

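/* Worked example (illustrative): for

     res = x >> 4;

   where later statements only need the low 8 bits of res, the
   RSHIFT_EXPR case above computes operation_precision = 8 + 4 = 12 and
   min_input_precision = 12; vect_set_operation_type then rounds the
   operation up to 16 bits.  */
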
/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

void
vect_determine_stmt_precisions (stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}

/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

      for (unsigned int i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[nbbs - i - 1];
	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
	       !gsi_end_p (si); gsi_prev (&si))
	    vect_determine_stmt_precisions
	      (vinfo->lookup_stmt (gsi_stmt (si)));
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      gimple_stmt_iterator si = bb_vinfo->region_end;
      gimple *stmt;
      do
	{
	  if (!gsi_stmt (si))
	    si = gsi_last_bb (bb_vinfo->bb);
	  else
	    gsi_prev (&si);
	  stmt = gsi_stmt (si);
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_stmt_precisions (stmt_info);
	}
      while (stmt != gsi_stmt (bb_vinfo->region_begin));
    }
}

typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken, which means usually the more complex one needs to precede the
   less complex ones (widen_sum only after dot_prod or sad for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);

1107f3ae
IR
4723/* Mark statements that are involved in a pattern. */
4724
4725static inline void
355fe088 4726vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
1107f3ae
IR
4727 tree pattern_vectype)
4728{
1107f3ae 4729 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
41949de9 4730 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
1107f3ae 4731
41949de9
RS
4732 bool old_pattern_p = is_pattern_stmt_p (orig_stmt_info);
4733 if (old_pattern_p)
ab0ef706 4734 {
41949de9
RS
4735 /* We're replacing a statement in an existing pattern definition
4736 sequence. */
4737 if (dump_enabled_p ())
4738 {
4739 dump_printf_loc (MSG_NOTE, vect_location,
4740 "replacing earlier pattern ");
4741 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, orig_stmt, 0);
4742 }
4743
4744 /* To keep the book-keeping simple, just swap the lhs of the
4745 old and new statements, so that the old one has a valid but
4746 unused lhs. */
4747 tree old_lhs = gimple_get_lhs (orig_stmt);
4748 gimple_set_lhs (orig_stmt, gimple_get_lhs (pattern_stmt));
4749 gimple_set_lhs (pattern_stmt, old_lhs);
4750
4751 if (dump_enabled_p ())
4752 {
4753 dump_printf_loc (MSG_NOTE, vect_location, "with ");
4754 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
4755 }
4756
4757 /* Switch to the statement that ORIG replaces. */
10681ce8 4758 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
41949de9 4759
4760 /* We shouldn't be replacing the main pattern statement. */
4761 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) != orig_stmt);
ab0ef706 4762 }
1107f3ae 4763
41949de9 4764 if (def_seq)
e3947d80 4765 for (gimple_stmt_iterator si = gsi_start (def_seq);
4766 !gsi_end_p (si); gsi_next (&si))
41949de9 4767 vect_init_pattern_stmt (gsi_stmt (si), orig_stmt_info, pattern_vectype);
4768
4769 if (old_pattern_p)
4770 {
4771 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4772
4773 /* Insert all the new pattern statements before the original one. */
4774 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
4775 gimple_stmt_iterator gsi = gsi_for_stmt (orig_stmt, orig_def_seq);
4776 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4777 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
3239dde9 4778
4779 /* Remove the pattern statement that this new pattern replaces. */
4780 gsi_remove (&gsi, false);
41949de9 4781 }
4782 else
4783 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
1107f3ae 4784}
4785
b8698a0f 4786/* Function vect_pattern_recog_1
20f06221 4787
4788 Input:
4789 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4790 computation pattern.
4791 STMT: A stmt from which the pattern search should start.
4792
1cbfeccc 4793 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4794 a sequence of statements that has the same functionality and can be
4795 used to replace STMT. It returns the last statement in the sequence
4796 and adds any earlier statements to STMT's STMT_VINFO_PATTERN_DEF_SEQ.
4797 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4798 statement, having first checked that the target supports the new operation
4799 in that type.
20f06221 4800
b8698a0f 4801 This function also does some bookkeeping, as explained in the documentation
20f06221 4802 for vect_pattern_recog. */
4803
41949de9 4804static void
ba9728b0 4805vect_pattern_recog_1 (vect_recog_func *recog_func, gimple_stmt_iterator si)
20f06221 4806{
355fe088 4807 gimple *stmt = gsi_stmt (si), *pattern_stmt;
383d9c83 4808 stmt_vec_info stmt_info;
383d9c83 4809 loop_vec_info loop_vinfo;
20f06221 4810 tree pattern_vectype;
20f06221 4811
41949de9 4812 /* If this statement has already been replaced with pattern statements,
4813 leave the original statement alone, since the first match wins.
4814 Instead try to match against the definition statements that feed
4815 the main pattern statement. */
4816 stmt_info = vinfo_for_stmt (stmt);
4817 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4818 {
4819 gimple_stmt_iterator gsi;
4820 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4821 !gsi_end_p (gsi); gsi_next (&gsi))
ba9728b0 4822 vect_pattern_recog_1 (recog_func, gsi);
41949de9 4823 return;
4824 }
4825
9c58fb7a 4826 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
ba9728b0 4827 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
726a989a 4828 if (!pattern_stmt)
df0aef6d 4829 {
3239dde9 4830 /* Clear any half-formed pattern definition sequence. */
4831 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
41949de9 4832 return;
df0aef6d 4833 }
b8698a0f 4834
383d9c83 4835 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1cbfeccc 4836 gcc_assert (pattern_vectype);
383d9c83 4837
20f06221 4838 /* Found a vectorizable pattern. */
73fbfcad 4839 if (dump_enabled_p ())
20f06221 4840 {
ccb3ad87 4841 dump_printf_loc (MSG_NOTE, vect_location,
0bee0ef9 4842 "%s pattern recognized: ", recog_func->name);
ccb3ad87 4843 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
20f06221 4844 }
b8698a0f 4845
726a989a 4846 /* Mark the stmts that are involved in the pattern. */
1107f3ae 4847 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 4848
b5aeb3bb 4849 /* Patterns cannot be vectorized using SLP, because they change the order of
4850 computation. */
f5709183 4851 if (loop_vinfo)
b94c2dc1 4852 {
4853 unsigned ix, ix2;
32c91dfc 4854 stmt_vec_info *elem_ptr;
b94c2dc1 4855 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
32c91dfc 4856 elem_ptr, *elem_ptr == stmt_info);
b94c2dc1 4857 }
20f06221 4858}
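
/* For reference, a minimal sketch of a recognizer callback under the
   vect_recog_func_ptr signature above.  The name is hypothetical and
   the function is deliberately trivial - it is not registered in the
   pattern table:

     static gimple *
     vect_recog_noop_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
     {
       // Returning NULL means "no match"; vect_pattern_recog_1 then
       // clears any half-formed STMT_VINFO_PATTERN_DEF_SEQ.  A real
       // recognizer builds the replacement statements, adds the earlier
       // ones to the pattern def sequence, sets *type_out to the vector
       // type of the final statement (after checking target support),
       // and returns that final statement.
       return NULL;
     }
   */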
4859
4860
4861/* Function vect_pattern_recog
4862
4863 Input:
4864 LOOP_VINFO - a loop_vec_info of a loop in which we want to look for
4865 computation idioms.
4866
9d5e7640 4867 Output - for each computation idiom that is detected we create a new stmt
4868 that provides the same functionality and that can be vectorized. We
20f06221 4869 also record some information in the stmt_vec_info of the relevant
4870 stmts, as explained below:
4871
4872 At the entry to this function we have the following stmts, with the
4873 following initial value in the STMT_VINFO fields:
4874
4875 stmt in_pattern_p related_stmt vec_stmt
4876 S1: a_i = .... - - -
4877 S2: a_2 = ..use(a_i).. - - -
4878 S3: a_1 = ..use(a_2).. - - -
4879 S4: a_0 = ..use(a_1).. - - -
4880 S5: ... = ..use(a_0).. - - -
4881
4882 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640 4883 represented by a single stmt. We then:
4884 - create a new stmt S6 equivalent to the pattern (the stmt is not
4885 inserted into the code)
20f06221 4886 - fill in the STMT_VINFO fields as follows:
4887
4888 in_pattern_p related_stmt vec_stmt
b8698a0f 4889 S1: a_i = .... - - -
20f06221
DN
4890 S2: a_2 = ..use(a_i).. - - -
4891 S3: a_1 = ..use(a_2).. - - -
20f06221 4892 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 4893 '---> S6: a_new = .... - S4 -
20f06221 4894 S5: ... = ..use(a_0).. - - -
4895
4896 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 4897 to each other through the RELATED_STMT field).
20f06221 4898
4899 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4900 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4901 remain irrelevant unless used by stmts other than S4.
4902
4903 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 4904 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37 4905 a pointer to the new vector stmt VS6 from S6 (as usual).
4906 S4 will be skipped, and S5 will be vectorized as usual:
20f06221 4907
4908 in_pattern_p related_stmt vec_stmt
4909 S1: a_i = .... - - -
4910 S2: a_2 = ..use(a_i).. - - -
4911 S3: a_1 = ..use(a_2).. - - -
4912 > VS6: va_new = .... - - -
20f06221 4913 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 4914 '---> S6: a_new = .... - S4 VS6
20f06221 4915 > VS5: ... = ..vuse(va_new).. - - -
4916 S5: ... = ..use(a_0).. - - -
4917
9d5e7640 4918 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221 4919 elsewhere), and we'll end up with:
4920
b8698a0f 4921 VS6: va_new = ....
83197f37 4922 VS5: ... = ..vuse(va_new)..
4923
4924 In case of more than one pattern statements, e.g., widen-mult with
4925 intermediate type:
4926
4927 S1 a_t = ;
4928 S2 a_T = (TYPE) a_t;
4929 '--> S3: a_it = (interm_type) a_t;
4930 S4 prod_T = a_T * CONST;
4931 '--> S5: prod_T' = a_it w* CONST;
4932
4933 there may be other users of a_T outside the pattern. In that case S2 will
4934 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4935 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4936 be recorded in S3. */
20f06221 4937
4938void
310213d4 4939vect_pattern_recog (vec_info *vinfo)
20f06221 4940{
f5709183 4941 struct loop *loop;
772e61e1 4942 basic_block *bbs;
f5709183 4943 unsigned int nbbs;
726a989a 4944 gimple_stmt_iterator si;
20f06221 4945 unsigned int i, j;
20f06221 4946
370c2ebe 4947 vect_determine_precisions (vinfo);
4948
adac3a68 4949 DUMP_VECT_SCOPE ("vect_pattern_recog");
20f06221 4950
310213d4 4951 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
f5709183 4952 {
4953 loop = LOOP_VINFO_LOOP (loop_vinfo);
4954 bbs = LOOP_VINFO_BBS (loop_vinfo);
4955 nbbs = loop->num_nodes;
61d371eb 4956
4957 /* Scan through the loop stmts, applying the pattern recognition
4958 functions starting at each stmt visited: */
4959 for (i = 0; i < nbbs; i++)
4960 {
4961 basic_block bb = bbs[i];
4962 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
41949de9 4963 /* Scan over all generic vect_recog_xxx_pattern functions. */
4964 for (j = 0; j < NUM_PATTERNS; j++)
ba9728b0 4965 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si);
61d371eb 4966 }
f5709183 4967 }
4968 else
4969 {
61d371eb 4970 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4971 for (si = bb_vinfo->region_begin;
4972 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4973 {
df0aef6d 4974 gimple *stmt = gsi_stmt (si);
6585ff8f 4975 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
41949de9 4976 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
61d371eb 4977 continue;
f5709183 4978
61d371eb 4979 /* Scan over all generic vect_recog_xxx_pattern functions. */
4980 for (j = 0; j < NUM_PATTERNS; j++)
ba9728b0 4981 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], si);
61d371eb 4982 }
20f06221 4983 }
4984}
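
/* End-to-end illustration (hypothetical user code): for a loop such as

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; i++)
       sum += a[i] * b[i];

   vect_recog_dot_prod_pattern replaces the widening multiply and
   accumulation with a single pattern statement based on DOT_PROD_EXPR,
   which targets with a dot-product instruction can then vectorize
   directly; the original statements are marked as related and skipped,
   as described above.  */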