/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2020 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"
#include "tree-vector-builder.h"
#include "vec-perm-indices.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_kind vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }
  else
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
	}
      return false;
    }
}
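
/* For example (illustrative, not output from a specific test): if VAR is
   an SSA name known only to hold an unsigned char value, the note emitted
   above would read something like:

     x_1 has range [0x0, 0xff]  */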

/* Report that we've found an instance of pattern NAME in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
			tree vectype)
{
  vec_info *vinfo = orig_stmt_info->vinfo;
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    {
      gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
		  == vect_use_mask_type_p (orig_stmt_info));
      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
    }
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
		       tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.
   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
   from which it was derived.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE,
			tree scalar_type_for_mask = NULL_TREE)
{
  gcc_assert (!scalar_type_for_mask
	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
  vec_info *vinfo = stmt_info->vinfo;
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
      if (scalar_type_for_mask)
	new_stmt_info->mask_precision
	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}

/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (vinfo, otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}
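
/* Illustrative sketch (not part of the original file) of how the helper
   above is typically used by the pattern recognizers below; the scalar
   types chosen here are only an example:

     tree vecotype, vecitype;
     if (vect_supportable_direct_optab_p (vinfo, integer_type_node,
					  DOT_PROD_EXPR,
					  signed_char_type_node,
					  &vecotype, &vecitype))
       ...  // the target can sum products of signed chars into a vector
	    // of ints; vecitype/vecotype hold the input and output
	    // vector types
 */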

/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}
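
/* For example (illustrative, assuming 8-bit units):
     vect_element_precision (3)  == 8
     vect_element_precision (9)  == 16
     vect_element_precision (17) == 32  */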

/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
			   def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
    return false;

  return true;
}

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion	sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}
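
/* For example (illustrative): if TYPE is a 32-bit integer type,
   *COMMON_TYPE is unsigned char and OP is the constant 1000, then OP
   needs 10 bits, which vect_element_precision rounds up to 16, so
   *COMMON_TYPE becomes a 16-bit unsigned type and the function returns
   true.  A constant needing more than 16 bits would defeat the widening
   and make the function return false.  */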

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}
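
/* For example (illustrative): jousting unsigned char against signed char
   for a 32-bit result.  Neither type can hold all values of the other,
   so the function falls through to the mismatched-signs case and picks
   a 16-bit signed *COMMON_TYPE, which still leaves room for a widening
   operation since 16 * 2 <= 32.  */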

/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  vec_info *vinfo = stmt_info->vinfo;
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
						     this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
					   shift_p, max_nops, this_unprom,
					   common_type);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		return 0;
	    }
	}
      next_op += nops;
    }
  return next_op;
}
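
/* For example (illustrative): given

     signed char a, b;
     int prod = (int) a * (int) b;

   calling this function on the statement defining "prod" with
   CODE == MULT_EXPR, WIDENED_CODE == WIDEN_MULT_EXPR and MAX_NOPS == 2
   returns 2, describes "a" and "b" in UNPROM[0] and UNPROM[1], and sets
   *COMMON_TYPE to the signed char type.  */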

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  vec_info *vinfo = stmt2_info->vinfo;
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
	}

      return true;
    }
}

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype)
{
  vec_info *vinfo = stmt_info->vinfo;

  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
	 instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
	input = lhs;
      /* If the precision we want is between the source and result
	 precisions of the existing cast, try splitting the cast into
	 two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
	{
	  /* In order to preserve the semantics of the original cast,
	     give the mid-way point the same signedness as the input value.

	     It would be possible to use a signed type here instead if
	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
	     make the sign of the midtype sensitive to the order in
	     which we process the statements, since the signedness of
	     TYPE is the signedness required by just one of possibly
	     many users.  Also, unsigned promotions are usually as cheap
	     as or cheaper than signed ones, so it's better to keep an
	     unsigned promotion.  */
	  tree midtype = build_nonstandard_integer_type
	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
	  tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype);
	  if (vec_midtype)
	    {
	      input = vect_recog_temp_ssa_var (midtype, NULL);
	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
						       unprom->op);
	      if (!vect_split_statement (unprom->caster, input, new_stmt,
					 vec_midtype))
		append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
	    }
	}

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
	return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;
      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
		     tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_INFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.
   Also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
    {
      if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)),
				       code))
	return false;
    }
  else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL)
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0)
    std::swap (*op0_out, *op1_out);
  return true;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}
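
/* Example source loop for the dot-product recognizer above (illustrative;
   it vectorizes on targets that provide a dot-product optab for the
   element types, e.g. via a widening multiply-accumulate instruction):

     signed char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i] * y[i];	// the chars are promoted to int by C

   Here 'type' is signed char and TYPE1/TYPE2 are int, and the loop body
   is replaced by DOT_PROD_EXPR <x[i], y[i], sum_0>.  */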


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  [DAD = (TYPE2) DAD;  #optional]
	  sum_1 = DAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in
     the loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}
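
/* Example source loop for the SAD recognizer above (illustrative; this
   is the classic sum-of-absolute-differences kernel from video codecs,
   assuming the target provides a SAD optab for the element type):

     unsigned char x[N], y[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += __builtin_abs (x[i] - y[i]);

   The subtraction happens in int, at least double the width of the
   chars, so the ABS input cannot overflow and the loop body becomes
   SAD_EXPR <x[i], y[i], sum_0>.  */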

/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  vec_info *vinfo = last_stmt_info->vinfo;
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (vinfo, half_type);
  tree vecitype = get_vectype_for_scalar_type (vinfo, itype);
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (vinfo, type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
				      WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}
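
/* Example source loop for the widening multiply (illustrative):

     short a[N], b[N];
     int c[N];
     for (int i = 0; i < N; ++i)
       c[i] = a[i] * b[i];	// int = short * short

   The multiplication of the promoted shorts is rewritten as
   WIDEN_MULT_EXPR <a[i], b[i]>, letting the target multiply narrow
   elements and produce wide results directly.  */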

/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	x = x * x
   or
	x = sqrt (x)
*/

static gimple *
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (vinfo,
						       TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base),
					    MULT_EXPR, TREE_TYPE (base),
					    type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}
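
/* For example (illustrative): in a vectorized loop, a call such as

     y[i] = pow (x[i], 2.0);

   is replaced by the pattern statement y[i] = x[i] * x[i], while
   pow (x[i], 0.5) becomes an IFN_SQRT call when the target supports
   vector sqrt.  */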


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will be
   detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR,
					unprom0.type, type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}
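
/* Example source loop for the widening sum (illustrative):

     unsigned char x[N];
     unsigned int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += x[i];

   The promoted addition becomes WIDEN_SUM <x[i], sum_0> when the
   target provides a widening-sum optab for the element type.  */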
1502
370c2ebe
RS
1503/* Recognize cases in which an operation is performed in one type WTYPE
1504 but could be done more efficiently in a narrower type NTYPE. For example,
1505 if we have:
20f06221 1506
370c2ebe
RS
1507 ATYPE a; // narrower than NTYPE
1508 BTYPE b; // narrower than NTYPE
1509 WTYPE aw = (WTYPE) a;
1510 WTYPE bw = (WTYPE) b;
1511 WTYPE res = aw + bw; // only uses of aw and bw
1107f3ae 1512
370c2ebe 1513 then it would be more efficient to do:
1107f3ae 1514
370c2ebe
RS
1515 NTYPE an = (NTYPE) a;
1516 NTYPE bn = (NTYPE) b;
1517 NTYPE resn = an + bn;
1518 WTYPE res = (WTYPE) resn;
1107f3ae 1519
370c2ebe 1520 Other situations include things like:
f5709183 1521
370c2ebe
RS
1522 ATYPE a; // NTYPE or narrower
1523 WTYPE aw = (WTYPE) a;
1524 WTYPE res = aw + b;
1107f3ae 1525
370c2ebe
RS
1526 when only "(NTYPE) res" is significant. In that case it's more efficient
1527 to truncate "b" and do the operation on NTYPE instead:
1107f3ae 1528
370c2ebe
RS
1529 NTYPE an = (NTYPE) a;
1530 NTYPE bn = (NTYPE) b; // truncation
1531 NTYPE resn = an + bn;
1532 WTYPE res = (WTYPE) resn;
1107f3ae 1533
370c2ebe
RS
1534 All users of "res" should then use "resn" instead, making the final
1535 statement dead (not marked as relevant). The final statement is still
1536 needed to maintain the type correctness of the IR.
1107f3ae 1537
370c2ebe
RS
1538 vect_determine_precisions has already determined the minimum
1539 precision of the operation and the minimum precision required
1540 by users of the result. */
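/* Illustrative sketch (not from the original sources): with 8-bit
   unsigned chars, C's integer promotion forces the arithmetic below
   into 32-bit int:

     unsigned char a[N], b[N], c[N];
     for (int i = 0; i < N; i++)
       c[i] = a[i] + b[i];	/* int addition, only low 8 bits stored  */

   Since only the bottom byte of each sum is used, the recognizer can
   perform the addition on narrower elements and so process more lanes
   per vector operation.  */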
1107f3ae 1541
370c2ebe 1542static gimple *
ba9728b0 1543vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
370c2ebe 1544{
ba9728b0 1545 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
370c2ebe
RS
1546 if (!last_stmt)
1547 return NULL;
9ef7adc0 1548
370c2ebe
RS
1549 /* See whether we have found that this operation can be done on a
1550 narrower type without changing its semantics. */
370c2ebe
RS
1551 unsigned int new_precision = last_stmt_info->operation_precision;
1552 if (!new_precision)
1553 return NULL;
1107f3ae 1554
370c2ebe
RS
1555 vec_info *vinfo = last_stmt_info->vinfo;
1556 tree lhs = gimple_assign_lhs (last_stmt);
1557 tree type = TREE_TYPE (lhs);
1558 tree_code code = gimple_assign_rhs_code (last_stmt);
1559
1560 /* Keep the first operand of a COND_EXPR as-is: only the other two
1561 operands are interesting. */
1562 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1563
1564 /* Check the operands. */
1565 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1566 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1567 unprom.quick_grow (nops);
1568 unsigned int min_precision = 0;
1569 bool single_use_p = false;
1570 for (unsigned int i = 0; i < nops; ++i)
1107f3ae 1571 {
370c2ebe
RS
1572 tree op = gimple_op (last_stmt, first_op + i);
1573 if (TREE_CODE (op) == INTEGER_CST)
1574 unprom[i].set_op (op, vect_constant_def);
1575 else if (TREE_CODE (op) == SSA_NAME)
1576 {
1577 bool op_single_use_p = true;
1578 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1579 &op_single_use_p))
1580 return NULL;
1581 /* If:
1107f3ae 1582
370c2ebe
RS
1583 (1) N bits of the result are needed;
1584 (2) all inputs are widened from M<N bits; and
1585 (3) one operand OP is a single-use SSA name
1107f3ae 1586
370c2ebe
RS
1587 we can shift the M->N widening from OP to the output
1588 without changing the number or type of extensions involved.
1589 This then reduces the number of copies of STMT_INFO.
1107f3ae 1590
370c2ebe
RS
1591 If instead of (3) more than one operand is a single-use SSA name,
1592 shifting the extension to the output is even more of a win.
1107f3ae 1593
370c2ebe 1594 If instead:
1107f3ae 1595
370c2ebe
RS
1596 (1) N bits of the result are needed;
1597 (2) one operand OP2 is widened from M2<N bits;
1598 (3) another operand OP1 is widened from M1<M2 bits; and
1599 (4) both OP1 and OP2 are single-use
1107f3ae 1600
370c2ebe 1601 the choice is between:
1107f3ae 1602
370c2ebe
RS
1603 (a) truncating OP2 to M1, doing the operation on M1,
1604 and then widening the result to N
1107f3ae 1605
370c2ebe
RS
1606 (b) widening OP1 to M2, doing the operation on M2, and then
1607 widening the result to N
1107f3ae 1608
370c2ebe
RS
1609 Both shift the M2->N widening of the inputs to the output.
1610 (a) additionally shifts the M1->M2 widening to the output;
1611 it requires fewer copies of STMT_INFO but requires an extra
1612 M2->M1 truncation.
1107f3ae 1613
370c2ebe
RS
1614 Which is better will depend on the complexity and cost of
1615 STMT_INFO, which is hard to predict at this stage. However,
1616 a clear tie-breaker in favor of (b) is the fact that the
1617 truncation in (a) increases the length of the operation chain.
1107f3ae 1618
370c2ebe
RS
1619 If instead of (4) only one of OP1 or OP2 is single-use,
1620 (b) is still a win over doing the operation in N bits:
1621 it still shifts the M2->N widening on the single-use operand
1622 to the output and reduces the number of STMT_INFO copies.
1107f3ae 1623
370c2ebe
RS
1624 If neither operand is single-use then operating on fewer than
1625 N bits might lead to more extensions overall. Whether it does
1626 or not depends on global information about the vectorization
1627 region, and whether that's a good trade-off would again
1628 depend on the complexity and cost of the statements involved,
1629 as well as things like register pressure that are not normally
1630 modelled at this stage. We therefore ignore these cases
1631 and just optimize the clear single-use wins above.
1107f3ae 1632
370c2ebe
RS
1633 Thus we take the maximum precision of the unpromoted operands
1634 and record whether any operand is single-use. */
1635 if (unprom[i].dt == vect_internal_def)
1636 {
1637 min_precision = MAX (min_precision,
1638 TYPE_PRECISION (unprom[i].type));
1639 single_use_p |= op_single_use_p;
1640 }
1641 }
1107f3ae
IR
1642 }
1643
370c2ebe
RS
1644 /* Although the operation could be done in operation_precision, we have
1645 to balance that against introducing extra truncations or extensions.
1646 Calculate the minimum precision that can be handled efficiently.
1647
1648 The loop above determined that the operation could be handled
1649 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1650 extension from the inputs to the output without introducing more
1651 instructions, and would reduce the number of instructions required
1652 for STMT_INFO itself.
1653
1654 vect_determine_precisions has also determined that the result only
1655 needs min_output_precision bits. Truncating by a factor of N times
1656 requires a tree of N - 1 instructions, so if TYPE is N times wider
1657 than min_output_precision, doing the operation in TYPE and truncating
1658 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1659 In contrast:
1660
1661 - truncating the input to a unary operation and doing the operation
1662 in the new type requires at most N - 1 + 1 = N instructions per
1663 output vector
1664
1665 - doing the same for a binary operation requires at most
1666 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1667
1668 Both unary and binary operations require fewer instructions than
1669 this if the operands were extended from a suitable truncated form.
1670 Thus there is usually nothing to lose by doing operations in
1671 min_output_precision bits, but there can be something to gain. */
1672 if (!single_use_p)
1673 min_precision = last_stmt_info->min_output_precision;
1107f3ae 1674 else
370c2ebe
RS
1675 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1676
1677 /* Apply the minimum efficient precision we just calculated. */
1678 if (new_precision < min_precision)
1679 new_precision = min_precision;
1680 if (new_precision >= TYPE_PRECISION (type))
1681 return NULL;
1682
1683 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1684
7ed54790 1685 *type_out = get_vectype_for_scalar_type (vinfo, type);
370c2ebe
RS
1686 if (!*type_out)
1687 return NULL;
1688
1689 /* We've found a viable pattern. Get the new type of the operation. */
1690 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1691 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1692
a1ff7f14
RS
1693 /* If we're truncating an operation, we need to make sure that we
1694 don't introduce new undefined overflow. The codes tested here are
1695 a subset of those accepted by vect_truncatable_operation_p. */
1696 tree op_type = new_type;
1697 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1698 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1699 op_type = build_nonstandard_integer_type (new_precision, true);
1700
370c2ebe
RS
1701 /* We specifically don't check here whether the target supports the
1702 new operation, since it might be something that a later pattern
1703 wants to rewrite anyway. If targets have a minimum element size
1704 for some optabs, we should pattern-match smaller ops to larger ops
1705 where beneficial. */
7ed54790
RS
1706 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
1707 tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type);
a1ff7f14 1708 if (!new_vectype || !op_vectype)
370c2ebe 1709 return NULL;
1107f3ae 1710
370c2ebe 1711 if (dump_enabled_p ())
3c2a8ed0
DM
1712 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1713 type, new_type);
1107f3ae 1714
a1ff7f14 1715 /* Calculate the rhs operands for an operation on OP_TYPE. */
370c2ebe
RS
1716 tree ops[3] = {};
1717 for (unsigned int i = 1; i < first_op; ++i)
1718 ops[i - 1] = gimple_op (last_stmt, i);
1719 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
a1ff7f14 1720 op_type, &unprom[0], op_vectype);
370c2ebe 1721
a1ff7f14
RS
1722 /* Use the operation to produce a result of type OP_TYPE. */
1723 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
370c2ebe
RS
1724 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1725 ops[0], ops[1], ops[2]);
1726 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1727
1728 if (dump_enabled_p ())
3c2a8ed0
DM
1729 dump_printf_loc (MSG_NOTE, vect_location,
1730 "created pattern stmt: %G", pattern_stmt);
1107f3ae 1731
a1ff7f14
RS
1732 /* Convert back to the original signedness, if OP_TYPE is different
1733 from NEW_TYPE. */
1734 if (op_type != new_type)
1735 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1736 pattern_stmt, op_vectype);
1737
1738 /* Promote the result to the original type. */
370c2ebe
RS
1739 pattern_stmt = vect_convert_output (last_stmt_info, type,
1740 pattern_stmt, new_vectype);
1107f3ae 1741
370c2ebe 1742 return pattern_stmt;
1107f3ae
IR
1743}
1744
58cc9876
YW
1745/* Recognize the following patterns:
1746
1747 ATYPE a; // narrower than TYPE
1748 BTYPE b; // narrower than TYPE
1749
1750 1) Multiply high with scaling
1751 TYPE res = ((TYPE) a * (TYPE) b) >> c;
1752 2) ... or also with rounding
1753 TYPE res = ((((TYPE) a * (TYPE) b) >> d) + 1) >> 1;
1754
1755 where only the bottom half of res is used. */
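/* Illustrative sketch (not from the original sources): assuming 16-bit
   shorts and 32-bit ints, a Q15 fixed-point multiplication with
   rounding has exactly this shape:

     short a[N], b[N], r[N];
     for (int i = 0; i < N; i++)
       r[i] = ((((int) a[i] * b[i]) >> 14) + 1) >> 1;

   Only the low 16 bits of the shift result are stored, so the whole
   expression can become one IFN_MULHRS per vector; dropping the
   "+ 1 ... >> 1" rounding step (and shifting by 15 instead) gives the
   IFN_MULHS form.  */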
1756
1757static gimple *
1758vect_recog_mulhs_pattern (stmt_vec_info last_stmt_info, tree *type_out)
1759{
1760 /* Check for a right shift. */
1761 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1762 if (!last_stmt
1763 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR)
1764 return NULL;
1765 vec_info *vinfo = last_stmt_info->vinfo;
1766
1767 /* Check that the shift result is wider than the users of the
1768 result need (i.e. that narrowing would be a natural choice). */
1769 tree lhs_type = TREE_TYPE (gimple_assign_lhs (last_stmt));
1770 unsigned int target_precision
1771 = vect_element_precision (last_stmt_info->min_output_precision);
1772 if (!INTEGRAL_TYPE_P (lhs_type)
1773 || target_precision >= TYPE_PRECISION (lhs_type))
1774 return NULL;
1775
1776 /* Look through any change in sign on the outer shift input. */
1777 vect_unpromoted_value unprom_rshift_input;
1778 tree rshift_input = vect_look_through_possible_promotion
1779 (vinfo, gimple_assign_rhs1 (last_stmt), &unprom_rshift_input);
1780 if (!rshift_input
1781 || TYPE_PRECISION (TREE_TYPE (rshift_input))
1782 != TYPE_PRECISION (lhs_type))
1783 return NULL;
1784
1785 /* Get the definition of the shift input. */
1786 stmt_vec_info rshift_input_stmt_info
1787 = vect_get_internal_def (vinfo, rshift_input);
1788 if (!rshift_input_stmt_info)
1789 return NULL;
1790 gassign *rshift_input_stmt
1791 = dyn_cast <gassign *> (rshift_input_stmt_info->stmt);
1792 if (!rshift_input_stmt)
1793 return NULL;
1794
1795 stmt_vec_info mulh_stmt_info;
1796 tree scale_term;
1797 internal_fn ifn;
1798 unsigned int expect_offset;
1799
1800 /* Check for the presence of the rounding term. */
1801 if (gimple_assign_rhs_code (rshift_input_stmt) == PLUS_EXPR)
1802 {
1803 /* Check that the outer shift was by 1. */
1804 if (!integer_onep (gimple_assign_rhs2 (last_stmt)))
1805 return NULL;
1806
1807 /* Check that the second operand of the PLUS_EXPR is 1. */
1808 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt)))
1809 return NULL;
1810
1811 /* Look through any change in sign on the addition input. */
1812 vect_unpromoted_value unprom_plus_input;
1813 tree plus_input = vect_look_through_possible_promotion
1814 (vinfo, gimple_assign_rhs1 (rshift_input_stmt), &unprom_plus_input);
1815 if (!plus_input
1816 || TYPE_PRECISION (TREE_TYPE (plus_input))
1817 != TYPE_PRECISION (TREE_TYPE (rshift_input)))
1818 return NULL;
1819
1820 /* Get the definition of the multiply-high-scale part. */
1821 stmt_vec_info plus_input_stmt_info
1822 = vect_get_internal_def (vinfo, plus_input);
1823 if (!plus_input_stmt_info)
1824 return NULL;
1825 gassign *plus_input_stmt
1826 = dyn_cast <gassign *> (plus_input_stmt_info->stmt);
1827 if (!plus_input_stmt
1828 || gimple_assign_rhs_code (plus_input_stmt) != RSHIFT_EXPR)
1829 return NULL;
1830
1831 /* Look through any change in sign on the scaling input. */
1832 vect_unpromoted_value unprom_scale_input;
1833 tree scale_input = vect_look_through_possible_promotion
1834 (vinfo, gimple_assign_rhs1 (plus_input_stmt), &unprom_scale_input);
1835 if (!scale_input
1836 || TYPE_PRECISION (TREE_TYPE (scale_input))
1837 != TYPE_PRECISION (TREE_TYPE (plus_input)))
1838 return NULL;
1839
1840 /* Get the definition of the multiply-high part. */
1841 mulh_stmt_info = vect_get_internal_def (vinfo, scale_input);
1842 if (!mulh_stmt_info)
1843 return NULL;
1844
1845 /* Get the scaling term. */
1846 scale_term = gimple_assign_rhs2 (plus_input_stmt);
1847
1848 expect_offset = target_precision + 2;
1849 ifn = IFN_MULHRS;
1850 }
1851 else
1852 {
1853 mulh_stmt_info = rshift_input_stmt_info;
1854 scale_term = gimple_assign_rhs2 (last_stmt);
1855
1856 expect_offset = target_precision + 1;
1857 ifn = IFN_MULHS;
1858 }
1859
1860 /* Check that the scaling factor is correct. */
1861 if (TREE_CODE (scale_term) != INTEGER_CST
1862 || wi::to_widest (scale_term) + expect_offset
1863 != TYPE_PRECISION (lhs_type))
1864 return NULL;
1865
1866 /* Check whether the scaling input term can be seen as two widened
1867 inputs multiplied together. */
1868 vect_unpromoted_value unprom_mult[2];
1869 tree new_type;
1870 unsigned int nops
1871 = vect_widened_op_tree (mulh_stmt_info, MULT_EXPR, WIDEN_MULT_EXPR,
1872 false, 2, unprom_mult, &new_type);
1873 if (nops != 2)
1874 return NULL;
1875
1876 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt);
1877
1878 /* Adjust output precision. */
1879 if (TYPE_PRECISION (new_type) < target_precision)
1880 new_type = build_nonstandard_integer_type
1881 (target_precision, TYPE_UNSIGNED (new_type));
1882
1883 /* Check for target support. */
7ed54790 1884 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
58cc9876
YW
1885 if (!new_vectype
1886 || !direct_internal_fn_supported_p
1887 (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
1888 return NULL;
1889
1890 /* The IR requires a valid vector type for the cast result, even though
1891 it's likely to be discarded. */
7ed54790 1892 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
58cc9876
YW
1893 if (!*type_out)
1894 return NULL;
1895
1896 /* Generate the IFN_MULHS or IFN_MULHRS call. */
1897 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1898 tree new_ops[2];
1899 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1900 unprom_mult, new_vectype);
1901 gcall *mulhrs_stmt
1902 = gimple_build_call_internal (ifn, 2, new_ops[0], new_ops[1]);
1903 gimple_call_set_lhs (mulhrs_stmt, new_var);
1904 gimple_set_location (mulhrs_stmt, gimple_location (last_stmt));
1905
1906 if (dump_enabled_p ())
1907 dump_printf_loc (MSG_NOTE, vect_location,
1908 "created pattern stmt: %G", mulhrs_stmt);
1909
1910 return vect_convert_output (last_stmt_info, lhs_type,
1911 mulhrs_stmt, new_vectype);
1912}
1913
0267732b
RS
1914/* Recognize the patterns:
1915
1916 ATYPE a; // narrower than TYPE
1917 BTYPE b; // narrower than TYPE
1918 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1919 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1920
1921 where only the bottom half of avg is used. Try to transform them into:
1922
1923 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1924 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1925
1926 followed by:
1927
1928 TYPE avg = (TYPE) avg';
1929
1930 where NTYPE is no wider than half of TYPE. Since only the bottom half
8e03b21e
PK
1931 of avg is used, all or part of the cast of avg' should become redundant.
1932
1933 If there is no target support available, generate code to distribute rshift
1934 over plus and add a carry. */
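/* Illustrative sketch (not from the original sources): rounding
   averages of unsigned chars, as in image blending, have this shape
   after C's promotion to int:

     unsigned char a[N], b[N], r[N];
     for (int i = 0; i < N; i++)
       r[i] = (a[i] + b[i] + 1) >> 1;	/* only low 8 bits stored  */

   and can become one .AVG_CEIL per vector of chars (or, without the
   "+ 1", one .AVG_FLOOR), or else the shift-and-carry fallback
   described above.  */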
0267732b
RS
1935
1936static gimple *
ba9728b0 1937vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
0267732b
RS
1938{
1939 /* Check for a shift right by one bit. */
ba9728b0
RS
1940 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1941 vec_info *vinfo = last_stmt_info->vinfo;
0267732b
RS
1942 if (!last_stmt
1943 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1944 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1945 return NULL;
1946
0267732b
RS
1947 /* Check that the shift result is wider than the users of the
1948 result need (i.e. that narrowing would be a natural choice). */
1949 tree lhs = gimple_assign_lhs (last_stmt);
1950 tree type = TREE_TYPE (lhs);
1951 unsigned int target_precision
1952 = vect_element_precision (last_stmt_info->min_output_precision);
1953 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1954 return NULL;
1955
a1ff7f14 1956 /* Look through any change in sign on the shift input. */
0267732b 1957 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
a1ff7f14
RS
1958 vect_unpromoted_value unprom_plus;
1959 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1960 &unprom_plus);
1961 if (!rshift_rhs
1962 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1963 return NULL;
1964
1965 /* Get the definition of the shift input. */
0267732b
RS
1966 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1967 if (!plus_stmt_info)
1968 return NULL;
1969
1970 /* Check whether the shift input can be seen as a tree of additions on
1971 2 or 3 widened inputs.
1972
1973 Note that the pattern should be a win even if the result of one or
1974 more additions is reused elsewhere: if the pattern matches, we'd be
1975 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1976 internal_fn ifn = IFN_AVG_FLOOR;
1977 vect_unpromoted_value unprom[3];
1978 tree new_type;
1979 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1980 PLUS_EXPR, false, 3,
1981 unprom, &new_type);
1982 if (nops == 0)
1983 return NULL;
1984 if (nops == 3)
1985 {
1986 /* Check that one operand is 1. */
1987 unsigned int i;
1988 for (i = 0; i < 3; ++i)
1989 if (integer_onep (unprom[i].op))
1990 break;
1991 if (i == 3)
1992 return NULL;
1993 /* Throw away the 1 operand and keep the other two. */
1994 if (i < 2)
1995 unprom[i] = unprom[2];
1996 ifn = IFN_AVG_CEIL;
1997 }
1998
1999 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
2000
2001 /* We know that:
2002
2003 (a) the operation can be viewed as:
2004
2005 TYPE widened0 = (TYPE) UNPROM[0];
2006 TYPE widened1 = (TYPE) UNPROM[1];
2007 TYPE tmp1 = widened0 + widened1 {+ 1};
2008 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
2009
2010 (b) the first two statements are equivalent to:
2011
2012 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
2013 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
2014
2015 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
2016 where sensible;
2017
2018 (d) all the operations can be performed correctly at twice the width of
2019 NEW_TYPE, due to the nature of the average operation; and
2020
2021 (e) users of the result of the right shift need only TARGET_PRECISION
2022 bits, where TARGET_PRECISION is no more than half of TYPE's
2023 precision.
2024
2025 Under these circumstances, the only situation in which NEW_TYPE
2026 could be narrower than TARGET_PRECISION is if widened0, widened1
2027 and an addition result are all used more than once. Thus we can
2028 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
2029 as "free", whereas widening the result of the average instruction
2030 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
2031 therefore better not to go narrower than TARGET_PRECISION. */
2032 if (TYPE_PRECISION (new_type) < target_precision)
2033 new_type = build_nonstandard_integer_type (target_precision,
2034 TYPE_UNSIGNED (new_type));
2035
2036 /* Check for target support. */
7ed54790 2037 tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type);
8e03b21e
PK
2038 if (!new_vectype)
2039 return NULL;
2040
2041 bool fallback_p = false;
2042
2043 if (direct_internal_fn_supported_p (ifn, new_vectype, OPTIMIZE_FOR_SPEED))
2044 ;
2045 else if (TYPE_UNSIGNED (new_type)
2046 && optab_for_tree_code (RSHIFT_EXPR, new_vectype, optab_scalar)
2047 && optab_for_tree_code (PLUS_EXPR, new_vectype, optab_default)
2048 && optab_for_tree_code (BIT_IOR_EXPR, new_vectype, optab_default)
2049 && optab_for_tree_code (BIT_AND_EXPR, new_vectype, optab_default))
2050 fallback_p = true;
2051 else
0267732b
RS
2052 return NULL;
2053
2054 /* The IR requires a valid vector type for the cast result, even though
2055 it's likely to be discarded. */
7ed54790 2056 *type_out = get_vectype_for_scalar_type (vinfo, type);
0267732b
RS
2057 if (!*type_out)
2058 return NULL;
2059
0267732b
RS
2060 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
2061 tree new_ops[2];
2062 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
2063 unprom, new_vectype);
8e03b21e
PK
2064
2065 if (fallback_p)
2066 {
2067 /* As a fallback, generate code for following sequence:
2068
2069 shifted_op0 = new_ops[0] >> 1;
2070 shifted_op1 = new_ops[1] >> 1;
2071 sum_of_shifted = shifted_op0 + shifted_op1;
2072 unmasked_carry = new_ops[0] and/or new_ops[1];
2073 carry = unmasked_carry & 1;
2074 new_var = sum_of_shifted + carry;
2075 */
2076
2077 tree one_cst = build_one_cst (new_type);
2078 gassign *g;
2079
2080 tree shifted_op0 = vect_recog_temp_ssa_var (new_type, NULL);
2081 g = gimple_build_assign (shifted_op0, RSHIFT_EXPR, new_ops[0], one_cst);
2082 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2083
2084 tree shifted_op1 = vect_recog_temp_ssa_var (new_type, NULL);
2085 g = gimple_build_assign (shifted_op1, RSHIFT_EXPR, new_ops[1], one_cst);
2086 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2087
2088 tree sum_of_shifted = vect_recog_temp_ssa_var (new_type, NULL);
2089 g = gimple_build_assign (sum_of_shifted, PLUS_EXPR,
2090 shifted_op0, shifted_op1);
2091 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2092
2093 tree unmasked_carry = vect_recog_temp_ssa_var (new_type, NULL);
2094 tree_code c = (ifn == IFN_AVG_CEIL) ? BIT_IOR_EXPR : BIT_AND_EXPR;
2095 g = gimple_build_assign (unmasked_carry, c, new_ops[0], new_ops[1]);
2096 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2097
2098 tree carry = vect_recog_temp_ssa_var (new_type, NULL);
2099 g = gimple_build_assign (carry, BIT_AND_EXPR, unmasked_carry, one_cst);
2100 append_pattern_def_seq (last_stmt_info, g, new_vectype);
2101
2102 g = gimple_build_assign (new_var, PLUS_EXPR, sum_of_shifted, carry);
2103 return vect_convert_output (last_stmt_info, type, g, new_vectype);
2104 }
2105
2106 /* Generate the IFN_AVG* call. */
0267732b
RS
2107 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
2108 new_ops[1]);
2109 gimple_call_set_lhs (average_stmt, new_var);
2110 gimple_set_location (average_stmt, gimple_location (last_stmt));
2111
2112 if (dump_enabled_p ())
3c2a8ed0
DM
2113 dump_printf_loc (MSG_NOTE, vect_location,
2114 "created pattern stmt: %G", average_stmt);
0267732b 2115
0267732b
RS
2116 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
2117}
2118
370c2ebe
RS
2119/* Recognize cases in which the input to a cast is wider than its
2120 output, and the input is fed by a widening operation. Fold this
2121 by removing the unnecessary intermediate widening. E.g.:
1107f3ae 2122
370c2ebe
RS
2123 unsigned char a;
2124 unsigned int b = (unsigned int) a;
2125 unsigned short c = (unsigned short) b;
1107f3ae 2126
370c2ebe 2127 -->
1107f3ae 2128
370c2ebe 2129 unsigned short c = (unsigned short) a;
1107f3ae 2130
370c2ebe
RS
2131 Although this is rare in input IR, it is an expected side-effect
2132 of the over-widening pattern above.
1107f3ae 2133
370c2ebe
RS
2134 This is beneficial also for integer-to-float conversions, if the
2135 widened integer has more bits than the float, and if the unwidened
2136 input doesn't. */
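/* Illustrative sketch (not from the original sources) of the
   integer-to-float case, assuming 32-bit float:

     unsigned short a;
     unsigned long long b = a;	/* widened beyond float's precision  */
     float f = b;

   -->

     float f = (float) a;	/* same value, no 64-bit intermediate  */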
1107f3ae 2137
370c2ebe 2138static gimple *
ba9728b0 2139vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
370c2ebe
RS
2140{
2141 /* Check for a cast, including an integer-to-float conversion. */
ba9728b0 2142 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
370c2ebe
RS
2143 if (!last_stmt)
2144 return NULL;
2145 tree_code code = gimple_assign_rhs_code (last_stmt);
2146 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
2147 return NULL;
1107f3ae 2148
370c2ebe
RS
2149 /* Make sure that the rhs is a scalar with a natural bitsize. */
2150 tree lhs = gimple_assign_lhs (last_stmt);
2151 if (!lhs)
2152 return NULL;
2153 tree lhs_type = TREE_TYPE (lhs);
2154 scalar_mode lhs_mode;
2155 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
2156 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
2157 return NULL;
1107f3ae 2158
370c2ebe
RS
2159 /* Check for a narrowing operation (from a vector point of view). */
2160 tree rhs = gimple_assign_rhs1 (last_stmt);
2161 tree rhs_type = TREE_TYPE (rhs);
2162 if (!INTEGRAL_TYPE_P (rhs_type)
2163 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
2164 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
2165 return NULL;
82db3d43 2166
370c2ebe 2167 /* Try to find an unpromoted input. */
370c2ebe
RS
2168 vec_info *vinfo = last_stmt_info->vinfo;
2169 vect_unpromoted_value unprom;
2170 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
2171 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
2172 return NULL;
1cbfeccc 2173
370c2ebe
RS
2174 /* If the bits above RHS_TYPE matter, make sure that they're the
2175 same when extending from UNPROM as they are when extending from RHS. */
2176 if (!INTEGRAL_TYPE_P (lhs_type)
2177 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
2178 return NULL;
1107f3ae 2179
370c2ebe
RS
2180 /* We can get the same result by casting UNPROM directly, to avoid
2181 the unnecessary widening and narrowing. */
2182 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1107f3ae 2183
7ed54790 2184 *type_out = get_vectype_for_scalar_type (vinfo, lhs_type);
370c2ebe 2185 if (!*type_out)
1107f3ae
IR
2186 return NULL;
2187
370c2ebe
RS
2188 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
2189 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
2190 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1107f3ae
IR
2191
2192 return pattern_stmt;
2193}
2194
00347934
RS
2195/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
2196 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
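/* Illustrative sketch (not from the original sources):

     short a[N];
     int r[N];
     for (int i = 0; i < N; i++)
       r[i] = (int) a[i] << 3;	/* LSHIFT_EXPR of a widened short  */

   could become a WIDEN_LSHIFT_EXPR on vectors of shorts.  */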
36ba4aae 2197
355fe088 2198static gimple *
ba9728b0 2199vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
36ba4aae 2200{
ba9728b0 2201 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
00347934 2202 WIDEN_LSHIFT_EXPR, true,
d379ac22 2203 "vect_recog_widen_shift_pattern");
7e9a3abb
JJ
2204}
2205
2206/* Detect a rotate pattern that wouldn't be otherwise vectorized:
2207
2208 type a_t, b_t, c_t;
2209
2210 S0 a_t = b_t r<< c_t;
2211
2212 Input/Output:
2213
ba9728b0 2214 * STMT_VINFO: The stmt from which the pattern search begins,
7e9a3abb
JJ
2215 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
2216 with a sequence:
2217
2218 S1 d_t = -c_t;
2219 S2 e_t = d_t & (B - 1);
2220 S3 f_t = b_t << c_t;
2221 S4 g_t = b_t >> e_t;
2222 S0 a_t = f_t | g_t;
2223
2224 where B is element bitsize of type.
2225
2226 Output:
2227
7e9a3abb
JJ
2228 * TYPE_OUT: The type of the output of this pattern.
2229
2230 * Return value: A new stmt that will be used to replace the rotate
2231 S0 stmt. */
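/* Illustrative sketch (not from the original sources): a rotate
   typically reaches the vectorizer after a scalar pass has folded a
   shift pair, e.g.:

     unsigned int x[N];
     for (int i = 0; i < N; i++)
       x[i] = (x[i] << 5) | (x[i] >> 27);	/* x[i] r<< 5  */

   On a target with vector shifts but no vector rotate optab, the
   recognizer re-expands the rotate into the S1-S4/S0 sequence
   above.  */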
2232
355fe088 2233static gimple *
ba9728b0 2234vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
7e9a3abb 2235{
ba9728b0 2236 gimple *last_stmt = stmt_vinfo->stmt;
7e9a3abb 2237 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
355fe088 2238 gimple *pattern_stmt, *def_stmt;
7e9a3abb 2239 enum tree_code rhs_code;
310213d4 2240 vec_info *vinfo = stmt_vinfo->vinfo;
7e9a3abb
JJ
2241 enum vect_def_type dt;
2242 optab optab1, optab2;
68119618 2243 edge ext_def = NULL;
9ff9a0a5 2244 bool bswap16_p = false;
7e9a3abb 2245
9ff9a0a5
JJ
2246 if (is_gimple_assign (last_stmt))
2247 {
2248 rhs_code = gimple_assign_rhs_code (last_stmt);
2249 switch (rhs_code)
2250 {
2251 case LROTATE_EXPR:
2252 case RROTATE_EXPR:
2253 break;
2254 default:
2255 return NULL;
2256 }
7e9a3abb 2257
9ff9a0a5
JJ
2258 lhs = gimple_assign_lhs (last_stmt);
2259 oprnd0 = gimple_assign_rhs1 (last_stmt);
2260 type = TREE_TYPE (oprnd0);
2261 oprnd1 = gimple_assign_rhs2 (last_stmt);
2262 }
2263 else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16))
7e9a3abb 2264 {
9ff9a0a5
JJ
2265 /* __builtin_bswap16 (x) is another form of x r>> 8.
2266 The vectorizer has bswap support, but only if the argument isn't
2267 promoted. */
2268 lhs = gimple_call_lhs (last_stmt);
2269 oprnd0 = gimple_call_arg (last_stmt, 0);
2270 type = TREE_TYPE (oprnd0);
bf78ed91
RB
2271 if (!lhs
2272 || TYPE_PRECISION (TREE_TYPE (lhs)) != 16
9ff9a0a5
JJ
2273 || TYPE_PRECISION (type) <= 16
2274 || TREE_CODE (oprnd0) != SSA_NAME
2275 || BITS_PER_UNIT != 8
2276 || !TYPE_UNSIGNED (TREE_TYPE (lhs)))
2277 return NULL;
2278
2279 stmt_vec_info def_stmt_info;
2280 if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt))
2281 return NULL;
2282
2283 if (dt != vect_internal_def)
2284 return NULL;
2285
2286 if (gimple_assign_cast_p (def_stmt))
2287 {
2288 def = gimple_assign_rhs1 (def_stmt);
2289 if (INTEGRAL_TYPE_P (TREE_TYPE (def))
2290 && TYPE_PRECISION (TREE_TYPE (def)) == 16)
2291 oprnd0 = def;
2292 }
2293
2294 type = TREE_TYPE (lhs);
7ed54790 2295 vectype = get_vectype_for_scalar_type (vinfo, type);
9ff9a0a5
JJ
2296 if (vectype == NULL_TREE)
2297 return NULL;
2298
2299 if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype))
2300 {
2301 /* The encoding uses one stepped pattern for each byte in the
2302 16-bit word. */
2303 vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3);
2304 for (unsigned i = 0; i < 3; ++i)
2305 for (unsigned j = 0; j < 2; ++j)
2306 elts.quick_push ((i + 1) * 2 - j - 1);
2307
2308 vec_perm_indices indices (elts, 1,
2309 TYPE_VECTOR_SUBPARTS (char_vectype));
2310 if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices))
2311 {
2312 /* vectorizable_bswap can handle the __builtin_bswap16 if we
2313 undo the argument promotion. */
2314 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2315 {
2316 def = vect_recog_temp_ssa_var (type, NULL);
2317 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2318 append_pattern_def_seq (stmt_vinfo, def_stmt);
2319 oprnd0 = def;
2320 }
2321
2322 /* Pattern detected. */
2323 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2324
2325 *type_out = vectype;
2326
2327 /* Pattern supported. Create a stmt to be used to replace the
2328 pattern, with the unpromoted argument. */
2329 var = vect_recog_temp_ssa_var (type, NULL);
2330 pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt),
2331 1, oprnd0);
2332 gimple_call_set_lhs (pattern_stmt, var);
2333 gimple_call_set_fntype (as_a <gcall *> (pattern_stmt),
2334 gimple_call_fntype (last_stmt));
2335 return pattern_stmt;
2336 }
2337 }
2338
2339 oprnd1 = build_int_cst (integer_type_node, 8);
2340 rhs_code = LROTATE_EXPR;
2341 bswap16_p = true;
7e9a3abb 2342 }
9ff9a0a5
JJ
2343 else
2344 return NULL;
7e9a3abb 2345
7e9a3abb
JJ
2346 if (TREE_CODE (oprnd0) != SSA_NAME
2347 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2348 || !INTEGRAL_TYPE_P (type)
2349 || !TYPE_UNSIGNED (type))
2350 return NULL;
2351
fef96d8e
RS
2352 stmt_vec_info def_stmt_info;
2353 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
7e9a3abb
JJ
2354 return NULL;
2355
2356 if (dt != vect_internal_def
2357 && dt != vect_constant_def
2358 && dt != vect_external_def)
2359 return NULL;
2360
7ed54790 2361 vectype = get_vectype_for_scalar_type (vinfo, type);
7e9a3abb
JJ
2362 if (vectype == NULL_TREE)
2363 return NULL;
2364
2365 /* If vector/vector or vector/scalar rotate is supported by the target,
2366 don't do anything here. */
2367 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2368 if (optab1
2369 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
9ff9a0a5
JJ
2370 {
2371 use_rotate:
2372 if (bswap16_p)
2373 {
2374 if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2375 {
2376 def = vect_recog_temp_ssa_var (type, NULL);
2377 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2378 append_pattern_def_seq (stmt_vinfo, def_stmt);
2379 oprnd0 = def;
2380 }
2381
2382 /* Pattern detected. */
2383 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
2384
2385 *type_out = vectype;
2386
2387 /* Pattern supported. Create a stmt to be used to replace the
2388 pattern. */
2389 var = vect_recog_temp_ssa_var (type, NULL);
2390 pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0,
2391 oprnd1);
2392 return pattern_stmt;
2393 }
2394 return NULL;
2395 }
7e9a3abb 2396
310213d4 2397 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
7e9a3abb
JJ
2398 {
2399 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2400 if (optab2
2401 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
9ff9a0a5 2402 goto use_rotate;
7e9a3abb
JJ
2403 }
2404
2405 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2406 don't do anything here either. */
2407 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2408 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2409 if (!optab1
2410 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2411 || !optab2
2412 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2413 {
310213d4 2414 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
7e9a3abb
JJ
2415 return NULL;
2416 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2417 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2418 if (!optab1
2419 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2420 || !optab2
2421 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2422 return NULL;
2423 }
2424
7e9a3abb 2425 *type_out = vectype;
7e9a3abb 2426
9ff9a0a5
JJ
2427 if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0)))
2428 {
2429 def = vect_recog_temp_ssa_var (type, NULL);
2430 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0);
2431 append_pattern_def_seq (stmt_vinfo, def_stmt);
2432 oprnd0 = def;
2433 }
2434
f5c03155 2435 if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME)
3330053e 2436 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
68119618 2437
7e9a3abb 2438 def = NULL_TREE;
7a504f33 2439 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
f5c03155 2440 if (dt != vect_internal_def || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
7e9a3abb
JJ
2441 def = oprnd1;
2442 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2443 {
2444 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7a504f33 2445 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
7e9a3abb
JJ
2446 && TYPE_PRECISION (TREE_TYPE (rhs1))
2447 == TYPE_PRECISION (type))
2448 def = rhs1;
2449 }
2450
7e9a3abb
JJ
2451 if (def == NULL_TREE)
2452 {
2453 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2454 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
f5c03155 2455 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
2456 }
2457 stype = TREE_TYPE (def);
7a504f33 2458 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
7e9a3abb
JJ
2459
2460 if (TREE_CODE (def) == INTEGER_CST)
2461 {
cc269bb6 2462 if (!tree_fits_uhwi_p (def)
7a504f33 2463 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
7e9a3abb
JJ
2464 || integer_zerop (def))
2465 return NULL;
2466 def2 = build_int_cst (stype,
7a504f33 2467 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
7e9a3abb
JJ
2468 }
2469 else
2470 {
7ed54790 2471 tree vecstype = get_vectype_for_scalar_type (vinfo, stype);
7e9a3abb
JJ
2472
2473 if (vecstype == NULL_TREE)
2474 return NULL;
2475 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 2476 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
2477 if (ext_def)
2478 {
2479 basic_block new_bb
2480 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2481 gcc_assert (!new_bb);
2482 }
2483 else
776bfcea 2484 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e9a3abb
JJ
2485
2486 def2 = vect_recog_temp_ssa_var (stype, NULL);
7a504f33 2487 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
0d0e4a03
JJ
2488 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2489 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
2490 if (ext_def)
2491 {
2492 basic_block new_bb
2493 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2494 gcc_assert (!new_bb);
2495 }
2496 else
776bfcea 2497 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e9a3abb
JJ
2498 }
2499
2500 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2501 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2502 ? LSHIFT_EXPR : RSHIFT_EXPR,
2503 oprnd0, def);
7e9a3abb
JJ
2504 append_pattern_def_seq (stmt_vinfo, def_stmt);
2505
2506 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2507 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2508 ? RSHIFT_EXPR : LSHIFT_EXPR,
2509 oprnd0, def2);
7e9a3abb
JJ
2510 append_pattern_def_seq (stmt_vinfo, def_stmt);
2511
2512 /* Pattern detected. */
49d8df1b 2513 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
7e9a3abb
JJ
2514
2515 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2516 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2517 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb 2518
36ba4aae
IR
2519 return pattern_stmt;
2520}
1107f3ae 2521
732a0ad3
JJ
2522/* Detect a vector by vector shift pattern that wouldn't be otherwise
2523 vectorized:
2524
2525 type a_t;
2526 TYPE b_T, res_T;
2527
2528 S1 a_t = ;
2529 S2 b_T = ;
2530 S3 res_T = b_T op a_t;
2531
2532 where type 'TYPE' is a type with a different size than 'type',
2533 and op is <<, >> or rotate.
2534
2535 Also detect cases:
2536
2537 type a_t;
2538 TYPE b_T, c_T, res_T;
2539
2540 S0 c_T = ;
2541 S1 a_t = (type) c_T;
2542 S2 b_T = ;
2543 S3 res_T = b_T op a_t;
2544
2545 Input/Output:
2546
ba9728b0 2547 * STMT_VINFO: The stmt from which the pattern search begins,
732a0ad3
JJ
2548 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2549 with a shift/rotate which has same type on both operands, in the
2550 second case just b_T op c_T, in the first case with added cast
363477c0 2551 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2552
2553 Output:
2554
732a0ad3
JJ
2555 * TYPE_OUT: The type of the output of this pattern.
2556
2557 * Return value: A new stmt that will be used to replace the shift/rotate
2558 S3 stmt. */
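/* Illustrative sketch (not from the original sources): a per-element
   shift whose amount has a narrower type than the shifted value:

     unsigned int b[N];
     unsigned char s[N];
     for (int i = 0; i < N; i++)
       b[i] = b[i] << s[i];	/* S3, with 'type' = unsigned char  */

   The recognizer widens s[i] (or looks through the cast that produced
   it) so that both shift operands share one vector type.  */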
2559
355fe088 2560static gimple *
ba9728b0
RS
2561vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2562 tree *type_out)
732a0ad3 2563{
ba9728b0 2564 gimple *last_stmt = stmt_vinfo->stmt;
732a0ad3 2565 tree oprnd0, oprnd1, lhs, var;
25927307 2566 gimple *pattern_stmt;
732a0ad3 2567 enum tree_code rhs_code;
310213d4 2568 vec_info *vinfo = stmt_vinfo->vinfo;
732a0ad3
JJ
2569
2570 if (!is_gimple_assign (last_stmt))
2571 return NULL;
2572
2573 rhs_code = gimple_assign_rhs_code (last_stmt);
2574 switch (rhs_code)
2575 {
2576 case LSHIFT_EXPR:
2577 case RSHIFT_EXPR:
2578 case LROTATE_EXPR:
2579 case RROTATE_EXPR:
2580 break;
2581 default:
2582 return NULL;
2583 }
2584
732a0ad3
JJ
2585 lhs = gimple_assign_lhs (last_stmt);
2586 oprnd0 = gimple_assign_rhs1 (last_stmt);
2587 oprnd1 = gimple_assign_rhs2 (last_stmt);
2588 if (TREE_CODE (oprnd0) != SSA_NAME
2589 || TREE_CODE (oprnd1) != SSA_NAME
2590 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2be65d9e 2591 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
732a0ad3
JJ
2592 || TYPE_PRECISION (TREE_TYPE (lhs))
2593 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2594 return NULL;
2595
25927307
RS
2596 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2597 if (!def_vinfo)
732a0ad3
JJ
2598 return NULL;
2599
7ed54790 2600 *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0));
1cbfeccc 2601 if (*type_out == NULL_TREE)
732a0ad3
JJ
2602 return NULL;
2603
81c40241 2604 tree def = NULL_TREE;
25927307 2605 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
0f8c840c 2606 if (def_stmt && gimple_assign_cast_p (def_stmt))
732a0ad3
JJ
2607 {
2608 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2609 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2610 && TYPE_PRECISION (TREE_TYPE (rhs1))
2611 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
0179520a
JJ
2612 {
2613 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2614 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2615 def = rhs1;
2616 else
2617 {
2618 tree mask
2619 = build_low_bits_mask (TREE_TYPE (rhs1),
2620 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2621 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2622 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
7ed54790
RS
2623 tree vecstype = get_vectype_for_scalar_type (vinfo,
2624 TREE_TYPE (rhs1));
776bfcea 2625 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
0179520a
JJ
2626 }
2627 }
732a0ad3
JJ
2628 }
2629
2630 if (def == NULL_TREE)
2631 {
2632 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2633 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
9c58fb7a 2634 append_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2635 }
2636
2637 /* Pattern detected. */
49d8df1b 2638 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
732a0ad3
JJ
2639
2640 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2641 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2642 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2643
732a0ad3
JJ
2644 return pattern_stmt;
2645}
2646
53109ba8
KT
2647/* Return true iff the target has a vector optab implementing the operation
2648 CODE on type VECTYPE. */
47486460 2649
53109ba8
KT
2650static bool
2651target_has_vecop_for_code (tree_code code, tree vectype)
2652{
2653 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2654 return voptab
2655 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2656}
47486460 2657
53109ba8
KT
2658/* Verify that the target has optabs of VECTYPE to perform all the steps
2659 needed by the multiplication-by-immediate synthesis algorithm described by
2660 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2661 present. Return true iff the target supports all the steps. */
2662
2663static bool
2664target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2665 tree vectype, bool synth_shift_p)
2666{
2667 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2668 return false;
2669
2670 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2671 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2672
2673 if (var == negate_variant
2674 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2675 return false;
2676
2677 /* If we must synthesize shifts with additions make sure that vector
2678 addition is available. */
2679 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2680 return false;
2681
2682 for (int i = 1; i < alg->ops; i++)
2683 {
2684 switch (alg->op[i])
2685 {
2686 case alg_shift:
2687 break;
2688 case alg_add_t_m2:
2689 case alg_add_t2_m:
2690 case alg_add_factor:
2691 if (!supports_vplus)
2692 return false;
2693 break;
2694 case alg_sub_t_m2:
2695 case alg_sub_t2_m:
2696 case alg_sub_factor:
2697 if (!supports_vminus)
2698 return false;
2699 break;
2700 case alg_unknown:
2701 case alg_m:
2702 case alg_zero:
2703 case alg_impossible:
2704 return false;
2705 default:
2706 gcc_unreachable ();
2707 }
2708 }
2709
2710 return true;
2711}
2712
2713/* Synthesize a left shift of OP by AMNT bits using a series of additions and
2714 putting the final result in DEST. Append all statements but the last into
2715 VINFO. Return the last statement. */
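/* For example (not from the original sources), a shift by 3 expands
   into three doublings:

     t1 = op + op;	/* op << 1  */
     t2 = t1 + t1;	/* op << 2  */
     dest = t2 + t2;	/* op << 3  */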
2716
2717static gimple *
2718synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2719 stmt_vec_info vinfo)
2720{
2721 HOST_WIDE_INT i;
2722 tree itype = TREE_TYPE (op);
2723 tree prev_res = op;
2724 gcc_assert (amnt >= 0);
2725 for (i = 0; i < amnt; i++)
2726 {
2727 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2728 : dest;
2729 gimple *stmt
2730 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2731 prev_res = tmp_var;
2732 if (i < amnt - 1)
2733 append_pattern_def_seq (vinfo, stmt);
2734 else
2735 return stmt;
2736 }
2737 gcc_unreachable ();
2738 return NULL;
2739}
2740
2741/* Helper for vect_synth_mult_by_constant. Apply a binary operation
2742 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2743 the process if necessary. Append the resulting assignment statements
2744 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2745 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2746 left shifts using additions. */
2747
2748static tree
2749apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2750 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2751{
2752 if (integer_zerop (op2)
2753 && (code == LSHIFT_EXPR
2754 || code == PLUS_EXPR))
2755 {
2756 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2757 return op1;
2758 }
2759
2760 gimple *stmt;
2761 tree itype = TREE_TYPE (op1);
2762 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2763
2764 if (code == LSHIFT_EXPR
2765 && synth_shift_p)
2766 {
2767 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2768 stmt_vinfo);
2769 append_pattern_def_seq (stmt_vinfo, stmt);
2770 return tmp_var;
2771 }
2772
2773 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2774 append_pattern_def_seq (stmt_vinfo, stmt);
2775 return tmp_var;
2776}
2777
2778/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2779 and simple arithmetic operations to be vectorized. Record the statements
2780 produced in STMT_VINFO and return the last statement in the sequence or
2781 NULL if it's not possible to synthesize such a multiplication.
2782 This function mirrors the behavior of expand_mult_const in expmed.c but
2783 works on tree-ssa form. */
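/* Illustrative sketch (not from the original sources): for VAL = 10,
   choose_mult_variant might select the shift-add sequence

     t1 = op << 2;	/* 4 * op  */
     t2 = t1 + op;	/* 5 * op  */
     result = t2 << 1;	/* 10 * op  */

   with the exact sequence depending on the target's cost model.  */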
2784
2785static gimple *
2786vect_synth_mult_by_constant (tree op, tree val,
2787 stmt_vec_info stmt_vinfo)
2788{
a5c3185a 2789 vec_info *vinfo = stmt_vinfo->vinfo;
53109ba8
KT
2790 tree itype = TREE_TYPE (op);
2791 machine_mode mode = TYPE_MODE (itype);
2792 struct algorithm alg;
2793 mult_variant variant;
2794 if (!tree_fits_shwi_p (val))
2795 return NULL;
2796
2797 /* Multiplication synthesis by shifts, adds and subs can introduce
2798 signed overflow where the original operation didn't. Perform the
2799 operations on an unsigned type and cast back to avoid this.
2800 In the future we may want to relax this for synthesis algorithms
2801 that we can prove do not cause unexpected overflow. */
2802 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2803
2804 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
47486460 2805
53109ba8
KT
2806 /* Targets that don't support vector shifts but support vector additions
2807 can synthesize shifts that way. */
a5c3185a 2808 bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype);
53109ba8
KT
2809
2810 HOST_WIDE_INT hwval = tree_to_shwi (val);
2811 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2812 The vectorizer's benefit analysis will decide whether it's beneficial
2813 to do this. */
2814 bool possible = choose_mult_variant (mode, hwval, &alg,
2815 &variant, MAX_COST);
2816 if (!possible)
2817 return NULL;
2818
7ed54790 2819 tree vectype = get_vectype_for_scalar_type (vinfo, multtype);
53109ba8
KT
2820
2821 if (!vectype
2822 || !target_supports_mult_synth_alg (&alg, variant,
2823 vectype, synth_shift_p))
2824 return NULL;
2825
2826 tree accumulator;
2827
2828 /* Clear out the sequence of statements so we can populate it below. */
53109ba8
KT
2829 gimple *stmt = NULL;
2830
2831 if (cast_to_unsigned_p)
2832 {
2833 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2834 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2835 append_pattern_def_seq (stmt_vinfo, stmt);
2836 op = tmp_op;
2837 }
2838
2839 if (alg.op[0] == alg_zero)
2840 accumulator = build_int_cst (multtype, 0);
2841 else
2842 accumulator = op;
2843
2844 bool needs_fixup = (variant == negate_variant)
2845 || (variant == add_variant);
2846
2847 for (int i = 1; i < alg.ops; i++)
2848 {
2849 tree shft_log = build_int_cst (multtype, alg.log[i]);
2850 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2851 tree tmp_var = NULL_TREE;
2852
2853 switch (alg.op[i])
2854 {
2855 case alg_shift:
2856 if (synth_shift_p)
2857 stmt
2858 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2859 stmt_vinfo);
2860 else
2861 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2862 shft_log);
2863 break;
2864 case alg_add_t_m2:
2865 tmp_var
2866 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2867 stmt_vinfo, synth_shift_p);
2868 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2869 tmp_var);
2870 break;
2871 case alg_sub_t_m2:
2872 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2873 shft_log, stmt_vinfo,
2874 synth_shift_p);
2875 /* In some algorithms the first step involves zeroing the
2876 accumulator. If subtracting from such an accumulator
2877 just emit the negation directly. */
2878 if (integer_zerop (accumulator))
2879 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2880 else
2881 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2882 tmp_var);
2883 break;
2884 case alg_add_t2_m:
2885 tmp_var
2886 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2887 stmt_vinfo, synth_shift_p);
2888 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2889 break;
2890 case alg_sub_t2_m:
2891 tmp_var
2892 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2893 stmt_vinfo, synth_shift_p);
2894 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2895 break;
2896 case alg_add_factor:
2897 tmp_var
2898 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2899 stmt_vinfo, synth_shift_p);
2900 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2901 tmp_var);
2902 break;
2903 case alg_sub_factor:
2904 tmp_var
2905 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2906 stmt_vinfo, synth_shift_p);
2907 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2908 accumulator);
2909 break;
2910 default:
2911 gcc_unreachable ();
2912 }
2913 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2914 but rather return it directly. */
2915
2916 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2917 append_pattern_def_seq (stmt_vinfo, stmt);
2918 accumulator = accum_tmp;
2919 }
2920 if (variant == negate_variant)
2921 {
2922 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2923 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2924 accumulator = accum_tmp;
2925 if (cast_to_unsigned_p)
2926 append_pattern_def_seq (stmt_vinfo, stmt);
2927 }
2928 else if (variant == add_variant)
2929 {
2930 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2931 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2932 accumulator = accum_tmp;
2933 if (cast_to_unsigned_p)
2934 append_pattern_def_seq (stmt_vinfo, stmt);
2935 }
2936 /* Move back to the signed type if needed. */
2937 if (cast_to_unsigned_p)
2938 {
2939 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2940 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2941 }
2942
2943 return stmt;
2944}
2945
2946/* Detect multiplication by constant and convert it into a sequence of
2947 shifts and additions, subtractions, negations. We reuse the
2948 choose_mult_variant algorithms from expmed.c
47486460
VK
2949
2950 Input/Output:
2951
ba9728b0 2952 STMT_VINFO: The stmt from which the pattern search begins,
53109ba8 2953 i.e. the mult stmt.
47486460
VK
2954
2955 Output:
2956
47486460
VK
2957 * TYPE_OUT: The type of the output of this pattern.
2958
53109ba8
KT
2959 * Return value: A new stmt that will be used to replace
2960 the multiplication. */
47486460 2961
355fe088 2962static gimple *
ba9728b0 2963vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
47486460 2964{
7ed54790 2965 vec_info *vinfo = stmt_vinfo->vinfo;
ba9728b0 2966 gimple *last_stmt = stmt_vinfo->stmt;
47486460 2967 tree oprnd0, oprnd1, vectype, itype;
53109ba8 2968 gimple *pattern_stmt;
47486460
VK
2969
2970 if (!is_gimple_assign (last_stmt))
2971 return NULL;
2972
2973 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2974 return NULL;
2975
2976 oprnd0 = gimple_assign_rhs1 (last_stmt);
2977 oprnd1 = gimple_assign_rhs2 (last_stmt);
2978 itype = TREE_TYPE (oprnd0);
2979
2980 if (TREE_CODE (oprnd0) != SSA_NAME
2981 || TREE_CODE (oprnd1) != INTEGER_CST
2982 || !INTEGRAL_TYPE_P (itype)
2be65d9e 2983 || !type_has_mode_precision_p (itype))
47486460
VK
2984 return NULL;
2985
7ed54790 2986 vectype = get_vectype_for_scalar_type (vinfo, itype);
47486460
VK
2987 if (vectype == NULL_TREE)
2988 return NULL;
2989
2990 /* If the target can handle vectorized multiplication natively,
2991 don't attempt to optimize this. */
53109ba8
KT
2992 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2993 if (mul_optab != unknown_optab)
47486460
VK
2994 {
2995 machine_mode vec_mode = TYPE_MODE (vectype);
53109ba8 2996 int icode = (int) optab_handler (mul_optab, vec_mode);
47486460 2997 if (icode != CODE_FOR_nothing)
53109ba8 2998 return NULL;
47486460
VK
2999 }
3000
53109ba8
KT
3001 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
3002 if (!pattern_stmt)
47486460
VK
3003 return NULL;
3004
3005 /* Pattern detected. */
49d8df1b 3006 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
47486460 3007
47486460
VK
3008 *type_out = vectype;
3009
3010 return pattern_stmt;
3011}
3012
079c527f 3013/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
3014 otherwise vectorized:
3015
3016 type a_t, b_t;
3017
3018 S1 a_t = b_t / N;
3019
079c527f 3020 where type 'type' is an integral type and N is a constant.
363477c0 3021
079c527f 3022 Similarly handle modulo by a constant:
363477c0
JJ
3023
3024 S4 a_t = b_t % N;
3025
3026 Input/Output:
3027
ba9728b0 3028 * STMT_VINFO: The stmt from which the pattern search begins,
079c527f
JJ
3029 i.e. the division stmt. If N is a power of two constant and the
3030 type is signed, S1 is replaced by:
363477c0
JJ
3031 S3 y_t = b_t < 0 ? N - 1 : 0;
3032 S2 x_t = b_t + y_t;
3033 S1' a_t = x_t >> log2 (N);
3034
079c527f
JJ
3035 If N is a power of two constant and the type is signed, S4 is
3036 replaced by (where *_T temporaries have unsigned type):
363477c0
JJ
3037 S9 y_T = b_t < 0 ? -1U : 0U;
3038 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
3039 S7 z_t = (type) z_T;
3040 S6 w_t = b_t + z_t;
3041 S5 x_t = w_t & (N - 1);
3042 S4' a_t = x_t - z_t;
3043
3044 Output:
3045
363477c0
JJ
3046 * TYPE_OUT: The type of the output of this pattern.
3047
3048 * Return value: A new stmt that will be used to replace the division
3049 S1 or modulo S4 stmt. */
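/* Illustrative sketch (not from the original sources): for signed
   a_t = b_t / 8, the S3/S2/S1' sequence above is

     y_t = b_t < 0 ? 7 : 0;	/* bias negative values  */
     x_t = b_t + y_t;
     a_t = x_t >> 3;

   which rounds toward zero as C division requires, unlike a plain
   arithmetic shift.  */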
3050
355fe088 3051static gimple *
ba9728b0 3052vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
363477c0 3053{
7ed54790 3054 vec_info *vinfo = stmt_vinfo->vinfo;
ba9728b0 3055 gimple *last_stmt = stmt_vinfo->stmt;
5deb57cb 3056 tree oprnd0, oprnd1, vectype, itype, cond;
355fe088 3057 gimple *pattern_stmt, *def_stmt;
363477c0 3058 enum tree_code rhs_code;
363477c0 3059 optab optab;
00f07b86 3060 tree q;
079c527f 3061 int dummy_int, prec;
363477c0
JJ
3062
3063 if (!is_gimple_assign (last_stmt))
3064 return NULL;
3065
3066 rhs_code = gimple_assign_rhs_code (last_stmt);
3067 switch (rhs_code)
3068 {
3069 case TRUNC_DIV_EXPR:
eb592645 3070 case EXACT_DIV_EXPR:
363477c0
JJ
3071 case TRUNC_MOD_EXPR:
3072 break;
3073 default:
3074 return NULL;
3075 }
3076
363477c0
JJ
3077 oprnd0 = gimple_assign_rhs1 (last_stmt);
3078 oprnd1 = gimple_assign_rhs2 (last_stmt);
3079 itype = TREE_TYPE (oprnd0);
3080 if (TREE_CODE (oprnd0) != SSA_NAME
3081 || TREE_CODE (oprnd1) != INTEGER_CST
3082 || TREE_CODE (itype) != INTEGER_TYPE
2be65d9e 3083 || !type_has_mode_precision_p (itype))
363477c0
JJ
3084 return NULL;
3085
7a504f33 3086 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
7ed54790 3087 vectype = get_vectype_for_scalar_type (vinfo, itype);
363477c0
JJ
3088 if (vectype == NULL_TREE)
3089 return NULL;
3090
8f76f377 3091 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
363477c0 3092 {
8f76f377
RS
3093 /* If the target can handle vectorized division or modulo natively,
3094 don't attempt to optimize this, since native division is likely
3095 to give smaller code. */
3096 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
3097 if (optab != unknown_optab)
3098 {
3099 machine_mode vec_mode = TYPE_MODE (vectype);
3100 int icode = (int) optab_handler (optab, vec_mode);
3101 if (icode != CODE_FOR_nothing)
3102 return NULL;
3103 }
363477c0
JJ
3104 }
3105
079c527f
JJ
3106 prec = TYPE_PRECISION (itype);
3107 if (integer_pow2p (oprnd1))
363477c0 3108 {
079c527f
JJ
3109 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
3110 return NULL;
363477c0 3111
079c527f 3112 /* Pattern detected. */
49d8df1b 3113 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
079c527f 3114
c0c2f013
YW
3115 *type_out = vectype;
3116
3117 /* Check if the target supports this internal function. */
3118 internal_fn ifn = IFN_DIV_POW2;
3119 if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
3120 {
3121 tree shift = build_int_cst (itype, tree_log2 (oprnd1));
3122
3123 tree var_div = vect_recog_temp_ssa_var (itype, NULL);
3124 gimple *div_stmt = gimple_build_call_internal (ifn, 2, oprnd0, shift);
3125 gimple_call_set_lhs (div_stmt, var_div);
3126
3127 if (rhs_code == TRUNC_MOD_EXPR)
3128 {
3129 append_pattern_def_seq (stmt_vinfo, div_stmt);
3130 def_stmt
3131 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3132 LSHIFT_EXPR, var_div, shift);
3133 append_pattern_def_seq (stmt_vinfo, def_stmt);
3134 pattern_stmt
3135 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3136 MINUS_EXPR, oprnd0,
3137 gimple_assign_lhs (def_stmt));
3138 }
3139 else
3140 pattern_stmt = div_stmt;
3141 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
3142
3143 return pattern_stmt;
3144 }
3145
079c527f
JJ
3146 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
3147 build_int_cst (itype, 0));
eb592645
RB
3148 if (rhs_code == TRUNC_DIV_EXPR
3149 || rhs_code == EXACT_DIV_EXPR)
079c527f
JJ
3150 {
3151 tree var = vect_recog_temp_ssa_var (itype, NULL);
3152 tree shift;
3153 def_stmt
0d0e4a03
JJ
3154 = gimple_build_assign (var, COND_EXPR, cond,
3155 fold_build2 (MINUS_EXPR, itype, oprnd1,
3156 build_int_cst (itype, 1)),
3157 build_int_cst (itype, 0));
9c58fb7a 3158 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
3159 var = vect_recog_temp_ssa_var (itype, NULL);
3160 def_stmt
0d0e4a03
JJ
3161 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
3162 gimple_assign_lhs (def_stmt));
079c527f
JJ
3163 append_pattern_def_seq (stmt_vinfo, def_stmt);
3164
3165 shift = build_int_cst (itype, tree_log2 (oprnd1));
3166 pattern_stmt
0d0e4a03
JJ
3167 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 RSHIFT_EXPR, var, shift);
079c527f
JJ
3169 }
3170 else
3171 {
3172 tree signmask;
079c527f
JJ
3173 if (compare_tree_int (oprnd1, 2) == 0)
3174 {
3175 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3176 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
3177 build_int_cst (itype, 1),
3178 build_int_cst (itype, 0));
079c527f
JJ
3179 append_pattern_def_seq (stmt_vinfo, def_stmt);
3180 }
3181 else
3182 {
3183 tree utype
3184 = build_nonstandard_integer_type (prec, 1);
7ed54790 3185 tree vecutype = get_vectype_for_scalar_type (vinfo, utype);
079c527f 3186 tree shift
7a504f33 3187 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
079c527f
JJ
3188 - tree_log2 (oprnd1));
3189 tree var = vect_recog_temp_ssa_var (utype, NULL);
3190
0d0e4a03
JJ
3191 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
3192 build_int_cst (utype, -1),
3193 build_int_cst (utype, 0));
776bfcea 3194 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
079c527f 3195 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
3196 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
3197 gimple_assign_lhs (def_stmt),
3198 shift);
776bfcea 3199 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
079c527f
JJ
3200 signmask = vect_recog_temp_ssa_var (itype, NULL);
3201 def_stmt
0d0e4a03 3202 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
3203 append_pattern_def_seq (stmt_vinfo, def_stmt);
3204 }
3205 def_stmt
0d0e4a03
JJ
3206 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3207 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
3208 append_pattern_def_seq (stmt_vinfo, def_stmt);
3209 def_stmt
0d0e4a03
JJ
3210 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3211 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
3212 fold_build2 (MINUS_EXPR, itype, oprnd1,
3213 build_int_cst (itype, 1)));
079c527f
JJ
3214 append_pattern_def_seq (stmt_vinfo, def_stmt);
3215
3216 pattern_stmt
0d0e4a03
JJ
3217 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3218 MINUS_EXPR, gimple_assign_lhs (def_stmt),
3219 signmask);
079c527f
JJ
3220 }
3221
079c527f 3222 return pattern_stmt;
363477c0 3223 }
079c527f 3224
6b58915b
RS
3225 if (prec > HOST_BITS_PER_WIDE_INT
3226 || integer_zerop (oprnd1))
079c527f
JJ
3227 return NULL;
3228
00f07b86
RH
3229 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
3230 return NULL;
079c527f 3231
079c527f 3232 if (TYPE_UNSIGNED (itype))
363477c0 3233 {
079c527f
JJ
3234 unsigned HOST_WIDE_INT mh, ml;
3235 int pre_shift, post_shift;
6b58915b 3236 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
7a504f33 3237 & GET_MODE_MASK (itype_mode));
5deb57cb 3238 tree t1, t2, t3, t4;
079c527f 3239
fecfbfa4 3240 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
079c527f
JJ
3241 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
3242 return NULL;
3243
3244 /* Find a suitable multiplier and right shift count
3245 instead of multiplying with D. */
3246 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
3247
3248 /* If the suggested multiplier is more than SIZE bits, we can do better
3249 for even divisors, using an initial right shift. */
3250 if (mh != 0 && (d & 1) == 0)
363477c0 3251 {
146ec50f 3252 pre_shift = ctz_or_zero (d);
079c527f
JJ
3253 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
3254 &ml, &post_shift, &dummy_int);
3255 gcc_assert (!mh);
3256 }
3257 else
3258 pre_shift = 0;
3259
3260 if (mh != 0)
3261 {
3262 if (post_shift - 1 >= prec)
3263 return NULL;
3264
5deb57cb
JJ
3265 /* t1 = oprnd0 h* ml;
3266 t2 = oprnd0 - t1;
3267 t3 = t2 >> 1;
3268 t4 = t1 + t3;
3269 q = t4 >> (post_shift - 1); */
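	  /* Concretely (illustrative, assuming prec == 32 and d == 7):
	     choose_multiplier yields ml == 0x24924925 and post_shift == 3,
	     giving the familiar

	       t1 = oprnd0 h* 0x24924925;
	       q = (t1 + ((oprnd0 - t1) >> 1)) >> 2;  */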
3270 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3271 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3272 build_int_cst (itype, ml));
079c527f 3273 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 3274
5deb57cb 3275 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 3276 def_stmt
0d0e4a03 3277 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 3278 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
3279
3280 t3 = vect_recog_temp_ssa_var (itype, NULL);
3281 def_stmt
0d0e4a03 3282 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
3283 append_pattern_def_seq (stmt_vinfo, def_stmt);
3284
5deb57cb 3285 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 3286 def_stmt
0d0e4a03 3287 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
3288
3289 if (post_shift != 1)
3290 {
3291 append_pattern_def_seq (stmt_vinfo, def_stmt);
3292
5deb57cb 3293 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 3294 pattern_stmt
0d0e4a03
JJ
3295 = gimple_build_assign (q, RSHIFT_EXPR, t4,
3296 build_int_cst (itype, post_shift - 1));
079c527f
JJ
3297 }
3298 else
3299 {
5deb57cb 3300 q = t4;
079c527f
JJ
3301 pattern_stmt = def_stmt;
3302 }
363477c0
JJ
3303 }
3304 else
3305 {
079c527f
JJ
3306 if (pre_shift >= prec || post_shift >= prec)
3307 return NULL;
3308
3309 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
3310 t2 = t1 h* ml;
3311 q = t2 >> post_shift; */
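	  /* E.g. (illustrative, assuming prec == 32 and d == 6): pre_shift == 1
	     reduces the problem to dividing by 3, for which choose_multiplier
	     yields ml == 0xaaaaaaab and post_shift == 1:

	       t1 = oprnd0 >> 1;
	       t2 = t1 h* 0xaaaaaaab;
	       q = t2 >> 1;  */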
079c527f
JJ
3312 if (pre_shift)
3313 {
3314 t1 = vect_recog_temp_ssa_var (itype, NULL);
3315 def_stmt
0d0e4a03
JJ
3316 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
3317 build_int_cst (NULL, pre_shift));
079c527f
JJ
3318 append_pattern_def_seq (stmt_vinfo, def_stmt);
3319 }
3320 else
3321 t1 = oprnd0;
363477c0 3322
5deb57cb 3323 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3324 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
3325 build_int_cst (itype, ml));
079c527f 3326
5deb57cb
JJ
3327 if (post_shift)
3328 {
3329 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 3330
5deb57cb
JJ
3331 q = vect_recog_temp_ssa_var (itype, NULL);
3332 def_stmt
0d0e4a03
JJ
3333 = gimple_build_assign (q, RSHIFT_EXPR, t2,
3334 build_int_cst (itype, post_shift));
5deb57cb
JJ
3335 }
3336 else
3337 q = t2;
3338
3339 pattern_stmt = def_stmt;
079c527f
JJ
3340 }
3341 }
3342 else
3343 {
3344 unsigned HOST_WIDE_INT ml;
4ee4c52c 3345 int post_shift;
6b58915b 3346 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
3347 unsigned HOST_WIDE_INT abs_d;
3348 bool add = false;
5deb57cb 3349 tree t1, t2, t3, t4;
079c527f
JJ
3350
3351 /* Give up for -1. */
3352 if (d == -1)
3353 return NULL;
3354
079c527f
JJ
3355 /* Since d might be INT_MIN, we have to cast to
3356 unsigned HOST_WIDE_INT before negating to avoid
3357 undefined signed overflow. */
3358 abs_d = (d >= 0
3359 ? (unsigned HOST_WIDE_INT) d
3360 : - (unsigned HOST_WIDE_INT) d);
3361
3362 /* n rem d = n rem -d */
3363 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
3364 {
3365 d = abs_d;
3366 oprnd1 = build_int_cst (itype, abs_d);
3367 }
3368 else if (HOST_BITS_PER_WIDE_INT >= prec
fecfbfa4 3369 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
3370 /* This case is not handled correctly below. */
3371 return NULL;
3372
4ee4c52c 3373 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
fecfbfa4 3374 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
3375 {
3376 add = true;
dd4786fe 3377 ml |= HOST_WIDE_INT_M1U << (prec - 1);
079c527f
JJ
3378 }
3379 if (post_shift >= prec)
3380 return NULL;
3381
7abed779 3382 /* t1 = oprnd0 h* ml; */
5deb57cb 3383 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3384 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
3385 build_int_cst (itype, ml));
079c527f
JJ
3386
3387 if (add)
3388 {
5deb57cb 3389 /* t2 = t1 + oprnd0; */
7abed779 3390 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 3391 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3392 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
3393 }
3394 else
5deb57cb 3395 t2 = t1;
079c527f 3396
5deb57cb 3397 if (post_shift)
079c527f 3398 {
5deb57cb 3399 /* t3 = t2 >> post_shift; */
7abed779 3400 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 3401 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3402 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3403 build_int_cst (itype, post_shift));
363477c0 3404 }
079c527f 3405 else
5deb57cb 3406 t3 = t2;
079c527f 3407
807e902e 3408 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
3409 int msb = 1;
3410 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3411 {
807e902e 3412 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 3413 msb = 0;
807e902e 3414 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
3415 msb = -1;
3416 }
079c527f 3417
7abed779
JJ
3418 if (msb == 0 && d >= 0)
3419 {
3420 /* q = t3; */
3421 q = t3;
3422 pattern_stmt = def_stmt;
3423 }
3424 else
3425 {
3426 /* t4 = oprnd0 >> (prec - 1);
3427 or if we know from VRP that oprnd0 >= 0
3428 t4 = 0;
3429 or if we know from VRP that oprnd0 < 0
3430 t4 = -1; */
3431 append_pattern_def_seq (stmt_vinfo, def_stmt);
3432 t4 = vect_recog_temp_ssa_var (itype, NULL);
3433 if (msb != 1)
0d0e4a03
JJ
3434 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3435 build_int_cst (itype, msb));
7abed779 3436 else
0d0e4a03
JJ
3437 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3438 build_int_cst (itype, prec - 1));
7abed779
JJ
3439 append_pattern_def_seq (stmt_vinfo, def_stmt);
3440
3441 /* q = t3 - t4; or q = t4 - t3; */
3442 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3443 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3444 d < 0 ? t3 : t4);
7abed779 3445 }
079c527f
JJ
3446 }
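  /* A concrete signed case (illustrative, assuming prec == 32 and d == 3):
     choose_multiplier yields ml == 0x55555556 with post_shift == 0 and
     add == false, so t3 == oprnd0 h* 0x55555556, t4 == oprnd0 >> 31 and
     q == t3 - t4.  */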
3447
3448 if (rhs_code == TRUNC_MOD_EXPR)
3449 {
3450 tree r, t1;
3451
3452 /* We divided. Now finish by:
3453 t1 = q * oprnd1;
3454 r = oprnd0 - t1; */
3455 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3456
3457 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3458 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 3459 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 3460
079c527f 3461 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3462 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
3463 }
3464
079c527f 3465 /* Pattern detected. */
49d8df1b 3466 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
363477c0 3467
363477c0
JJ
3468 *type_out = vectype;
3469 return pattern_stmt;
3470}
3471
69d2aade
JJ
3472/* Function vect_recog_mixed_size_cond_pattern
3473
3474 Try to find the following pattern:
3475
3476 type x_t, y_t;
3477 TYPE a_T, b_T, c_T;
3478 loop:
3479 S1 a_T = x_t CMP y_t ? b_T : c_T;
3480
3481 where type 'TYPE' is an integral type which has different size
bc4fb355 3482 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 3483 than 'type', the constants need to fit into an integer type
bc4fb355 3484 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
3485
3486 Input:
3487
ba9728b0 3488 * STMT_VINFO: The stmt from which the pattern search begins.
69d2aade
JJ
3489
3490 Output:
3491
69d2aade
JJ
3492 * TYPE_OUT: The type of the output of this pattern.
3493
3494 * Return value: A new stmt that will be used to replace the pattern.
3495 Additionally a def_stmt is added.
3496
3497 a_it = x_t CMP y_t ? b_it : c_it;
3498 a_T = (TYPE) a_it; */
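/* Illustration: with 16-bit 'short' for 'type' and 32-bit 'int' for 'TYPE',

     int a = x_t < y_t ? 5 : 10;

   is rewritten so that the COND_EXPR is evaluated in a 16-bit temporary
   and only the final statement widens to 'int', keeping the comparison
   and the selection in the narrower vector type.  */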
3499
355fe088 3500static gimple *
ba9728b0 3501vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
69d2aade 3502{
7ed54790 3503 vec_info *vinfo = stmt_vinfo->vinfo;
ba9728b0 3504 gimple *last_stmt = stmt_vinfo->stmt;
69d2aade 3505 tree cond_expr, then_clause, else_clause;
bc4fb355 3506 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
355fe088 3507 gimple *pattern_stmt, *def_stmt;
bc4fb355 3508 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
355fe088 3509 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
bc4fb355
IR
3510 bool promotion;
3511 tree comp_scalar_type;
69d2aade
JJ
3512
3513 if (!is_gimple_assign (last_stmt)
3514 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3515 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3516 return NULL;
3517
3518 cond_expr = gimple_assign_rhs1 (last_stmt);
3519 then_clause = gimple_assign_rhs2 (last_stmt);
3520 else_clause = gimple_assign_rhs3 (last_stmt);
3521
87aab9b2
JJ
3522 if (!COMPARISON_CLASS_P (cond_expr))
3523 return NULL;
3524
bc4fb355 3525 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
7ed54790 3526 comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type);
87aab9b2 3527 if (comp_vectype == NULL_TREE)
69d2aade
JJ
3528 return NULL;
3529
3530 type = gimple_expr_type (last_stmt);
bc4fb355
IR
3531 if (types_compatible_p (type, comp_scalar_type)
3532 || ((TREE_CODE (then_clause) != INTEGER_CST
3533 || TREE_CODE (else_clause) != INTEGER_CST)
3534 && !INTEGRAL_TYPE_P (comp_scalar_type))
3535 || !INTEGRAL_TYPE_P (type))
3536 return NULL;
3537
3538 if ((TREE_CODE (then_clause) != INTEGER_CST
86a91c0a
RS
3539 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3540 &def_stmt0, &promotion))
bc4fb355 3541 || (TREE_CODE (else_clause) != INTEGER_CST
86a91c0a
RS
3542 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3543 &def_stmt1, &promotion)))
bc4fb355
IR
3544 return NULL;
3545
3546 if (orig_type0 && orig_type1
3547 && !types_compatible_p (orig_type0, orig_type1))
3548 return NULL;
3549
3550 if (orig_type0)
3551 {
3552 if (!types_compatible_p (orig_type0, comp_scalar_type))
3553 return NULL;
3554 then_clause = gimple_assign_rhs1 (def_stmt0);
3555 itype = orig_type0;
3556 }
3557
3558 if (orig_type1)
3559 {
3560 if (!types_compatible_p (orig_type1, comp_scalar_type))
3561 return NULL;
3562 else_clause = gimple_assign_rhs1 (def_stmt1);
3563 itype = orig_type1;
3564 }
3565
69d2aade 3566
6c825cd4
DS
3567 HOST_WIDE_INT cmp_mode_size
3568 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3569
7a504f33
RS
3570 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3571 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
69d2aade
JJ
3572 return NULL;
3573
7ed54790 3574 vectype = get_vectype_for_scalar_type (vinfo, type);
69d2aade
JJ
3575 if (vectype == NULL_TREE)
3576 return NULL;
3577
96592eed 3578 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
69d2aade
JJ
3579 return NULL;
3580
bc4fb355 3581 if (itype == NULL_TREE)
6c825cd4 3582 itype = build_nonstandard_integer_type (cmp_mode_size,
bc4fb355
IR
3583 TYPE_UNSIGNED (type));
3584
69d2aade 3585 if (itype == NULL_TREE
b397965c 3586 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
69d2aade
JJ
3587 return NULL;
3588
7ed54790 3589 vecitype = get_vectype_for_scalar_type (vinfo, itype);
69d2aade
JJ
3590 if (vecitype == NULL_TREE)
3591 return NULL;
3592
96592eed 3593 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
69d2aade
JJ
3594 return NULL;
3595
7a504f33 3596 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
69d2aade 3597 {
bc4fb355
IR
3598 if ((TREE_CODE (then_clause) == INTEGER_CST
3599 && !int_fits_type_p (then_clause, itype))
3600 || (TREE_CODE (else_clause) == INTEGER_CST
3601 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
3602 return NULL;
3603 }
3604
0d0e4a03
JJ
3605 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3606 COND_EXPR, unshare_expr (cond_expr),
3607 fold_convert (itype, then_clause),
3608 fold_convert (itype, else_clause));
3609 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3610 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 3611
776bfcea 3612 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
69d2aade
JJ
3613 *type_out = vectype;
3614
49d8df1b 3615 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
f5709183 3616
69d2aade
JJ
3617 return pattern_stmt;
3618}
3619
3620
71c92d17 3621/* Helper function of vect_recog_bool_pattern. Called recursively, return
42fd8198
IE
3622 true if bool VAR can and should be optimized that way. Assume it shouldn't
3623 in case it's a result of a comparison which can be directly vectorized into
fa2c9034
RB
3624 a vector comparison. Fills in STMTS with all stmts visited during the
3625 walk. */
71c92d17
JJ
3626
3627static bool
fa2c9034 3628check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
71c92d17 3629{
81c40241 3630 tree rhs1;
71c92d17
JJ
3631 enum tree_code rhs_code;
3632
25927307
RS
3633 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3634 if (!def_stmt_info)
71c92d17
JJ
3635 return false;
3636
25927307
RS
3637 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3638 if (!def_stmt)
71c92d17
JJ
3639 return false;
3640
fa2c9034
RB
3641 if (stmts.contains (def_stmt))
3642 return true;
71c92d17
JJ
3643
3644 rhs1 = gimple_assign_rhs1 (def_stmt);
3645 rhs_code = gimple_assign_rhs_code (def_stmt);
3646 switch (rhs_code)
3647 {
3648 case SSA_NAME:
fa2c9034
RB
3649 if (! check_bool_pattern (rhs1, vinfo, stmts))
3650 return false;
3651 break;
71c92d17
JJ
3652
3653 CASE_CONVERT:
2568d8a1 3654 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
71c92d17 3655 return false;
fa2c9034
RB
3656 if (! check_bool_pattern (rhs1, vinfo, stmts))
3657 return false;
3658 break;
71c92d17
JJ
3659
3660 case BIT_NOT_EXPR:
fa2c9034
RB
3661 if (! check_bool_pattern (rhs1, vinfo, stmts))
3662 return false;
3663 break;
71c92d17
JJ
3664
3665 case BIT_AND_EXPR:
3666 case BIT_IOR_EXPR:
3667 case BIT_XOR_EXPR:
fa2c9034
RB
3668 if (! check_bool_pattern (rhs1, vinfo, stmts)
3669 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
71c92d17 3670 return false;
fa2c9034 3671 break;
71c92d17
JJ
3672
3673 default:
3674 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3675 {
fa2c9034 3676 tree vecitype, comp_vectype;
71c92d17 3677
2f326699
JJ
3678 /* If the comparison can throw, then is_gimple_condexpr will be
3679 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
36bbc05d 3680 if (stmt_could_throw_p (cfun, def_stmt))
2f326699
JJ
3681 return false;
3682
7ed54790 3683 comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
71c92d17
JJ
3684 if (comp_vectype == NULL_TREE)
3685 return false;
3686
1bd5196c
RS
3687 tree mask_type = get_mask_type_for_scalar_type (vinfo,
3688 TREE_TYPE (rhs1));
42fd8198 3689 if (mask_type
96592eed 3690 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
42fd8198
IE
3691 return false;
3692
71c92d17
JJ
3693 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3694 {
b397965c 3695 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3696 tree itype
ab0ef706 3697 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
7ed54790 3698 vecitype = get_vectype_for_scalar_type (vinfo, itype);
71c92d17
JJ
3699 if (vecitype == NULL_TREE)
3700 return false;
3701 }
3702 else
3703 vecitype = comp_vectype;
96592eed 3704 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
fa2c9034 3705 return false;
71c92d17 3706 }
fa2c9034
RB
3707 else
3708 return false;
3709 break;
71c92d17 3710 }
fa2c9034
RB
3711
3712 bool res = stmts.add (def_stmt);
3713 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3714 gcc_assert (!res);
3715
3716 return true;
71c92d17
JJ
3717}
3718
3719
3720/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
fa2c9034
RB
3721 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
3722 pattern sequence. */
71c92d17
JJ
3723
3724static tree
fa2c9034 3725adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
71c92d17 3726{
7ed54790 3727 vec_info *vinfo = stmt_info->vinfo;
fa2c9034
RB
3728 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3729 NOP_EXPR, var);
776bfcea 3730 append_pattern_def_seq (stmt_info, cast_stmt,
7ed54790 3731 get_vectype_for_scalar_type (vinfo, type));
71c92d17
JJ
3732 return gimple_assign_lhs (cast_stmt);
3733}
3734
fa2c9034
RB
3735/* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3736 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3737 type, OUT_TYPE is the desired final integer type of the whole pattern.
3738 STMT_INFO is the info of the pattern root and is where pattern stmts should
3739 be associated with. DEFS is a map of pattern defs. */
71c92d17 3740
fa2c9034
RB
3741static void
3742adjust_bool_pattern (tree var, tree out_type,
3743 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
71c92d17 3744{
7ed54790 3745 vec_info *vinfo = stmt_info->vinfo;
355fe088 3746 gimple *stmt = SSA_NAME_DEF_STMT (var);
71c92d17
JJ
3747 enum tree_code rhs_code, def_rhs_code;
3748 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3749 location_t loc;
355fe088 3750 gimple *pattern_stmt, *def_stmt;
fa2c9034 3751 tree trueval = NULL_TREE;
71c92d17
JJ
3752
3753 rhs1 = gimple_assign_rhs1 (stmt);
3754 rhs2 = gimple_assign_rhs2 (stmt);
3755 rhs_code = gimple_assign_rhs_code (stmt);
3756 loc = gimple_location (stmt);
3757 switch (rhs_code)
3758 {
3759 case SSA_NAME:
3760 CASE_CONVERT:
fa2c9034 3761 irhs1 = *defs.get (rhs1);
71c92d17
JJ
3762 itype = TREE_TYPE (irhs1);
3763 pattern_stmt
0d0e4a03
JJ
3764 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3765 SSA_NAME, irhs1);
71c92d17
JJ
3766 break;
3767
3768 case BIT_NOT_EXPR:
fa2c9034 3769 irhs1 = *defs.get (rhs1);
71c92d17
JJ
3770 itype = TREE_TYPE (irhs1);
3771 pattern_stmt
0d0e4a03
JJ
3772 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3773 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
71c92d17
JJ
3774 break;
3775
3776 case BIT_AND_EXPR:
3777 /* Try to optimize x = y & (a < b ? 1 : 0); into
3778 x = (a < b ? y : 0);
3779
3780 E.g. for:
3781 bool a_b, b_b, c_b;
3782 TYPE d_T;
3783
3784 S1 a_b = x1 CMP1 y1;
3785 S2 b_b = x2 CMP2 y2;
3786 S3 c_b = a_b & b_b;
3787 S4 d_T = (TYPE) c_b;
3788
3789 we would normally emit:
3790
3791 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3792 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3793 S3' c_T = a_T & b_T;
3794 S4' d_T = c_T;
3795
3796 but we can save one stmt by using the
3797 result of one of the COND_EXPRs in the other COND_EXPR and leave
3798 the BIT_AND_EXPR stmt out:
3799
3800 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3801 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3802 S4' d_T = c_T;
3803
3804 At least when VEC_COND_EXPR is implemented using masks
3805 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3806 computes the comparison mask and ands it, in one case with
3807 an all-ones vector, in the other case with a vector register.
3808 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3809 often more expensive. */
3810 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3811 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3812 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3813 {
fa2c9034 3814 irhs1 = *defs.get (rhs1);
71c92d17 3815 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
71c92d17 3816 if (TYPE_PRECISION (TREE_TYPE (irhs1))
b397965c 3817 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
71c92d17 3818 {
fa2c9034
RB
3819 rhs_code = def_rhs_code;
3820 rhs1 = def_rhs1;
3821 rhs2 = gimple_assign_rhs2 (def_stmt);
3822 trueval = irhs1;
3823 goto do_compare;
71c92d17
JJ
3824 }
3825 else
fa2c9034 3826 irhs2 = *defs.get (rhs2);
71c92d17
JJ
3827 goto and_ior_xor;
3828 }
3829 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3830 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3831 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3832 {
fa2c9034 3833 irhs2 = *defs.get (rhs2);
71c92d17 3834 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
71c92d17 3835 if (TYPE_PRECISION (TREE_TYPE (irhs2))
b397965c 3836 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
71c92d17 3837 {
fa2c9034
RB
3838 rhs_code = def_rhs_code;
3839 rhs1 = def_rhs1;
3840 rhs2 = gimple_assign_rhs2 (def_stmt);
3841 trueval = irhs2;
3842 goto do_compare;
71c92d17
JJ
3843 }
3844 else
fa2c9034 3845 irhs1 = *defs.get (rhs1);
71c92d17
JJ
3846 goto and_ior_xor;
3847 }
3848 /* FALLTHRU */
3849 case BIT_IOR_EXPR:
3850 case BIT_XOR_EXPR:
fa2c9034
RB
3851 irhs1 = *defs.get (rhs1);
3852 irhs2 = *defs.get (rhs2);
71c92d17
JJ
3853 and_ior_xor:
3854 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3855 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3856 {
3857 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3858 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3859 int out_prec = TYPE_PRECISION (out_type);
3860 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
fa2c9034
RB
3861 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3862 stmt_info);
71c92d17 3863 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
fa2c9034
RB
3864 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3865 stmt_info);
71c92d17
JJ
3866 else
3867 {
fa2c9034
RB
3868 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3869 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
71c92d17
JJ
3870 }
3871 }
3872 itype = TREE_TYPE (irhs1);
3873 pattern_stmt
0d0e4a03
JJ
3874 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3875 rhs_code, irhs1, irhs2);
71c92d17
JJ
3876 break;
3877
3878 default:
fa2c9034 3879 do_compare:
71c92d17
JJ
3880 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3881 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2 3882 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
73a699ae
RS
3883 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3884 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17 3885 {
b397965c 3886 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3887 itype
ab0ef706 3888 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
3889 }
3890 else
3891 itype = TREE_TYPE (rhs1);
3892 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3893 if (trueval == NULL_TREE)
3894 trueval = build_int_cst (itype, 1);
3895 else
3896 gcc_checking_assert (useless_type_conversion_p (itype,
3897 TREE_TYPE (trueval)));
3898 pattern_stmt
0d0e4a03
JJ
3899 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3900 COND_EXPR, cond_expr, trueval,
3901 build_int_cst (itype, 0));
71c92d17
JJ
3902 break;
3903 }
3904
71c92d17 3905 gimple_set_location (pattern_stmt, loc);
776bfcea 3906 append_pattern_def_seq (stmt_info, pattern_stmt,
7ed54790 3907 get_vectype_for_scalar_type (vinfo, itype));
fa2c9034
RB
3908 defs.put (var, gimple_assign_lhs (pattern_stmt));
3909}
3910
3911/* Comparison function to qsort a vector of gimple stmts after UID. */
3912
3913static int
3914sort_after_uid (const void *p1, const void *p2)
3915{
3916 const gimple *stmt1 = *(const gimple * const *)p1;
3917 const gimple *stmt2 = *(const gimple * const *)p2;
3918 return gimple_uid (stmt1) - gimple_uid (stmt2);
71c92d17
JJ
3919}
3920
fa2c9034 3921/* Create pattern stmts for all stmts participating in the bool pattern
32e8e429 3922 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
fa2c9034
RB
3923 OUT_TYPE. Return the def of the pattern root. */
3924
3925static tree
3926adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
32e8e429 3927 tree out_type, stmt_vec_info stmt_info)
fa2c9034
RB
3928{
3929 /* Gather original stmts in the bool pattern in their order of appearance
3930 in the IL. */
3931 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3932 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3933 i != bool_stmt_set.end (); ++i)
3934 bool_stmts.quick_push (*i);
3935 bool_stmts.qsort (sort_after_uid);
3936
3937 /* Now process them in that order, producing pattern stmts. */
3938 hash_map <tree, tree> defs;
3939 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3940 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
91987857 3941 out_type, stmt_info, defs);
fa2c9034
RB
3942
3943 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3944 gimple *pattern_stmt
91987857 3945 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
fa2c9034
RB
3946 return gimple_assign_lhs (pattern_stmt);
3947}
71c92d17 3948
0c3ea6b3
RS
3949/* Return the proper type for converting bool VAR into
3950 an integer value or NULL_TREE if no such type exists.
3951 The type is chosen so that the converted value has the
3952 same number of elements as VAR's vector type. */
42fd8198
IE
3953
3954static tree
0c3ea6b3 3955integer_type_for_mask (tree var, vec_info *vinfo)
42fd8198 3956{
2568d8a1 3957 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
42fd8198
IE
3958 return NULL_TREE;
3959
25927307 3960 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
0c3ea6b3 3961 if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
42fd8198
IE
3962 return NULL_TREE;
3963
0c3ea6b3 3964 return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
5116b156 3965}
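/* For example, a mask defined by a comparison of two vectors of 32-bit
   ints would normally have a recorded mask precision of 32, for which
   this returns a 32-bit unsigned integer type.  (Illustrative; the
   actual value is whatever mask precision was recorded for VAR's
   definition statement.)  */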
42fd8198 3966
71c92d17
JJ
3967/* Function vect_recog_bool_pattern
3968
3969 Try to find pattern like following:
3970
3971 bool a_b, b_b, c_b, d_b, e_b;
3972 TYPE f_T;
3973 loop:
3974 S1 a_b = x1 CMP1 y1;
3975 S2 b_b = x2 CMP2 y2;
3976 S3 c_b = a_b & b_b;
3977 S4 d_b = x3 CMP3 y3;
3978 S5 e_b = c_b | d_b;
3979 S6 f_T = (TYPE) e_b;
3980
52264dbf
RB
3981 where type 'TYPE' is an integral type. Or a similar pattern
3982 ending in
3983
3984 S6 f_Y = e_b ? r_Y : s_Y;
3985
3986 as results from if-conversion of a complex condition.
71c92d17
JJ
3987
3988 Input:
3989
ba9728b0
RS
3990 * STMT_VINFO: The stmt at the end from which the pattern
3991 search begins, i.e. cast of a bool to
3992 an integer type.
71c92d17
JJ
3993
3994 Output:
3995
71c92d17
JJ
3996 * TYPE_OUT: The type of the output of this pattern.
3997
3998 * Return value: A new stmt that will be used to replace the pattern.
3999
4000 Assuming size of TYPE is the same as size of all comparisons
4001 (otherwise some casts would be added where needed), the above
4002 sequence we create related pattern stmts:
4003 S1' a_T = x1 CMP1 y1 ? 1 : 0;
4004 S3' c_T = x2 CMP2 y2 ? a_T : 0;
4005 S4' d_T = x3 CMP3 y3 ? 1 : 0;
4006 S5' e_T = c_T | d_T;
4007 S6' f_T = e_T;
4008
4009 Instead of the above S3' we could emit:
4010 S2' b_T = x2 CMP2 y2 ? 1 : 0;
4011 S3' c_T = a_T | b_T;
4012 but the above is more efficient. */
4013
355fe088 4014static gimple *
ba9728b0 4015vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
71c92d17 4016{
ba9728b0 4017 gimple *last_stmt = stmt_vinfo->stmt;
71c92d17
JJ
4018 enum tree_code rhs_code;
4019 tree var, lhs, rhs, vectype;
310213d4 4020 vec_info *vinfo = stmt_vinfo->vinfo;
355fe088 4021 gimple *pattern_stmt;
71c92d17
JJ
4022
4023 if (!is_gimple_assign (last_stmt))
4024 return NULL;
4025
4026 var = gimple_assign_rhs1 (last_stmt);
4027 lhs = gimple_assign_lhs (last_stmt);
4028
2568d8a1 4029 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
71c92d17
JJ
4030 return NULL;
4031
fa2c9034
RB
4032 hash_set<gimple *> bool_stmts;
4033
71c92d17
JJ
4034 rhs_code = gimple_assign_rhs_code (last_stmt);
4035 if (CONVERT_EXPR_CODE_P (rhs_code))
4036 {
d362ac6c 4037 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
78048b1c 4038 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17 4039 return NULL;
7ed54790 4040 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
71c92d17
JJ
4041 if (vectype == NULL_TREE)
4042 return NULL;
4043
fa2c9034 4044 if (check_bool_pattern (var, vinfo, bool_stmts))
42fd8198 4045 {
86a91c0a 4046 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
42fd8198
IE
4047 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4048 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4049 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4050 else
4051 pattern_stmt
4052 = gimple_build_assign (lhs, NOP_EXPR, rhs);
4053 }
71c92d17 4054 else
42fd8198 4055 {
0c3ea6b3 4056 tree type = integer_type_for_mask (var, vinfo);
a414c77f 4057 tree cst0, cst1, tmp;
42fd8198
IE
4058
4059 if (!type)
4060 return NULL;
4061
4062 /* We may directly use cond with narrowed type to avoid
4063 multiple cond exprs with following result packing and
4064 perform single cond with packed mask instead. In case
4065 of widening we better make cond first and then extract
4066 results. */
4067 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
4068 type = TREE_TYPE (lhs);
4069
4070 cst0 = build_int_cst (type, 0);
4071 cst1 = build_int_cst (type, 1);
4072 tmp = vect_recog_temp_ssa_var (type, NULL);
a414c77f 4073 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
42fd8198
IE
4074
4075 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
4076 {
7ed54790 4077 tree new_vectype = get_vectype_for_scalar_type (vinfo, type);
776bfcea 4078 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
42fd8198
IE
4079
4080 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4081 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
4082 }
4083 }
4084
71c92d17 4085 *type_out = vectype;
49d8df1b 4086 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
f5709183 4087
52264dbf
RB
4088 return pattern_stmt;
4089 }
4090 else if (rhs_code == COND_EXPR
4091 && TREE_CODE (var) == SSA_NAME)
4092 {
7ed54790 4093 vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
52264dbf
RB
4094 if (vectype == NULL_TREE)
4095 return NULL;
4096
4097 /* Build a scalar type for the boolean result that when
4098 vectorized matches the vector type of the result in
4099 size and number of elements. */
4100 unsigned prec
928686b1
RS
4101 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
4102 TYPE_VECTOR_SUBPARTS (vectype));
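      /* E.g. a 128-bit vector type with 4 elements gives prec == 32.  */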
4103
52264dbf
RB
4104 tree type
4105 = build_nonstandard_integer_type (prec,
4106 TYPE_UNSIGNED (TREE_TYPE (var)));
7ed54790 4107 if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE)
52264dbf
RB
4108 return NULL;
4109
fa2c9034 4110 if (!check_bool_pattern (var, vinfo, bool_stmts))
a414c77f
IE
4111 return NULL;
4112
86a91c0a 4113 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
52264dbf 4114
52264dbf
RB
4115 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4116 pattern_stmt
a414c77f
IE
4117 = gimple_build_assign (lhs, COND_EXPR,
4118 build2 (NE_EXPR, boolean_type_node,
4119 rhs, build_int_cst (type, 0)),
0d0e4a03
JJ
4120 gimple_assign_rhs2 (last_stmt),
4121 gimple_assign_rhs3 (last_stmt));
52264dbf 4122 *type_out = vectype;
49d8df1b 4123 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
52264dbf 4124
71c92d17
JJ
4125 return pattern_stmt;
4126 }
ab0ef706
JJ
4127 else if (rhs_code == SSA_NAME
4128 && STMT_VINFO_DATA_REF (stmt_vinfo))
4129 {
4130 stmt_vec_info pattern_stmt_info;
9b75f56d
RS
4131 tree nunits_vectype;
4132 if (!vect_get_vector_types_for_stmt (stmt_vinfo, &vectype,
4133 &nunits_vectype)
4134 || !VECTOR_MODE_P (TYPE_MODE (vectype)))
78336739 4135 return NULL;
ab0ef706 4136
fa2c9034 4137 if (check_bool_pattern (var, vinfo, bool_stmts))
86a91c0a 4138 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
42fd8198
IE
4139 else
4140 {
0c3ea6b3 4141 tree type = integer_type_for_mask (var, vinfo);
a414c77f 4142 tree cst0, cst1, new_vectype;
42fd8198
IE
4143
4144 if (!type)
4145 return NULL;
4146
4147 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
4148 type = TREE_TYPE (vectype);
4149
4150 cst0 = build_int_cst (type, 0);
4151 cst1 = build_int_cst (type, 1);
7ed54790 4152 new_vectype = get_vectype_for_scalar_type (vinfo, type);
42fd8198
IE
4153
4154 rhs = vect_recog_temp_ssa_var (type, NULL);
a414c77f 4155 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
776bfcea 4156 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
42fd8198
IE
4157 }
4158
ab0ef706
JJ
4159 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
4160 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
4161 {
4162 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
355fe088 4163 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
42fd8198 4164 append_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
4165 rhs = rhs2;
4166 }
0d0e4a03 4167 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
4fbeb363 4168 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
f44fb7aa 4169 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
ab0ef706 4170 *type_out = vectype;
49d8df1b
RS
4171 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4172
ab0ef706
JJ
4173 return pattern_stmt;
4174 }
71c92d17
JJ
4175 else
4176 return NULL;
4177}
4178
4179
e6f5c25d
IE
4180/* A helper for vect_recog_mask_conversion_pattern. Build
4181 conversion of MASK to a type suitable for masking VECTYPE.
4182 Built statement gets required vectype and is appended to
4183 a pattern sequence of STMT_VINFO.
4184
4185 Return converted mask. */
4186
4187static tree
776bfcea 4188build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
e6f5c25d
IE
4189{
4190 gimple *stmt;
4191 tree masktype, tmp;
e6f5c25d 4192
e8738f4e 4193 masktype = truth_type_for (vectype);
e6f5c25d
IE
4194 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
4195 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
0c3ea6b3 4196 append_pattern_def_seq (stmt_vinfo, stmt, masktype, TREE_TYPE (vectype));
e6f5c25d
IE
4197
4198 return tmp;
4199}
4200
4201
4202/* Function vect_recog_mask_conversion_pattern
4203
4204 Try to find statements which require boolean type
4205 conversion. Additional conversion statements are
4206 added to handle such cases. For example:
4207
4208 bool m_1, m_2, m_3;
4209 int i_4, i_5;
4210 double d_6, d_7;
4211 char c_1, c_2, c_3;
4212
4213 S1 m_1 = i_4 > i_5;
4214 S2 m_2 = d_6 < d_7;
4215 S3 m_3 = m_1 & m_2;
4216 S4 c_1 = m_3 ? c_2 : c_3;
4217
4218 Will be transformed into:
4219
4220 S1 m_1 = i_4 > i_5;
4221 S2 m_2 = d_6 < d_7;
4222 S3'' m_2' = (_Bool[bitsize=32])m_2
4223 S3' m_3' = m_1 & m_2';
4224 S4'' m_3'' = (_Bool[bitsize=8])m_3'
4225 S4' c_1' = m_3'' ? c_2 : c_3; */
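/* In the example above, S1 compares 32-bit ints and therefore defines a
   mask with 32-bit elements, while S2 compares 64-bit doubles and defines
   one with 64-bit elements; S3'' converts m_2 so that the AND in S3' is
   performed on matching masks.  S4 then selects 8-bit chars, so S4''
   converts m_3' down once more.  */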
4226
4227static gimple *
ba9728b0 4228vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
e6f5c25d 4229{
ba9728b0 4230 gimple *last_stmt = stmt_vinfo->stmt;
e6f5c25d 4231 enum tree_code rhs_code;
310aba3b
ML
4232 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
4233 tree vectype1, vectype2;
e6f5c25d
IE
4234 stmt_vec_info pattern_stmt_info;
4235 vec_info *vinfo = stmt_vinfo->vinfo;
e6f5c25d
IE
4236
4237 /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion. */
4238 if (is_gimple_call (last_stmt)
2c58d42c 4239 && gimple_call_internal_p (last_stmt))
e6f5c25d 4240 {
a844293d 4241 gcall *pattern_stmt;
e6f5c25d 4242
2c58d42c
RS
4243 internal_fn ifn = gimple_call_internal_fn (last_stmt);
4244 int mask_argno = internal_fn_mask_index (ifn);
4245 if (mask_argno < 0)
4246 return NULL;
4247
4248 bool store_p = internal_store_fn_p (ifn);
4249 if (store_p)
e6f5c25d 4250 {
2c58d42c
RS
4251 int rhs_index = internal_fn_stored_value_index (ifn);
4252 tree rhs = gimple_call_arg (last_stmt, rhs_index);
7ed54790 4253 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs));
e6f5c25d
IE
4254 }
4255 else
4256 {
2c58d42c 4257 lhs = gimple_call_lhs (last_stmt);
7ed54790 4258 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
e6f5c25d
IE
4259 }
4260
2c58d42c 4261 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
0c3ea6b3 4262 tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
2c58d42c 4263 if (!mask_arg_type)
e6f5c25d 4264 return NULL;
1bd5196c 4265 vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
e6f5c25d
IE
4266
4267 if (!vectype1 || !vectype2
928686b1
RS
4268 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4269 TYPE_VECTOR_SUBPARTS (vectype2)))
e6f5c25d
IE
4270 return NULL;
4271
2c58d42c
RS
4272 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
4273
4274 auto_vec<tree, 8> args;
4275 unsigned int nargs = gimple_call_num_args (last_stmt);
4276 args.safe_grow (nargs);
4277 for (unsigned int i = 0; i < nargs; ++i)
4278 args[i] = ((int) i == mask_argno
4279 ? tmp
4280 : gimple_call_arg (last_stmt, i));
4281 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
e6f5c25d 4282
2c58d42c 4283 if (!store_p)
e6f5c25d
IE
4284 {
4285 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
e6f5c25d
IE
4286 gimple_call_set_lhs (pattern_stmt, lhs);
4287 }
a844293d 4288 gimple_call_set_nothrow (pattern_stmt, true);
e6f5c25d 4289
4fbeb363 4290 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
2c58d42c 4291 if (STMT_VINFO_DATA_REF (stmt_vinfo))
f44fb7aa 4292 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
e6f5c25d
IE
4293
4294 *type_out = vectype1;
49d8df1b 4295 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
e6f5c25d
IE
4296
4297 return pattern_stmt;
4298 }
4299
4300 if (!is_gimple_assign (last_stmt))
4301 return NULL;
4302
a844293d 4303 gimple *pattern_stmt;
e6f5c25d
IE
4304 lhs = gimple_assign_lhs (last_stmt);
4305 rhs1 = gimple_assign_rhs1 (last_stmt);
4306 rhs_code = gimple_assign_rhs_code (last_stmt);
4307
4308 /* Check for cond expression requiring mask conversion. */
4309 if (rhs_code == COND_EXPR)
4310 {
7ed54790 4311 vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs));
e6f5c25d
IE
4312
4313 if (TREE_CODE (rhs1) == SSA_NAME)
4314 {
0c3ea6b3 4315 rhs1_type = integer_type_for_mask (rhs1, vinfo);
e6f5c25d
IE
4316 if (!rhs1_type)
4317 return NULL;
4318 }
dea60b59 4319 else if (COMPARISON_CLASS_P (rhs1))
6a3c127c
RS
4320 {
4321 /* Check whether we're comparing scalar booleans and (if so)
4322 whether a better mask type exists than the mask associated
4323 with boolean-sized elements. This avoids unnecessary packs
4324 and unpacks if the booleans are set from comparisons of
4325 wider types. E.g. in:
4326
4327 int x1, x2, x3, x4, y1, y2;
4328 ...
4329 bool b1 = (x1 == x2);
4330 bool b2 = (x3 == x4);
4331 ... = b1 == b2 ? y1 : y2;
4332
4333 it is better for b1 and b2 to use the mask type associated
4334 with int elements rather than bool (byte) elements. */
0c3ea6b3 4335 rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
6a3c127c
RS
4336 if (!rhs1_type)
4337 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4338 }
dea60b59
JJ
4339 else
4340 return NULL;
e6f5c25d 4341
1bd5196c 4342 vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
e6f5c25d 4343
6a3c127c
RS
4344 if (!vectype1 || !vectype2)
4345 return NULL;
4346
4347 /* Continue if a conversion is needed. Also continue if we have
4348 a comparison whose vector type would normally be different from
4349 VECTYPE2 when considered in isolation. In that case we'll
4350 replace the comparison with an SSA name (so that we can record
4351 its vector type) and behave as though the comparison was an SSA
4352 name from the outset. */
4353 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4354 TYPE_VECTOR_SUBPARTS (vectype2))
4355 && (TREE_CODE (rhs1) == SSA_NAME
4356 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
e6f5c25d
IE
4357 return NULL;
4358
8da4c8d8
RB
4359 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4360 in place, we can handle it in vectorizable_condition. This avoids
4361 unnecessary promotion stmts and increased vectorization factor. */
4362 if (COMPARISON_CLASS_P (rhs1)
4363 && INTEGRAL_TYPE_P (rhs1_type)
928686b1
RS
4364 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4365 TYPE_VECTOR_SUBPARTS (vectype2)))
8da4c8d8 4366 {
8da4c8d8 4367 enum vect_def_type dt;
894dd753 4368 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
8da4c8d8 4369 && dt == vect_external_def
894dd753 4370 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
8da4c8d8
RB
4371 && (dt == vect_external_def
4372 || dt == vect_constant_def))
4373 {
4374 tree wide_scalar_type = build_nonstandard_integer_type
4375 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4376 TYPE_UNSIGNED (rhs1_type));
7ed54790
RS
4377 tree vectype3 = get_vectype_for_scalar_type (vinfo,
4378 wide_scalar_type);
8da4c8d8
RB
4379 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4380 return NULL;
4381 }
4382 }
4383
e6f5c25d
IE
4384 /* If rhs1 is a comparison we need to move it into a
4385 separate statement. */
4386 if (TREE_CODE (rhs1) != SSA_NAME)
4387 {
4388 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4389 pattern_stmt = gimple_build_assign (tmp, rhs1);
4390 rhs1 = tmp;
0c3ea6b3
RS
4391 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
4392 rhs1_type);
e6f5c25d
IE
4393 }
4394
6a3c127c
RS
4395 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4396 TYPE_VECTOR_SUBPARTS (vectype2)))
776bfcea 4397 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
6a3c127c
RS
4398 else
4399 tmp = rhs1;
e6f5c25d
IE
4400
4401 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4402 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4403 gimple_assign_rhs2 (last_stmt),
4404 gimple_assign_rhs3 (last_stmt));
4405
4406 *type_out = vectype1;
49d8df1b 4407 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
e6f5c25d
IE
4408
4409 return pattern_stmt;
4410 }
4411
4412 /* Now check for binary boolean operations requiring conversion for
4413 one of operands. */
2568d8a1 4414 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
e6f5c25d
IE
4415 return NULL;
4416
4417 if (rhs_code != BIT_IOR_EXPR
4418 && rhs_code != BIT_XOR_EXPR
49e76ff1
IE
4419 && rhs_code != BIT_AND_EXPR
4420 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
e6f5c25d
IE
4421 return NULL;
4422
4423 rhs2 = gimple_assign_rhs2 (last_stmt);
4424
0c3ea6b3
RS
4425 rhs1_type = integer_type_for_mask (rhs1, vinfo);
4426 rhs2_type = integer_type_for_mask (rhs2, vinfo);
e6f5c25d
IE
4427
4428 if (!rhs1_type || !rhs2_type
4429 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4430 return NULL;
4431
4432 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4433 {
1bd5196c 4434 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type);
e6f5c25d
IE
4435 if (!vectype1)
4436 return NULL;
776bfcea 4437 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
e6f5c25d
IE
4438 }
4439 else
4440 {
1bd5196c 4441 vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type);
e6f5c25d
IE
4442 if (!vectype1)
4443 return NULL;
776bfcea 4444 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
e6f5c25d
IE
4445 }
4446
4447 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4448 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4449
4450 *type_out = vectype1;
49d8df1b 4451 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
e6f5c25d
IE
4452
4453 return pattern_stmt;
4454}
4455
32e8e429 4456/* STMT_INFO is a load or store. If the load or store is conditional, return
bfaa08b7
RS
4457 the boolean condition under which it occurs, otherwise return null. */
4458
4459static tree
32e8e429 4460vect_get_load_store_mask (stmt_vec_info stmt_info)
bfaa08b7 4461{
32e8e429 4462 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
bfaa08b7
RS
4463 {
4464 gcc_assert (gimple_assign_single_p (def_assign));
4465 return NULL_TREE;
4466 }
4467
32e8e429 4468 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
bfaa08b7
RS
4469 {
4470 internal_fn ifn = gimple_call_internal_fn (def_call);
4471 int mask_index = internal_fn_mask_index (ifn);
4472 return gimple_call_arg (def_call, mask_index);
4473 }
4474
4475 gcc_unreachable ();
4476}
4477
bfaa08b7
RS
4478/* Return MASK if MASK is suitable for masking an operation on vectors
4479 of type VECTYPE, otherwise convert it into such a form and return
4480 the result. Associate any conversion statements with STMT_INFO's
4481 pattern. */
4482
4483static tree
4484vect_convert_mask_for_vectype (tree mask, tree vectype,
4485 stmt_vec_info stmt_info, vec_info *vinfo)
4486{
0c3ea6b3 4487 tree mask_type = integer_type_for_mask (mask, vinfo);
bfaa08b7
RS
4488 if (mask_type)
4489 {
1bd5196c 4490 tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
bfaa08b7
RS
4491 if (mask_vectype
4492 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4493 TYPE_VECTOR_SUBPARTS (mask_vectype)))
776bfcea 4494 mask = build_mask_conversion (mask, vectype, stmt_info);
bfaa08b7
RS
4495 }
4496 return mask;
4497}
4498
4499/* Return the equivalent of:
4500
4501 fold_convert (TYPE, VALUE)
4502
4503 with the expectation that the operation will be vectorized.
4504 If new statements are needed, add them as pattern statements
4505 to STMT_INFO. */
4506
4507static tree
776bfcea 4508vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
bfaa08b7
RS
4509{
4510 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4511 return value;
4512
7ed54790 4513 vec_info *vinfo = stmt_info->vinfo;
bfaa08b7
RS
4514 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4515 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
776bfcea 4516 append_pattern_def_seq (stmt_info, conversion,
7ed54790 4517 get_vectype_for_scalar_type (vinfo, type));
bfaa08b7
RS
4518 return new_value;
4519}
4520
ba9728b0 4521/* Try to convert STMT_INFO into a call to a gather load or scatter store
bfaa08b7 4522 internal function. Return the final statement on success and set
1cbfeccc 4523 *TYPE_OUT to the vector type being loaded or stored.
bfaa08b7
RS
4524
4525 This function only handles gathers and scatters that were recognized
4526 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
4527
4528static gimple *
ba9728b0 4529vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
bfaa08b7
RS
4530{
4531 /* Currently we only support this for loop vectorization. */
bfaa08b7
RS
4532 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4533 if (!loop_vinfo)
4534 return NULL;
4535
4536 /* Make sure that we're looking at a gather load or scatter store. */
4537 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4538 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4539 return NULL;
4540
bfaa08b7
RS
4541 /* Get the boolean that controls whether the load or store happens.
4542 This is null if the operation is unconditional. */
86a91c0a 4543 tree mask = vect_get_load_store_mask (stmt_info);
bfaa08b7
RS
4544
4545 /* Make sure that the target supports an appropriate internal
4546 function for the gather/scatter operation. */
4547 gather_scatter_info gs_info;
86a91c0a 4548 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
bfaa08b7
RS
4549 || gs_info.decl)
4550 return NULL;
4551
4552 /* Convert the mask to the right form. */
7ed54790
RS
4553 tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo,
4554 gs_info.element_type);
bfaa08b7 4555 if (mask)
ba9728b0 4556 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
bfaa08b7
RS
4557 loop_vinfo);
4558
4559 /* Get the invariant base and non-invariant offset, converting the
4560 latter to the same width as the vector elements. */
4561 tree base = gs_info.base;
09eb042a 4562 tree offset_type = TREE_TYPE (gs_info.offset_vectype);
776bfcea
RS
4563 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4564 stmt_info);
bfaa08b7
RS
4565
4566 /* Build the new pattern statement. */
4567 tree scale = size_int (gs_info.scale);
4568 gcall *pattern_stmt;
4569 if (DR_IS_READ (dr))
4570 {
09eb042a 4571 tree zero = build_zero_cst (gs_info.element_type);
bfaa08b7 4572 if (mask != NULL)
09eb042a
RS
4573 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 5, base,
4574 offset, scale, zero, mask);
bfaa08b7 4575 else
09eb042a
RS
4576 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4577 offset, scale, zero);
bfaa08b7
RS
4578 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4579 gimple_call_set_lhs (pattern_stmt, load_lhs);
4580 }
4581 else
f307441a 4582 {
86a91c0a 4583 tree rhs = vect_get_store_rhs (stmt_info);
f307441a
RS
4584 if (mask != NULL)
4585 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4586 base, offset, scale, rhs,
4587 mask);
4588 else
4589 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4590 base, offset, scale, rhs);
4591 }
bfaa08b7
RS
4592 gimple_call_set_nothrow (pattern_stmt, true);
4593
4594 /* Copy across relevant vectorization info and associate DR with the
4595 new pattern statement instead of the original statement. */
4fbeb363 4596 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
f44fb7aa 4597 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
bfaa08b7
RS
4598
4599 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4600 *type_out = vectype;
86a91c0a 4601 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
bfaa08b7
RS
4602
4603 return pattern_stmt;
4604}
4605
370c2ebe
RS
4606/* Return true if TYPE is a non-boolean integer type. These are the types
4607 that we want to consider for narrowing. */
4608
4609static bool
4610vect_narrowable_type_p (tree type)
4611{
4612 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4613}
4614
4615/* Return true if the operation given by CODE can be truncated to N bits
4616 when only N bits of the output are needed. This is only true if bit N+1
4617 of the inputs has no effect on the low N bits of the result. */
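/* For instance, the low 8 bits of a + b depend only on the low 8 bits of
   a and b, so PLUS_EXPR qualifies; the low 8 bits of a >> 1 also depend
   on bit 8 of a, so right shifts do not appear below.  */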
4618
4619static bool
4620vect_truncatable_operation_p (tree_code code)
4621{
4622 switch (code)
4623 {
4624 case PLUS_EXPR:
4625 case MINUS_EXPR:
4626 case MULT_EXPR:
4627 case BIT_AND_EXPR:
4628 case BIT_IOR_EXPR:
4629 case BIT_XOR_EXPR:
4630 case COND_EXPR:
4631 return true;
4632
4633 default:
4634 return false;
4635 }
4636}

/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
                         unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
          || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}

/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   is an arbitrary bit precision; it might not be a whole number of bytes.  */

static void
vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
                              unsigned int min_input_precision)
{
  /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
     that MIN_INPUT_PRECISION is a natural precision for the chain
     as a whole.  E.g. consider something like:

       unsigned short *x, *y;
       *y = ((*x & 0xf0) >> 4) | (*y << 4);

     The right shift can be done on unsigned chars, and only requires the
     result of "*x & 0xf0" to be done on unsigned chars.  But taking that
     approach would mean turning a natural chain of single-vector unsigned
     short operations into one that truncates "*x" and then extends
     "(*x & 0xf0) >> 4", with two vectors for each unsigned short
     operation and one vector for each unsigned char operation.
     This would be a significant pessimization.

     Instead only propagate the maximum of this precision and the precision
     required by the users of the result.  This means that we don't pessimize
     the case above but continue to optimize things like:

       unsigned char *y;
       unsigned short *x;
       *y = ((*x & 0xf0) >> 4) | (*y << 4);

     Here we would truncate two vectors of *x to a single vector of
     unsigned chars and use single-vector unsigned char operations for
     everything else, rather than doing two unsigned short copies of
     "(*x & 0xf0) >> 4" and then truncating the result.  */
  min_input_precision = MAX (min_input_precision,
                             stmt_info->min_output_precision);

  if (min_input_precision < TYPE_PRECISION (type)
      && (!stmt_info->min_input_precision
          || stmt_info->min_input_precision > min_input_precision))
    stmt_info->min_input_precision = min_input_precision;
}

/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  vec_info *vinfo = stmt_info->vinfo;
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      if (is_gimple_debug (use_stmt))
        continue;
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
        return false;
      /* The input precision recorded for COND_EXPRs applies only to the
         "then" and "else" values.  */
      gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
      if (assign
          && gimple_assign_rhs_code (assign) == COND_EXPR
          && use->use != gimple_assign_rhs2_ptr (assign)
          && use->use != gimple_assign_rhs3_ptr (assign))
        return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "only the low %d bits of %T are significant\n",
                     precision, lhs);

  stmt_info->min_output_precision = precision;
  return true;
}

/* Calculate min_output_precision for STMT_INFO.  */

static void
vect_determine_min_output_precision (stmt_vec_info stmt_info)
{
  /* We're only interested in statements with a narrowable result.  */
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    return;

  if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
    stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
}
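
/* As an illustration (hypothetical example), in a chain such as:

     unsigned int tmp = a + b;
     unsigned char res = (unsigned char) tmp;

   the only user of TMP is a narrowing conversion, which records a
   min_input_precision of 8 for it, so the routine above would set the
   addition's min_output_precision to 8; the addition is truncatable
   and could therefore itself be done in 8 bits.  */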

/* Use range information to decide whether STMT (described by STMT_INFO)
   could be done in a narrower type.  This is effectively a forward
   propagation, since it uses context-independent information that applies
   to all users of an SSA name.  */

static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    /* Check that all relevant input operands are compatible, and update
       [MIN_VALUE, MAX_VALUE] to include their ranges.  */
    for (unsigned int i = 1; i < nops; ++i)
      {
        tree op = gimple_op (stmt, i);
        if (TREE_CODE (op) == INTEGER_CST)
          {
            /* Don't require the integer to have TYPE (which it might
               not for things like shift amounts, etc.), but do require it
               to fit the type.  */
            if (!int_fits_type_p (op, type))
              return;

            min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
            max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
          }
        else if (TREE_CODE (op) == SSA_NAME)
          {
            /* Ignore codes that don't take uniform arguments.  */
            if (!types_compatible_p (TREE_TYPE (op), type))
              return;

            wide_int op_min_value, op_max_value;
            if (!vect_get_range_info (op, &op_min_value, &op_max_value))
              return;

            min_value = wi::min (min_value, op_min_value, sign);
            max_value = wi::max (max_value, op_max_value, sign);
          }
        else
          return;
      }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

       signed char c;
       int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

       signed char c;
       unsigned short res_1 = (unsigned short) c & 0xff00;
       int res = (int) res_1;

     where the intermediate result res_1 has unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
                     " without loss of precision: %G",
                     sign == SIGNED ? "signed" : "unsigned",
                     value_precision, stmt);

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
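
/* As an illustration (hypothetical example), given:

     int res = (a & 0xff) + (b & 0xff);  // range [0, 510]

   the result needs only 9 bits, so assuming range information for the
   two masked operands is available, the statement can be narrowed to an
   unsigned operation of precision 9, which vect_set_operation_type then
   rounds up to a whole number of bytes (16 bits).  */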

/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
         the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
        tree shift = gimple_assign_rhs2 (stmt);
        if (TREE_CODE (shift) != INTEGER_CST
            || !wi::ltu_p (wi::to_widest (shift), precision))
          return;
        unsigned int const_shift = TREE_INT_CST_LOW (shift);
        if (code == LSHIFT_EXPR)
          {
            /* We need CONST_SHIFT fewer bits of the input.  */
            operation_precision = stmt_info->min_output_precision;
            min_input_precision = (MAX (operation_precision, const_shift)
                                   - const_shift);
          }
        else
          {
            /* We need CONST_SHIFT extra bits to do the operation.  */
            operation_precision = (stmt_info->min_output_precision
                                   + const_shift);
            min_input_precision = operation_precision;
          }
        break;
      }

    default:
      if (vect_truncatable_operation_p (code))
        {
          /* Input bit N has no effect on output bits N-1 and lower.  */
          operation_precision = stmt_info->min_output_precision;
          min_input_precision = operation_precision;
          break;
        }
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
                         " without affecting users: %G",
                         TYPE_UNSIGNED (type) ? "unsigned" : "signed",
                         operation_precision, stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
                               TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
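
/* As an illustration (hypothetical example), if only the low 8 bits of
   the result of:

     unsigned int t = x >> 4;

   are needed, the shift itself must still be done in 8 + 4 = 12 bits
   (rounded up to 16), because it moves four higher bits of X into the
   significant part of the result.  For x << 4 the opposite holds:
   only 8 - 4 = 4 input bits of X would matter.  */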

/* Return true if the statement described by STMT_INFO sets a boolean
   SSA_NAME and if we know how to vectorize this kind of statement using
   vector mask types.  */

static bool
possible_vector_mask_operation_p (stmt_vec_info stmt_info)
{
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return false;

  if (gassign *assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      switch (rhs_code)
        {
        CASE_CONVERT:
        case SSA_NAME:
        case BIT_NOT_EXPR:
        case BIT_IOR_EXPR:
        case BIT_XOR_EXPR:
        case BIT_AND_EXPR:
          return true;

        default:
          return TREE_CODE_CLASS (rhs_code) == tcc_comparison;
        }
    }
  return false;
}
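
/* As an illustration (hypothetical example), both statements produced
   for:

     _Bool ok = (x >= lo) & (x <= hi);

   qualify: the comparisons because they have tcc_comparison codes and
   the AND because it is a bitwise operation on boolean operands.  Each
   could therefore be carried out on a vector mask type.  */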

/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
   a vector mask type instead of a normal vector type.  Record the
   result in STMT_INFO->mask_precision.  */

static void
vect_determine_mask_precision (stmt_vec_info stmt_info)
{
  vec_info *vinfo = stmt_info->vinfo;

  if (!possible_vector_mask_operation_p (stmt_info)
      || stmt_info->mask_precision)
    return;

  auto_vec<stmt_vec_info, 32> worklist;
  worklist.quick_push (stmt_info);
  while (!worklist.is_empty ())
    {
      stmt_info = worklist.last ();
      unsigned int orig_length = worklist.length ();

      /* If at least one boolean input uses a vector mask type,
         pick the mask type with the narrowest elements.

         ??? This is the traditional behavior.  It should always produce
         the smallest number of operations, but isn't necessarily the
         optimal choice.  For example, if we have:

           a = b & c

         where:

           - the user of a wants it to have a mask type for 16-bit elements (M16)
           - b also uses M16
           - c uses a mask type for 8-bit elements (M8)

         then picking M8 gives:

           - 1 M16->M8 pack for b
           - 1 M8 AND for a
           - 2 M8->M16 unpacks for the user of a

         whereas picking M16 would have given:

           - 2 M8->M16 unpacks for c
           - 2 M16 ANDs for a

         The number of operations is equal, but M16 would have given
         a shorter dependency chain and allowed more ILP.  */
      unsigned int precision = ~0U;
      gassign *assign = as_a <gassign *> (stmt_info->stmt);
      unsigned int nops = gimple_num_ops (assign);
      for (unsigned int i = 1; i < nops; ++i)
        {
          tree rhs = gimple_op (assign, i);
          if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs)))
            continue;

          stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs);
          if (!def_stmt_info)
            /* Don't let external or constant operands influence the choice.
               We can convert them to whichever vector type we pick.  */
            continue;

          if (def_stmt_info->mask_precision)
            {
              if (precision > def_stmt_info->mask_precision)
                precision = def_stmt_info->mask_precision;
            }
          else if (possible_vector_mask_operation_p (def_stmt_info))
            worklist.safe_push (def_stmt_info);
        }

      /* Defer the choice if we need to visit operands first.  */
      if (orig_length != worklist.length ())
        continue;

      /* If the statement compares two values that shouldn't use vector masks,
         try comparing the values as normal scalars instead.  */
      tree_code rhs_code = gimple_assign_rhs_code (assign);
      if (precision == ~0U
          && TREE_CODE_CLASS (rhs_code) == tcc_comparison)
        {
          tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign));
          scalar_mode mode;
          tree vectype, mask_type;
          if (is_a <scalar_mode> (TYPE_MODE (rhs1_type), &mode)
              && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type))
              && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type))
              && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code))
            precision = GET_MODE_BITSIZE (mode);
        }

      if (dump_enabled_p ())
        {
          if (precision == ~0U)
            dump_printf_loc (MSG_NOTE, vect_location,
                             "using normal nonmask vectors for %G",
                             stmt_info->stmt);
          else
            dump_printf_loc (MSG_NOTE, vect_location,
                             "using boolean precision %d for %G",
                             precision, stmt_info->stmt);
        }

      stmt_info->mask_precision = precision;
      worklist.pop ();
    }
}
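
/* As an illustration (hypothetical example), in:

     _Bool m = (c1 < c2) & (i1 < i2);  // chars vs. ints

   the char comparison would get mask precision 8 and the int comparison
   mask precision 32, assuming the target can compare both vector types
   directly.  The AND then picks the minimum (8), so the wider mask would
   be packed down to the narrower type before the AND.  */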

/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

void
vect_determine_stmt_precisions (stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
  vect_determine_mask_precision (stmt_info);
}

/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

      for (unsigned int i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[nbbs - i - 1];
          for (gimple_stmt_iterator si = gsi_last_bb (bb);
               !gsi_end_p (si); gsi_prev (&si))
            vect_determine_stmt_precisions
              (vinfo->lookup_stmt (gsi_stmt (si)));
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      gimple_stmt_iterator si = bb_vinfo->region_end;
      gimple *stmt;
      do
        {
          if (!gsi_stmt (si))
            si = gsi_last_bb (bb_vinfo->bb);
          else
            gsi_prev (&si);
          stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
          if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
            vect_determine_stmt_precisions (stmt_info);
        }
      while (stmt != gsi_stmt (bb_vinfo->region_begin));
    }
}

typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern matching on a stmt is
   taken, which means that usually the more complex pattern needs to
   precede the less complex ones (widen_sum only after dot_prod or sad,
   for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_mulhs_pattern, "mult_high" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
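
/* As an illustration of the ordering constraint above (hypothetical
   example), the "average" entry matches rounding-average idioms such as:

     void
     avg (unsigned char *restrict r, unsigned char *restrict a,
          unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = (a[i] + b[i] + 1) >> 1;
     }

   It runs after over_widening has narrowed the add/shift chain and,
   assuming the target provides such an instruction, replaces the chain
   with a single IFN_AVG_CEIL operation on the narrow type.  */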

/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
                         tree pattern_vectype)
{
  stmt_vec_info orig_stmt_info_saved = orig_stmt_info;
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
         sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location,
                         "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
         old and new statements, so that the old one has a valid but
         unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
        dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
                  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
         !gsi_end_p (si); gsi_next (&si))
      {
        if (dump_enabled_p ())
          dump_printf_loc (MSG_NOTE, vect_location,
                           "extra pattern stmt: %G", gsi_stmt (si));
        stmt_vec_info pattern_stmt_info
          = vect_init_pattern_stmt (gsi_stmt (si),
                                    orig_stmt_info, pattern_vectype);
        /* Stmts in the def sequence are not vectorizable cycle or
           induction defs, instead they should all be vect_internal_def
           feeding the main pattern stmt which retains this def type.  */
        STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
                                               orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);

  /* Transfer reduction path info to the pattern.  */
  if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1)
    {
      vec_info *vinfo = orig_stmt_info_saved->vinfo;
      tree lookfor = gimple_op (orig_stmt_info_saved->stmt,
                                1 + STMT_VINFO_REDUC_IDX (orig_stmt_info));
      /* Search the pattern def sequence and the main pattern stmt.  Note
         we may have inserted all into a containing pattern def sequence
         so the following is a bit awkward.  */
      gimple_stmt_iterator si;
      gimple *s;
      if (def_seq)
        {
          si = gsi_start (def_seq);
          s = gsi_stmt (si);
          gsi_next (&si);
        }
      else
        {
          si = gsi_none ();
          s = pattern_stmt;
        }
      do
        {
          bool found = false;
          for (unsigned i = 1; i < gimple_num_ops (s); ++i)
            if (gimple_op (s, i) == lookfor)
              {
                STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1;
                lookfor = gimple_get_lhs (s);
                found = true;
                break;
              }
          if (s == pattern_stmt)
            {
              if (!found && dump_enabled_p ())
                dump_printf_loc (MSG_NOTE, vect_location,
                                 "failed to update reduction index.\n");
              break;
            }
          if (gsi_end_p (si))
            s = pattern_stmt;
          else
            {
              s = gsi_stmt (si);
              if (s == pattern_stmt)
                /* Found the end inside a bigger pattern def seq.  */
                si = gsi_none ();
              else
                gsi_next (&si);
            }
        }
      while (1);
    }
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the
   documentation for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
           !gsi_end_p (gsi); gsi_next (&gsi))
        vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  gcc_assert (pattern_vectype);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
                     "%s pattern recognized: %G",
                     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
                             elem_ptr, *elem_ptr == stmt_info);
    }
}


/* Function vect_pattern_recog

   Input:
   VINFO - a vec_info describing the loop or basic-block region in which
        we want to look for computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
        that provides the same functionality and that can be vectorized.  We
        also record some information in the struct_stmt_info of the relevant
        stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

         stmt                     in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              -
           '---> S6: a_new = ....       -       S4              -
         S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

                                  in_pattern_p  related_stmt    vec_stmt
         S1: a_i = ....                 -       -               -
         S2: a_2 = ..use(a_i)..         -       -               -
         S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
         S4: a_0 = ..use(a_1)..         true    S6              VS6
           '---> S6: a_new = ....       -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
         S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

         VS6: va_new = ....
         VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
           '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
           '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
         functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
        {
          basic_block bb = bbs[i];
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
              /* Scan over all generic vect_recog_xxx_pattern functions.  */
              for (j = 0; j < NUM_PATTERNS; j++)
                vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
                                      stmt_info);
            }
        }
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
           gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
        {
          gimple *stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
          if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
            continue;

          /* Scan over all generic vect_recog_xxx_pattern functions.  */
          for (j = 0; j < NUM_PATTERNS; j++)
            vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
        }
    }
}
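
/* As an illustration of the whole process (hypothetical example), a
   reduction such as:

     int
     dot (short *a, short *b, int n)
     {
       int sum = 0;
       for (int i = 0; i < n; ++i)
         sum += a[i] * b[i];
       return sum;
     }

   is recognized by vect_recog_dot_prod_pattern: the widening multiply
   and the accumulation are replaced by a single DOT_PROD_EXPR pattern
   statement, assuming the target provides a dot-product instruction
   for the types involved.  */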