/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2018 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"		/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_kind vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }
  else
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
	}
      return false;
    }
}

/* Report that we've found an instance of pattern NAME in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
			tree vectype)
{
  vec_info *vinfo = orig_stmt_info->vinfo;
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
		       tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE)
{
  vec_info *vinfo = stmt_info->vinfo;
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}

/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different types
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */

static bool
vect_supportable_direct_optab_p (tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}

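/* For illustration, a recognizer that wants a signed short -> signed int
   DOT_PROD_EXPR could probe for target support roughly as in the sketch
   below (the concrete types are chosen purely as an example):

     tree vecotype, vecitype;
     if (vect_supportable_direct_optab_p (integer_type_node, DOT_PROD_EXPR,
					  short_integer_type_node,
					  &vecotype, &vecitype))
       ...use vecotype/vecitype as the vector forms of int/short...  */
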
/* Round bit precision PRECISION up to a full element.  */

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}

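/* For example, with 8-bit units, vect_element_precision (17) == 32
   (rounded up to the next power of 2), while vect_element_precision (3)
   == 8 (rounded up to a power of 2 and then clamped to BITS_PER_UNIT).  */
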
/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

static bool
type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
			   def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
    return false;

  return true;
}

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
struct vect_unpromoted_value {
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than TYPE, storing the supertype in *COMMON_TYPE if so.  */

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}

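/* A worked example of the mismatched-signs case above: with 8-bit chars,
   if *COMMON_TYPE is unsigned char and NEW_TYPE is signed char, neither
   type can hold all values of the other, so the supertype must be a
   16-bit signed type.  The final check then requires TYPE to be at least
   32 bits wide, so that the 16-bit common type is still a genuine
   narrowing of the result.  */
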
/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */

static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  vec_info *vinfo = stmt_info->vinfo;
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (!vect_look_through_possible_promotion (vinfo, op, this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
					   shift_p, max_nops, this_unprom,
					   common_type);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		return 0;
	    }
	}
      next_op += nops;
    }
  return next_op;
}

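/* As a sketch of what vect_widened_op_tree accepts: given unsigned char
   inputs a, b, c and d, the four-leaf tree

     short r = ((short) a + (short) b) + ((short) c + (short) d);

   with CODE == PLUS_EXPR and MAX_NOPS == 4 would return 4, describing
   the unpromoted chars in UNPROM[0..3] and setting *COMMON_TYPE to their
   8-bit common type.  */
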
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */

static bool
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
	}

      return true;
    }
}

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  /* See if we can reuse an existing result.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (types_compatible_p (TREE_TYPE (lhs), type))
	return lhs;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, unprom->op);

  /* If the operation is the input to a vectorizable cast, try splitting
     that cast into two, taking the required result as a mid-way point.  */
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      if (TYPE_PRECISION (TREE_TYPE (lhs)) > TYPE_PRECISION (type)
	  && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type)
	  && (TYPE_UNSIGNED (unprom->type) || !TYPE_UNSIGNED (type))
	  && vect_split_statement (unprom->caster, new_op, new_stmt, vectype))
	return new_op;
    }

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, unprom->op))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */

static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;
      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
		     tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_VINFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
	  : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
}

/* As above, but also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (!vect_reassociating_reduction_p (stmt_info))
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  return true;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
   vectorized without preserving all the intermediate results.  It
   produces only N/2 (widened) results (by summing up pairs of
   intermediate results) rather than all N results.  Therefore, we
   cannot allow this pattern when we want to get all the results and in
   the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}

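/* For example (a sketch of the source-level idiom; the types are
   illustrative):

     signed short a[N], b[N];
     int sum = 0;
     for (i = 0; i < N; ++i)
       sum += a[i] * b[i];

   is replaced by DOT_PROD_EXPR <a[i], b[i], sum_0> on targets that
   provide a dot-product optab for these modes.  */
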
/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  DDAD = (TYPE2) DAD;
	  sum_1 = DDAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in
     the loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}

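/* For example (a sketch; unsigned char elements are the common case):

     unsigned char a[N], b[N];
     int sum = 0;
     for (i = 0; i < N; ++i)
       sum += abs (a[i] - b[i]);

   is replaced by SAD_EXPR <a[i], b[i], sum_0> on targets that provide a
   sum-of-absolute-differences optab (e.g. x86 psadbw).  */
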
/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */

static gimple *
vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
			     tree_code orig_code, tree_code wide_code,
			     bool shift_p, const char *name)
{
  gimple *last_stmt = last_stmt_info->stmt;

  vect_unpromoted_value unprom[2];
  tree half_type;
  if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
			     shift_p, 2, unprom, &half_type))
    return NULL;

  /* Pattern detected.  */
  vect_pattern_detected (name, last_stmt);

  tree type = gimple_expr_type (last_stmt);
  tree itype = type;
  if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
      || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
    itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
					    TYPE_UNSIGNED (half_type));

  /* Check target support.  */
  tree vectype = get_vectype_for_scalar_type (half_type);
  tree vecitype = get_vectype_for_scalar_type (itype);
  enum tree_code dummy_code;
  int dummy_int;
  auto_vec<tree> dummy_vec;
  if (!vectype
      || !vecitype
      || !supportable_widening_operation (wide_code, last_stmt_info,
					  vecitype, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_out = get_vectype_for_scalar_type (type);
  if (!*type_out)
    return NULL;

  tree oprnd[2];
  vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);

  tree var = vect_recog_temp_ssa_var (itype, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, wide_code,
					      oprnd[0], oprnd[1]);

  return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
}

/* Try to detect multiplication on widened inputs, converting MULT_EXPR
   to WIDEN_MULT_EXPR.  See vect_recog_widen_op_pattern for details.  */

static gimple *
vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
				      WIDEN_MULT_EXPR, false,
				      "vect_recog_widen_mult_pattern");
}

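/* For example (a sketch):

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; ++i)
       c[i] = (int) a[i] * (int) b[i];

   becomes WIDEN_MULT_EXPR <a[i], b[i]> when the target supports a
   widening multiply for these modes.  */
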
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	x = x * x
   or
	x = sqrt (x)
*/

static gimple *
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && !gimple_call_internal_p (last_stmt))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}

/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will
   be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
   vectorized without preserving all the intermediate results.  It
   produces only N/2 (widened) results (by summing up pairs of
   intermediate results) rather than all N results.  Therefore, we
   cannot allow this pattern when we want to get all the results and in
   the correct order (as is the case when this computation is in an
   inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
					type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}

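/* For example (a sketch):

     unsigned char a[N];
     unsigned int sum = 0;
     for (i = 0; i < N; ++i)
       sum += a[i];

   can use WIDEN_SUM_EXPR <a[i], sum_0> when the target provides a
   widening-sum optab for this pair of modes.  */
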
1457/* Recognize cases in which an operation is performed in one type WTYPE
1458 but could be done more efficiently in a narrower type NTYPE. For example,
1459 if we have:
20f06221 1460
370c2ebe
RS
1461 ATYPE a; // narrower than NTYPE
1462 BTYPE b; // narrower than NTYPE
1463 WTYPE aw = (WTYPE) a;
1464 WTYPE bw = (WTYPE) b;
1465 WTYPE res = aw + bw; // only uses of aw and bw
1107f3ae 1466
370c2ebe 1467 then it would be more efficient to do:
1107f3ae 1468
370c2ebe
RS
1469 NTYPE an = (NTYPE) a;
1470 NTYPE bn = (NTYPE) b;
1471 NTYPE resn = an + bn;
1472 WTYPE res = (WTYPE) resn;
1107f3ae 1473
370c2ebe 1474 Other situations include things like:
f5709183 1475
370c2ebe
RS
1476 ATYPE a; // NTYPE or narrower
1477 WTYPE aw = (WTYPE) a;
1478 WTYPE res = aw + b;
1107f3ae 1479
370c2ebe
RS
1480 when only "(NTYPE) res" is significant. In that case it's more efficient
1481 to truncate "b" and do the operation on NTYPE instead:
1107f3ae 1482
370c2ebe
RS
1483 NTYPE an = (NTYPE) a;
1484 NTYPE bn = (NTYPE) b; // truncation
1485 NTYPE resn = an + bn;
1486 WTYPE res = (WTYPE) resn;
1107f3ae 1487
370c2ebe
RS
1488 All users of "res" should then use "resn" instead, making the final
1489 statement dead (not marked as relevant). The final statement is still
1490 needed to maintain the type correctness of the IR.
1107f3ae 1491
370c2ebe
RS
1492 vect_determine_precisions has already determined the minimum
1493 precison of the operation and the minimum precision required
1494 by users of the result. */
1107f3ae 1495
370c2ebe 1496static gimple *
ba9728b0 1497vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
370c2ebe 1498{
ba9728b0 1499 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
370c2ebe
RS
1500 if (!last_stmt)
1501 return NULL;
9ef7adc0 1502
370c2ebe
RS
1503 /* See whether we have found that this operation can be done on a
1504 narrower type without changing its semantics. */
370c2ebe
RS
1505 unsigned int new_precision = last_stmt_info->operation_precision;
1506 if (!new_precision)
1507 return NULL;
1107f3ae 1508
370c2ebe
RS
1509 vec_info *vinfo = last_stmt_info->vinfo;
1510 tree lhs = gimple_assign_lhs (last_stmt);
1511 tree type = TREE_TYPE (lhs);
1512 tree_code code = gimple_assign_rhs_code (last_stmt);
1513
1514 /* Keep the first operand of a COND_EXPR as-is: only the other two
1515 operands are interesting. */
1516 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1517
1518 /* Check the operands. */
1519 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1520 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1521 unprom.quick_grow (nops);
1522 unsigned int min_precision = 0;
1523 bool single_use_p = false;
1524 for (unsigned int i = 0; i < nops; ++i)
1107f3ae 1525 {
370c2ebe
RS
1526 tree op = gimple_op (last_stmt, first_op + i);
1527 if (TREE_CODE (op) == INTEGER_CST)
1528 unprom[i].set_op (op, vect_constant_def);
1529 else if (TREE_CODE (op) == SSA_NAME)
1530 {
1531 bool op_single_use_p = true;
1532 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1533 &op_single_use_p))
1534 return NULL;
1535 /* If:
1107f3ae 1536
370c2ebe
RS
1537 (1) N bits of the result are needed;
1538 (2) all inputs are widened from M<N bits; and
1539 (3) one operand OP is a single-use SSA name
1107f3ae 1540
370c2ebe
RS
1541 we can shift the M->N widening from OP to the output
1542 without changing the number or type of extensions involved.
1543 This then reduces the number of copies of STMT_INFO.
1107f3ae 1544
370c2ebe
RS
1545 If instead of (3) more than one operand is a single-use SSA name,
1546 shifting the extension to the output is even more of a win.
1107f3ae 1547
370c2ebe 1548 If instead:
1107f3ae 1549
370c2ebe
RS
1550 (1) N bits of the result are needed;
1551 (2) one operand OP2 is widened from M2<N bits;
1552 (3) another operand OP1 is widened from M1<M2 bits; and
1553 (4) both OP1 and OP2 are single-use
1107f3ae 1554
370c2ebe 1555 the choice is between:
1107f3ae 1556
370c2ebe
RS
1557 (a) truncating OP2 to M1, doing the operation on M1,
1558 and then widening the result to N
1107f3ae 1559
370c2ebe
RS
1560 (b) widening OP1 to M2, doing the operation on M2, and then
1561 widening the result to N
1107f3ae 1562
370c2ebe
RS
1563 Both shift the M2->N widening of the inputs to the output.
1564 (a) additionally shifts the M1->M2 widening to the output;
1565 it requires fewer copies of STMT_INFO but requires an extra
1566 M2->M1 truncation.
1107f3ae 1567
370c2ebe
RS
1568 Which is better will depend on the complexity and cost of
1569 STMT_INFO, which is hard to predict at this stage. However,
1570 a clear tie-breaker in favor of (b) is the fact that the
1571 truncation in (a) increases the length of the operation chain.
1107f3ae 1572
370c2ebe
RS
1573 If instead of (4) only one of OP1 or OP2 is single-use,
1574 (b) is still a win over doing the operation in N bits:
1575 it still shifts the M2->N widening on the single-use operand
1576 to the output and reduces the number of STMT_INFO copies.
1107f3ae 1577
370c2ebe
RS
1578 If neither operand is single-use then operating on fewer than
1579 N bits might lead to more extensions overall. Whether it does
1580 or not depends on global information about the vectorization
1581 region, and whether that's a good trade-off would again
1582 depend on the complexity and cost of the statements involved,
1583 as well as things like register pressure that are not normally
1584 modelled at this stage. We therefore ignore these cases
1585 and just optimize the clear single-use wins above.
1107f3ae 1586
370c2ebe
RS
1587 Thus we take the maximum precision of the unpromoted operands
1588 and record whether any operand is single-use. */
1589 if (unprom[i].dt == vect_internal_def)
1590 {
1591 min_precision = MAX (min_precision,
1592 TYPE_PRECISION (unprom[i].type));
1593 single_use_p |= op_single_use_p;
1594 }
1595 }
1107f3ae
IR
1596 }
1597
370c2ebe
RS
1598 /* Although the operation could be done in operation_precision, we have
1599 to balance that against introducing extra truncations or extensions.
1600 Calculate the minimum precision that can be handled efficiently.
1601
1602 The loop above determined that the operation could be handled
1603 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1604 extension from the inputs to the output without introducing more
1605 instructions, and would reduce the number of instructions required
1606 for STMT_INFO itself.
1607
1608 vect_determine_precisions has also determined that the result only
1609 needs min_output_precision bits. Truncating by a factor of N
1610 requires a tree of N - 1 instructions, so if TYPE is N times wider
1611 than min_output_precision, doing the operation in TYPE and truncating
1612 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1613 In contrast:
1614
1615 - truncating the input to a unary operation and doing the operation
1616 in the new type requires at most N - 1 + 1 = N instructions per
1617 output vector
1618
1619 - doing the same for a binary operation requires at most
1620 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1621
1622 Both unary and binary operations require fewer instructions than
1623 this if the operands were extended from a suitable truncated form.
1624 Thus there is usually nothing to lose by doing operations in
1625 min_output_precision bits, but there can be something to gain. */
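  /* Editorial worked instance of the counts above, not part of the
     original source: for a 32-bit TYPE and min_output_precision == 8
     we have N == 4, so operating in TYPE and truncating costs up to
     2 * 4 - 1 = 7 instructions per output vector, while truncating the
     inputs first costs at most 4 (unary) or 7 (binary), and fewer when
     the inputs were extended from a narrow type to begin with.  */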
1626 if (!single_use_p)
1627 min_precision = last_stmt_info->min_output_precision;
1107f3ae 1628 else
370c2ebe
RS
1629 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1630
1631 /* Apply the minimum efficient precision we just calculated. */
1632 if (new_precision < min_precision)
1633 new_precision = min_precision;
1634 if (new_precision >= TYPE_PRECISION (type))
1635 return NULL;
1636
1637 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1638
1639 *type_out = get_vectype_for_scalar_type (type);
1640 if (!*type_out)
1641 return NULL;
1642
1643 /* We've found a viable pattern. Get the new type of the operation. */
1644 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1645 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1646
1647 /* We specifically don't check here whether the target supports the
1648 new operation, since it might be something that a later pattern
1649 wants to rewrite anyway. If targets have a minimum element size
1650 for some optabs, we should pattern-match smaller ops to larger ops
1651 where beneficial. */
1652 tree new_vectype = get_vectype_for_scalar_type (new_type);
1653 if (!new_vectype)
1654 return NULL;
1107f3ae 1655
370c2ebe 1656 if (dump_enabled_p ())
3c2a8ed0
DM
1657 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1658 type, new_type);
1107f3ae 1659
370c2ebe 1660 /* Calculate the rhs operands for an operation on NEW_TYPE. */
370c2ebe
RS
1661 tree ops[3] = {};
1662 for (unsigned int i = 1; i < first_op; ++i)
1663 ops[i - 1] = gimple_op (last_stmt, i);
1664 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
1665 new_type, &unprom[0], new_vectype);
1666
1667 /* Use the operation to produce a result of type NEW_TYPE. */
1668 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1669 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1670 ops[0], ops[1], ops[2]);
1671 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1672
1673 if (dump_enabled_p ())
3c2a8ed0
DM
1674 dump_printf_loc (MSG_NOTE, vect_location,
1675 "created pattern stmt: %G", pattern_stmt);
1107f3ae 1676
370c2ebe
RS
1677 pattern_stmt = vect_convert_output (last_stmt_info, type,
1678 pattern_stmt, new_vectype);
1107f3ae 1679
370c2ebe 1680 return pattern_stmt;
1107f3ae
IR
1681}
1682
0267732b
RS
1683/* Recognize the patterns:
1684
1685 ATYPE a; // narrower than TYPE
1686 BTYPE b; // narrower than TYPE
1687 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1688 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1689
1690 where only the bottom half of avg is used. Try to transform them into:
1691
1692 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1693 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1694
1695 followed by:
1696
1697 TYPE avg = (TYPE) avg';
1698
1699 where NTYPE is no wider than half of TYPE. Since only the bottom half
1700 of avg is used, all or part of the cast of avg' should become redundant. */
1701
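/* Editorial illustration, not part of the original source (the function
   name is hypothetical): the classic rounding-average loop, matching
   form (2) above:

     void
     avg_u8 (unsigned char *restrict r, unsigned char *restrict a,
             unsigned char *restrict b, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = (a[i] + b[i] + 1) >> 1;
     }

   The sums are computed in int, but since r[i] keeps only 8 bits the
   loop maps to IFN_AVG_CEIL on 8-bit elements (e.g. x86's PAVGB, where
   the target provides such an instruction).  */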
1702static gimple *
ba9728b0 1703vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
0267732b
RS
1704{
1705 /* Check for a shift right by one bit. */
ba9728b0
RS
1706 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1707 vec_info *vinfo = last_stmt_info->vinfo;
0267732b
RS
1708 if (!last_stmt
1709 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1710 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1711 return NULL;
1712
0267732b
RS
1713 /* Check that the shift result is wider than the users of the
1714 result need (i.e. that narrowing would be a natural choice). */
1715 tree lhs = gimple_assign_lhs (last_stmt);
1716 tree type = TREE_TYPE (lhs);
1717 unsigned int target_precision
1718 = vect_element_precision (last_stmt_info->min_output_precision);
1719 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1720 return NULL;
1721
1722 /* Get the definition of the shift input. */
1723 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
1724 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1725 if (!plus_stmt_info)
1726 return NULL;
1727
1728 /* Check whether the shift input can be seen as a tree of additions on
1729 2 or 3 widened inputs.
1730
1731 Note that the pattern should be a win even if the result of one or
1732 more additions is reused elsewhere: if the pattern matches, we'd be
1733 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1734 internal_fn ifn = IFN_AVG_FLOOR;
1735 vect_unpromoted_value unprom[3];
1736 tree new_type;
1737 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1738 PLUS_EXPR, false, 3,
1739 unprom, &new_type);
1740 if (nops == 0)
1741 return NULL;
1742 if (nops == 3)
1743 {
1744 /* Check that one operand is 1. */
1745 unsigned int i;
1746 for (i = 0; i < 3; ++i)
1747 if (integer_onep (unprom[i].op))
1748 break;
1749 if (i == 3)
1750 return NULL;
1751 /* Throw away the 1 operand and keep the other two. */
1752 if (i < 2)
1753 unprom[i] = unprom[2];
1754 ifn = IFN_AVG_CEIL;
1755 }
1756
1757 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1758
1759 /* We know that:
1760
1761 (a) the operation can be viewed as:
1762
1763 TYPE widened0 = (TYPE) UNPROM[0];
1764 TYPE widened1 = (TYPE) UNPROM[1];
1765 TYPE tmp1 = widened0 + widened1 {+ 1};
1766 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1767
1768 (b) the first two statements are equivalent to:
1769
1770 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1771 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1772
1773 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1774 where sensible;
1775
1776 (d) all the operations can be performed correctly at twice the width of
1777 NEW_TYPE, due to the nature of the average operation; and
1778
1779 (e) users of the result of the right shift need only TARGET_PRECISION
1780 bits, where TARGET_PRECISION is no more than half of TYPE's
1781 precision.
1782
1783 Under these circumstances, the only situation in which NEW_TYPE
1784 could be narrower than TARGET_PRECISION is if widened0, widened1
1785 and an addition result are all used more than once. Thus we can
1786 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1787 as "free", whereas widening the result of the average instruction
1788 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1789 therefore better not to go narrower than TARGET_PRECISION. */
1790 if (TYPE_PRECISION (new_type) < target_precision)
1791 new_type = build_nonstandard_integer_type (target_precision,
1792 TYPE_UNSIGNED (new_type));
1793
1794 /* Check for target support. */
1795 tree new_vectype = get_vectype_for_scalar_type (new_type);
1796 if (!new_vectype
1797 || !direct_internal_fn_supported_p (ifn, new_vectype,
1798 OPTIMIZE_FOR_SPEED))
1799 return NULL;
1800
1801 /* The IR requires a valid vector type for the cast result, even though
1802 it's likely to be discarded. */
1803 *type_out = get_vectype_for_scalar_type (type);
1804 if (!*type_out)
1805 return NULL;
1806
1807 /* Generate the IFN_AVG* call. */
1808 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1809 tree new_ops[2];
1810 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1811 unprom, new_vectype);
1812 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1813 new_ops[1]);
1814 gimple_call_set_lhs (average_stmt, new_var);
1815 gimple_set_location (average_stmt, gimple_location (last_stmt));
1816
1817 if (dump_enabled_p ())
3c2a8ed0
DM
1818 dump_printf_loc (MSG_NOTE, vect_location,
1819 "created pattern stmt: %G", average_stmt);
0267732b 1820
0267732b
RS
1821 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1822}
1823
370c2ebe
RS
1824/* Recognize cases in which the input to a cast is wider than its
1825 output, and the input is fed by a widening operation. Fold this
1826 by removing the unnecessary intermediate widening. E.g.:
1107f3ae 1827
370c2ebe
RS
1828 unsigned char a;
1829 unsigned int b = (unsigned int) a;
1830 unsigned short c = (unsigned short) b;
1107f3ae 1831
370c2ebe 1832 -->
1107f3ae 1833
370c2ebe 1834 unsigned short c = (unsigned short) a;
1107f3ae 1835
370c2ebe
RS
1836 Although this is rare in input IR, it is an expected side-effect
1837 of the over-widening pattern above.
1107f3ae 1838
370c2ebe
RS
1839 This is beneficial also for integer-to-float conversions, if the
1840 widened integer has more bits than the float, and if the unwidened
1841 input doesn't. */
1107f3ae 1842
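/* Editorial illustration of the integer-to-float case, not part of the
   original source:

     unsigned short a;
     unsigned long long b = a;   /* widening */
     float c = (float) b;        /* 64-bit integer -> 32-bit float */

   can become

     float c = (float) a;

   The 64-bit intermediate has more value bits than float can hold,
   while the 16-bit input converts exactly, so dropping the widening
   loses nothing.  */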
370c2ebe 1843static gimple *
ba9728b0 1844vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
370c2ebe
RS
1845{
1846 /* Check for a cast, including an integer-to-float conversion. */
ba9728b0 1847 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
370c2ebe
RS
1848 if (!last_stmt)
1849 return NULL;
1850 tree_code code = gimple_assign_rhs_code (last_stmt);
1851 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1852 return NULL;
1107f3ae 1853
370c2ebe
RS
1854 /* Make sure that the rhs is a scalar with a natural bitsize. */
1855 tree lhs = gimple_assign_lhs (last_stmt);
1856 if (!lhs)
1857 return NULL;
1858 tree lhs_type = TREE_TYPE (lhs);
1859 scalar_mode lhs_mode;
1860 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1861 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1862 return NULL;
1107f3ae 1863
370c2ebe
RS
1864 /* Check for a narrowing operation (from a vector point of view). */
1865 tree rhs = gimple_assign_rhs1 (last_stmt);
1866 tree rhs_type = TREE_TYPE (rhs);
1867 if (!INTEGRAL_TYPE_P (rhs_type)
1868 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1869 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1870 return NULL;
82db3d43 1871
370c2ebe 1872 /* Try to find an unpromoted input. */
370c2ebe
RS
1873 vec_info *vinfo = last_stmt_info->vinfo;
1874 vect_unpromoted_value unprom;
1875 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1876 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1877 return NULL;
1cbfeccc 1878
370c2ebe
RS
1879 /* If the bits above RHS_TYPE matter, make sure that they're the
1880 same when extending from UNPROM as they are when extending from RHS. */
1881 if (!INTEGRAL_TYPE_P (lhs_type)
1882 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1883 return NULL;
1107f3ae 1884
370c2ebe
RS
1885 /* We can get the same result by casting UNPROM directly, to avoid
1886 the unnecessary widening and narrowing. */
1887 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1107f3ae 1888
370c2ebe
RS
1889 *type_out = get_vectype_for_scalar_type (lhs_type);
1890 if (!*type_out)
1107f3ae
IR
1891 return NULL;
1892
370c2ebe
RS
1893 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1894 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1895 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1107f3ae
IR
1896
1897 return pattern_stmt;
1898}
1899
00347934
RS
1900/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1901 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
36ba4aae 1902
355fe088 1903static gimple *
ba9728b0 1904vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
36ba4aae 1905{
ba9728b0 1906 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
00347934 1907 WIDEN_LSHIFT_EXPR, true,
d379ac22 1908 "vect_recog_widen_shift_pattern");
7e9a3abb
JJ
1909}
1910
1911/* Detect a rotate pattern that wouldn't otherwise be vectorized:
1912
1913 type a_t, b_t, c_t;
1914
1915 S0 a_t = b_t r<< c_t;
1916
1917 Input/Output:
1918
ba9728b0 1919 * STMT_VINFO: The stmt from which the pattern search begins,
7e9a3abb
JJ
1920 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1921 with a sequence:
1922
1923 S1 d_t = -c_t;
1924 S2 e_t = d_t & (B - 1);
1925 S3 f_t = b_t << c_t;
1926 S4 g_t = b_t >> e_t;
1927 S0 a_t = f_t | g_t;
1928
1929 where B is element bitsize of type.
1930
1931 Output:
1932
7e9a3abb
JJ
1933 * TYPE_OUT: The type of the output of this pattern.
1934
1935 * Return value: A new stmt that will be used to replace the rotate
1936 S0 stmt. */
1937
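/* Editorial illustration, not part of the original source (the function
   name is hypothetical): the scalar identity behind S1-S4 for a 32-bit
   rotate-left, assuming 0 <= n <= 31:

     unsigned int
     rotl32 (unsigned int x, unsigned int n)
     {
       unsigned int e = -n & 31;        /* S1 and S2 */
       return (x << n) | (x >> e);      /* S3, S4 and S0 */
     }

   Masking the negated count with B - 1 keeps the second shift amount
   in range; for n == 0 both shifts use a count of 0, so no shift by B
   (which would be undefined) is ever executed.  */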
355fe088 1938static gimple *
ba9728b0 1939vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
7e9a3abb 1940{
ba9728b0 1941 gimple *last_stmt = stmt_vinfo->stmt;
7e9a3abb 1942 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
355fe088 1943 gimple *pattern_stmt, *def_stmt;
7e9a3abb 1944 enum tree_code rhs_code;
310213d4 1945 vec_info *vinfo = stmt_vinfo->vinfo;
7e9a3abb
JJ
1946 enum vect_def_type dt;
1947 optab optab1, optab2;
68119618 1948 edge ext_def = NULL;
7e9a3abb
JJ
1949
1950 if (!is_gimple_assign (last_stmt))
1951 return NULL;
1952
1953 rhs_code = gimple_assign_rhs_code (last_stmt);
1954 switch (rhs_code)
1955 {
1956 case LROTATE_EXPR:
1957 case RROTATE_EXPR:
1958 break;
1959 default:
1960 return NULL;
1961 }
1962
7e9a3abb
JJ
1963 lhs = gimple_assign_lhs (last_stmt);
1964 oprnd0 = gimple_assign_rhs1 (last_stmt);
1965 type = TREE_TYPE (oprnd0);
1966 oprnd1 = gimple_assign_rhs2 (last_stmt);
1967 if (TREE_CODE (oprnd0) != SSA_NAME
1968 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1969 || !INTEGRAL_TYPE_P (type)
1970 || !TYPE_UNSIGNED (type))
1971 return NULL;
1972
fef96d8e
RS
1973 stmt_vec_info def_stmt_info;
1974 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
7e9a3abb
JJ
1975 return NULL;
1976
1977 if (dt != vect_internal_def
1978 && dt != vect_constant_def
1979 && dt != vect_external_def)
1980 return NULL;
1981
1982 vectype = get_vectype_for_scalar_type (type);
1983 if (vectype == NULL_TREE)
1984 return NULL;
1985
1986 /* If vector/vector or vector/scalar rotate is supported by the target,
1987 don't do anything here. */
1988 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1989 if (optab1
1990 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1991 return NULL;
1992
310213d4 1993 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
7e9a3abb
JJ
1994 {
1995 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1996 if (optab2
1997 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1998 return NULL;
1999 }
2000
2001 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2002 don't do anything here either. */
2003 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2004 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2005 if (!optab1
2006 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2007 || !optab2
2008 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2009 {
310213d4 2010 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
7e9a3abb
JJ
2011 return NULL;
2012 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2013 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2014 if (!optab1
2015 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2016 || !optab2
2017 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2018 return NULL;
2019 }
2020
7e9a3abb 2021 *type_out = vectype;
7e9a3abb 2022
68119618 2023 if (dt == vect_external_def
3330053e
RS
2024 && TREE_CODE (oprnd1) == SSA_NAME)
2025 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
68119618 2026
7e9a3abb 2027 def = NULL_TREE;
7a504f33 2028 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
7e9a3abb 2029 if (TREE_CODE (oprnd1) == INTEGER_CST
7a504f33 2030 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
7e9a3abb
JJ
2031 def = oprnd1;
2032 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2033 {
2034 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7a504f33 2035 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
7e9a3abb
JJ
2036 && TYPE_PRECISION (TREE_TYPE (rhs1))
2037 == TYPE_PRECISION (type))
2038 def = rhs1;
2039 }
2040
7e9a3abb
JJ
2041 if (def == NULL_TREE)
2042 {
2043 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2044 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
2045 if (ext_def)
2046 {
2047 basic_block new_bb
2048 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2049 gcc_assert (!new_bb);
2050 }
2051 else
2052 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
2053 }
2054 stype = TREE_TYPE (def);
7a504f33 2055 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
7e9a3abb
JJ
2056
2057 if (TREE_CODE (def) == INTEGER_CST)
2058 {
cc269bb6 2059 if (!tree_fits_uhwi_p (def)
7a504f33 2060 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
7e9a3abb
JJ
2061 || integer_zerop (def))
2062 return NULL;
2063 def2 = build_int_cst (stype,
7a504f33 2064 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
7e9a3abb
JJ
2065 }
2066 else
2067 {
2068 tree vecstype = get_vectype_for_scalar_type (stype);
7e9a3abb
JJ
2069
2070 if (vecstype == NULL_TREE)
2071 return NULL;
2072 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 2073 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
2074 if (ext_def)
2075 {
2076 basic_block new_bb
2077 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2078 gcc_assert (!new_bb);
2079 }
2080 else
776bfcea 2081 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e9a3abb
JJ
2082
2083 def2 = vect_recog_temp_ssa_var (stype, NULL);
7a504f33 2084 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
0d0e4a03
JJ
2085 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2086 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
2087 if (ext_def)
2088 {
2089 basic_block new_bb
2090 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2091 gcc_assert (!new_bb);
2092 }
2093 else
776bfcea 2094 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e9a3abb
JJ
2095 }
2096
2097 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2098 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2099 ? LSHIFT_EXPR : RSHIFT_EXPR,
2100 oprnd0, def);
7e9a3abb
JJ
2101 append_pattern_def_seq (stmt_vinfo, def_stmt);
2102
2103 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2104 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2105 ? RSHIFT_EXPR : LSHIFT_EXPR,
2106 oprnd0, def2);
7e9a3abb
JJ
2107 append_pattern_def_seq (stmt_vinfo, def_stmt);
2108
2109 /* Pattern detected. */
49d8df1b 2110 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
7e9a3abb
JJ
2111
2112 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2113 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2114 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb 2115
36ba4aae
IR
2116 return pattern_stmt;
2117}
1107f3ae 2118
732a0ad3
JJ
2119/* Detect a vector by vector shift pattern that wouldn't otherwise be
2120 vectorized:
2121
2122 type a_t;
2123 TYPE b_T, res_T;
2124
2125 S1 a_t = ;
2126 S2 b_T = ;
2127 S3 res_T = b_T op a_t;
2128
2129 where 'TYPE' is a type with a different size from 'type',
2130 and op is <<, >> or rotate.
2131
2132 Also detect cases:
2133
2134 type a_t;
2135 TYPE b_T, c_T, res_T;
2136
2137 S0 c_T = ;
2138 S1 a_t = (type) c_T;
2139 S2 b_T = ;
2140 S3 res_T = b_T op a_t;
2141
2142 Input/Output:
2143
ba9728b0 2144 * STMT_VINFO: The stmt from which the pattern search begins,
732a0ad3
JJ
2145 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2146 with a shift/rotate which has same type on both operands, in the
2147 second case just b_T op c_T, in the first case with added cast
363477c0 2148 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2149
2150 Output:
2151
732a0ad3
JJ
2152 * TYPE_OUT: The type of the output of this pattern.
2153
2154 * Return value: A new stmt that will be used to replace the shift/rotate
2155 S3 stmt. */
2156
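/* Editorial illustration, not part of the original source (the function
   name is hypothetical): a loop whose shift amount is loaded as a
   narrower type than the shifted value, giving the S1-S3 shape above
   when the narrow count survives into the IL:

     void
     shift_each (int *restrict r, int *restrict b,
                 unsigned char *restrict a, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = b[i] << a[i];   /* S3: int shifted by a char-sized count */
     }

   Vector shift instructions want both operands in the same vector type,
   so the pattern casts the count to the shifted operand's type (or
   reuses an existing wide value) before the shift.  */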
355fe088 2157static gimple *
ba9728b0
RS
2158vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2159 tree *type_out)
732a0ad3 2160{
ba9728b0 2161 gimple *last_stmt = stmt_vinfo->stmt;
732a0ad3 2162 tree oprnd0, oprnd1, lhs, var;
25927307 2163 gimple *pattern_stmt;
732a0ad3 2164 enum tree_code rhs_code;
310213d4 2165 vec_info *vinfo = stmt_vinfo->vinfo;
732a0ad3
JJ
2166
2167 if (!is_gimple_assign (last_stmt))
2168 return NULL;
2169
2170 rhs_code = gimple_assign_rhs_code (last_stmt);
2171 switch (rhs_code)
2172 {
2173 case LSHIFT_EXPR:
2174 case RSHIFT_EXPR:
2175 case LROTATE_EXPR:
2176 case RROTATE_EXPR:
2177 break;
2178 default:
2179 return NULL;
2180 }
2181
732a0ad3
JJ
2182 lhs = gimple_assign_lhs (last_stmt);
2183 oprnd0 = gimple_assign_rhs1 (last_stmt);
2184 oprnd1 = gimple_assign_rhs2 (last_stmt);
2185 if (TREE_CODE (oprnd0) != SSA_NAME
2186 || TREE_CODE (oprnd1) != SSA_NAME
2187 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2be65d9e 2188 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
732a0ad3
JJ
2189 || TYPE_PRECISION (TREE_TYPE (lhs))
2190 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2191 return NULL;
2192
25927307
RS
2193 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2194 if (!def_vinfo)
732a0ad3
JJ
2195 return NULL;
2196
1cbfeccc
RS
2197 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2198 if (*type_out == NULL_TREE)
732a0ad3
JJ
2199 return NULL;
2200
81c40241 2201 tree def = NULL_TREE;
25927307 2202 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
0f8c840c 2203 if (def_stmt && gimple_assign_cast_p (def_stmt))
732a0ad3
JJ
2204 {
2205 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2206 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2207 && TYPE_PRECISION (TREE_TYPE (rhs1))
2208 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
0179520a
JJ
2209 {
2210 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2211 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2212 def = rhs1;
2213 else
2214 {
2215 tree mask
2216 = build_low_bits_mask (TREE_TYPE (rhs1),
2217 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2218 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2219 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
776bfcea
RS
2220 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2221 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
0179520a
JJ
2222 }
2223 }
732a0ad3
JJ
2224 }
2225
2226 if (def == NULL_TREE)
2227 {
2228 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2229 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
9c58fb7a 2230 append_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2231 }
2232
2233 /* Pattern detected. */
49d8df1b 2234 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
732a0ad3
JJ
2235
2236 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2237 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2238 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2239
732a0ad3
JJ
2240 return pattern_stmt;
2241}
2242
53109ba8
KT
2243/* Return true iff the target has a vector optab implementing the operation
2244 CODE on type VECTYPE. */
47486460 2245
53109ba8
KT
2246static bool
2247target_has_vecop_for_code (tree_code code, tree vectype)
2248{
2249 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2250 return voptab
2251 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2252}
47486460 2253
53109ba8
KT
2254/* Verify that the target has optabs of VECTYPE to perform all the steps
2255 needed by the multiplication-by-immediate synthesis algorithm described by
2256 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2257 present. Return true iff the target supports all the steps. */
2258
2259static bool
2260target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2261 tree vectype, bool synth_shift_p)
2262{
2263 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2264 return false;
2265
2266 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2267 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2268
2269 if (var == negate_variant
2270 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2271 return false;
2272
2273 /* If we must synthesize shifts with additions make sure that vector
2274 addition is available. */
2275 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2276 return false;
2277
2278 for (int i = 1; i < alg->ops; i++)
2279 {
2280 switch (alg->op[i])
2281 {
2282 case alg_shift:
2283 break;
2284 case alg_add_t_m2:
2285 case alg_add_t2_m:
2286 case alg_add_factor:
2287 if (!supports_vplus)
2288 return false;
2289 break;
2290 case alg_sub_t_m2:
2291 case alg_sub_t2_m:
2292 case alg_sub_factor:
2293 if (!supports_vminus)
2294 return false;
2295 break;
2296 case alg_unknown:
2297 case alg_m:
2298 case alg_zero:
2299 case alg_impossible:
2300 return false;
2301 default:
2302 gcc_unreachable ();
2303 }
2304 }
2305
2306 return true;
2307}
2308
2309/* Synthesize a left shift of OP by AMNT bits using a series of additions and
2310 putting the final result in DEST. Append all statements but the last into
2311 VINFO. Return the last statement. */
2312
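/* Editorial illustration, not part of the original source: a shift by 3
   becomes three self-additions, each of which doubles the running value:

     t1 = op + op;       // op << 1
     t2 = t1 + t1;       // op << 2
     dest = t2 + t2;     // op << 3
*/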
2313static gimple *
2314synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2315 stmt_vec_info vinfo)
2316{
2317 HOST_WIDE_INT i;
2318 tree itype = TREE_TYPE (op);
2319 tree prev_res = op;
2320 gcc_assert (amnt >= 0);
2321 for (i = 0; i < amnt; i++)
2322 {
2323 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2324 : dest;
2325 gimple *stmt
2326 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2327 prev_res = tmp_var;
2328 if (i < amnt - 1)
2329 append_pattern_def_seq (vinfo, stmt);
2330 else
2331 return stmt;
2332 }
2333 gcc_unreachable ();
2334 return NULL;
2335}
2336
2337/* Helper for vect_synth_mult_by_constant. Apply a binary operation
2338 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2339 the process if necessary. Append the resulting assignment statements
2340 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2341 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2342 left shifts using additions. */
2343
2344static tree
2345apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2346 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2347{
2348 if (integer_zerop (op2)
2349 && (code == LSHIFT_EXPR
2350 || code == PLUS_EXPR))
2351 {
2352 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2353 return op1;
2354 }
2355
2356 gimple *stmt;
2357 tree itype = TREE_TYPE (op1);
2358 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2359
2360 if (code == LSHIFT_EXPR
2361 && synth_shift_p)
2362 {
2363 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2364 stmt_vinfo);
2365 append_pattern_def_seq (stmt_vinfo, stmt);
2366 return tmp_var;
2367 }
2368
2369 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2370 append_pattern_def_seq (stmt_vinfo, stmt);
2371 return tmp_var;
2372}
2373
2374/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2375 and simple arithmetic operations to be vectorized. Record the statements
2376 produced in STMT_VINFO and return the last statement in the sequence or
2377 NULL if it's not possible to synthesize such a multiplication.
2378 This function mirrors the behavior of expand_mult_const in expmed.c but
2379 works on tree-ssa form. */
2380
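/* Editorial illustration, not part of the original source: two typical
   decompositions this can produce:

     x * 9   ->  t = x << 3;  res = t + x;   // alg_add_t_m2
     x * 15  ->  t = x << 4;  res = t - x;   // alg_sub_t2_m

   As noted below, the arithmetic is carried out in an unsigned copy of
   the type when the original type has undefined overflow, and the
   result is cast back at the end.  */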
2381static gimple *
2382vect_synth_mult_by_constant (tree op, tree val,
2383 stmt_vec_info stmt_vinfo)
2384{
2385 tree itype = TREE_TYPE (op);
2386 machine_mode mode = TYPE_MODE (itype);
2387 struct algorithm alg;
2388 mult_variant variant;
2389 if (!tree_fits_shwi_p (val))
2390 return NULL;
2391
2392 /* Multiplication synthesis by shifts, adds and subs can introduce
2393 signed overflow where the original operation didn't. Perform the
2394 operations on an unsigned type and cast back to avoid this.
2395 In the future we may want to relax this for synthesis algorithms
2396 that we can prove do not cause unexpected overflow. */
2397 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2398
2399 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
47486460 2400
53109ba8
KT
2401 /* Targets that don't support vector shifts but support vector additions
2402 can synthesize shifts that way. */
2403 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2404
2405 HOST_WIDE_INT hwval = tree_to_shwi (val);
2406 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2407 The vectorizer's benefit analysis will decide whether it's beneficial
2408 to do this. */
2409 bool possible = choose_mult_variant (mode, hwval, &alg,
2410 &variant, MAX_COST);
2411 if (!possible)
2412 return NULL;
2413
2414 tree vectype = get_vectype_for_scalar_type (multtype);
2415
2416 if (!vectype
2417 || !target_supports_mult_synth_alg (&alg, variant,
2418 vectype, synth_shift_p))
2419 return NULL;
2420
2421 tree accumulator;
2422
2423 /* Clear out the sequence of statements so we can populate it below. */
53109ba8
KT
2424 gimple *stmt = NULL;
2425
2426 if (cast_to_unsigned_p)
2427 {
2428 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2429 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2430 append_pattern_def_seq (stmt_vinfo, stmt);
2431 op = tmp_op;
2432 }
2433
2434 if (alg.op[0] == alg_zero)
2435 accumulator = build_int_cst (multtype, 0);
2436 else
2437 accumulator = op;
2438
2439 bool needs_fixup = (variant == negate_variant)
2440 || (variant == add_variant);
2441
2442 for (int i = 1; i < alg.ops; i++)
2443 {
2444 tree shft_log = build_int_cst (multtype, alg.log[i]);
2445 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2446 tree tmp_var = NULL_TREE;
2447
2448 switch (alg.op[i])
2449 {
2450 case alg_shift:
2451 if (synth_shift_p)
2452 stmt
2453 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2454 stmt_vinfo);
2455 else
2456 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2457 shft_log);
2458 break;
2459 case alg_add_t_m2:
2460 tmp_var
2461 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2462 stmt_vinfo, synth_shift_p);
2463 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2464 tmp_var);
2465 break;
2466 case alg_sub_t_m2:
2467 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2468 shft_log, stmt_vinfo,
2469 synth_shift_p);
2470 /* In some algorithms the first step involves zeroing the
2471 accumulator. If subtracting from such an accumulator,
2472 just emit the negation directly. */
2473 if (integer_zerop (accumulator))
2474 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2475 else
2476 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2477 tmp_var);
2478 break;
2479 case alg_add_t2_m:
2480 tmp_var
2481 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2482 stmt_vinfo, synth_shift_p);
2483 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2484 break;
2485 case alg_sub_t2_m:
2486 tmp_var
2487 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2488 stmt_vinfo, synth_shift_p);
2489 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2490 break;
2491 case alg_add_factor:
2492 tmp_var
2493 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2494 stmt_vinfo, synth_shift_p);
2495 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2496 tmp_var);
2497 break;
2498 case alg_sub_factor:
2499 tmp_var
2500 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2501 stmt_vinfo, synth_shift_p);
2502 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2503 accumulator);
2504 break;
2505 default:
2506 gcc_unreachable ();
2507 }
2508 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2509 but rather return it directly. */
2510
2511 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2512 append_pattern_def_seq (stmt_vinfo, stmt);
2513 accumulator = accum_tmp;
2514 }
2515 if (variant == negate_variant)
2516 {
2517 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2518 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2519 accumulator = accum_tmp;
2520 if (cast_to_unsigned_p)
2521 append_pattern_def_seq (stmt_vinfo, stmt);
2522 }
2523 else if (variant == add_variant)
2524 {
2525 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2526 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2527 accumulator = accum_tmp;
2528 if (cast_to_unsigned_p)
2529 append_pattern_def_seq (stmt_vinfo, stmt);
2530 }
2531 /* Move back to a signed type if needed. */
2532 if (cast_to_unsigned_p)
2533 {
2534 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2535 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2536 }
2537
2538 return stmt;
2539}
2540
2541/* Detect multiplication by a constant and convert it into a sequence of
2542 shifts, additions, subtractions and negations. We reuse the
2543 choose_mult_variant algorithms from expmed.c.
47486460
VK
2544
2545 Input/Output:
2546
ba9728b0 2547 STMT_VINFO: The stmt from which the pattern search begins,
53109ba8 2548 i.e. the mult stmt.
47486460
VK
2549
2550 Output:
2551
47486460
VK
2552 * TYPE_OUT: The type of the output of this pattern.
2553
53109ba8
KT
2554 * Return value: A new stmt that will be used to replace
2555 the multiplication. */
47486460 2556
355fe088 2557static gimple *
ba9728b0 2558vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
47486460 2559{
ba9728b0 2560 gimple *last_stmt = stmt_vinfo->stmt;
47486460 2561 tree oprnd0, oprnd1, vectype, itype;
53109ba8 2562 gimple *pattern_stmt;
47486460
VK
2563
2564 if (!is_gimple_assign (last_stmt))
2565 return NULL;
2566
2567 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2568 return NULL;
2569
2570 oprnd0 = gimple_assign_rhs1 (last_stmt);
2571 oprnd1 = gimple_assign_rhs2 (last_stmt);
2572 itype = TREE_TYPE (oprnd0);
2573
2574 if (TREE_CODE (oprnd0) != SSA_NAME
2575 || TREE_CODE (oprnd1) != INTEGER_CST
2576 || !INTEGRAL_TYPE_P (itype)
2be65d9e 2577 || !type_has_mode_precision_p (itype))
47486460
VK
2578 return NULL;
2579
2580 vectype = get_vectype_for_scalar_type (itype);
2581 if (vectype == NULL_TREE)
2582 return NULL;
2583
2584 /* If the target can handle vectorized multiplication natively,
2585 don't attempt to optimize this. */
53109ba8
KT
2586 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2587 if (mul_optab != unknown_optab)
47486460
VK
2588 {
2589 machine_mode vec_mode = TYPE_MODE (vectype);
53109ba8 2590 int icode = (int) optab_handler (mul_optab, vec_mode);
47486460 2591 if (icode != CODE_FOR_nothing)
53109ba8 2592 return NULL;
47486460
VK
2593 }
2594
53109ba8
KT
2595 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2596 if (!pattern_stmt)
47486460
VK
2597 return NULL;
2598
2599 /* Pattern detected. */
49d8df1b 2600 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
47486460 2601
47486460
VK
2602 *type_out = vectype;
2603
2604 return pattern_stmt;
2605}
2606
079c527f 2607/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
2608 otherwise vectorized:
2609
2610 type a_t, b_t;
2611
2612 S1 a_t = b_t / N;
2613
079c527f 2614 where type 'type' is an integral type and N is a constant.
363477c0 2615
079c527f 2616 Similarly handle modulo by a constant:
363477c0
JJ
2617
2618 S4 a_t = b_t % N;
2619
2620 Input/Output:
2621
ba9728b0 2622 * STMT_VINFO: The stmt from which the pattern search begins,
079c527f
JJ
2623 i.e. the division stmt. S1 is replaced by the following
2624 if N is a power of two constant and the type is signed:
363477c0
JJ
2625 S3 y_t = b_t < 0 ? N - 1 : 0;
2626 S2 x_t = b_t + y_t;
2627 S1' a_t = x_t >> log2 (N);
2628
079c527f
JJ
2629 S4 is replaced, if N is a power of two constant and
2630 the type is signed, by (where *_T temporaries have unsigned type):
363477c0
JJ
2631 S9 y_T = b_t < 0 ? -1U : 0U;
2632 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2633 S7 z_t = (type) z_T;
2634 S6 w_t = b_t + z_t;
2635 S5 x_t = w_t & (N - 1);
2636 S4' a_t = x_t - z_t;
2637
2638 Output:
2639
363477c0
JJ
2640 * TYPE_OUT: The type of the output of this pattern.
2641
2642 * Return value: A new stmt that will be used to replace the division
2643 S1 or modulo S4 stmt. */
2644
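/* Editorial illustration, not part of the original source (the function
   name is hypothetical): for N == 8 the S3/S2/S1' sequence is the
   familiar truncating division by a power of two:

     int
     div8 (int b)
     {
       int y = b < 0 ? 7 : 0;   /* S3 */
       int x = b + y;           /* S2 */
       return x >> 3;           /* S1' */
     }

   Adding N - 1 to negative values before the arithmetic shift makes the
   result round toward zero, matching C's truncating division.  */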
355fe088 2645static gimple *
ba9728b0 2646vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
363477c0 2647{
ba9728b0 2648 gimple *last_stmt = stmt_vinfo->stmt;
5deb57cb 2649 tree oprnd0, oprnd1, vectype, itype, cond;
355fe088 2650 gimple *pattern_stmt, *def_stmt;
363477c0 2651 enum tree_code rhs_code;
363477c0 2652 optab optab;
00f07b86 2653 tree q;
079c527f 2654 int dummy_int, prec;
363477c0
JJ
2655
2656 if (!is_gimple_assign (last_stmt))
2657 return NULL;
2658
2659 rhs_code = gimple_assign_rhs_code (last_stmt);
2660 switch (rhs_code)
2661 {
2662 case TRUNC_DIV_EXPR:
eb592645 2663 case EXACT_DIV_EXPR:
363477c0
JJ
2664 case TRUNC_MOD_EXPR:
2665 break;
2666 default:
2667 return NULL;
2668 }
2669
363477c0
JJ
2670 oprnd0 = gimple_assign_rhs1 (last_stmt);
2671 oprnd1 = gimple_assign_rhs2 (last_stmt);
2672 itype = TREE_TYPE (oprnd0);
2673 if (TREE_CODE (oprnd0) != SSA_NAME
2674 || TREE_CODE (oprnd1) != INTEGER_CST
2675 || TREE_CODE (itype) != INTEGER_TYPE
2be65d9e 2676 || !type_has_mode_precision_p (itype))
363477c0
JJ
2677 return NULL;
2678
7a504f33 2679 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
363477c0
JJ
2680 vectype = get_vectype_for_scalar_type (itype);
2681 if (vectype == NULL_TREE)
2682 return NULL;
2683
8f76f377 2684 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
363477c0 2685 {
8f76f377
RS
2686 /* If the target can handle vectorized division or modulo natively,
2687 don't attempt to optimize this, since native division is likely
2688 to give smaller code. */
2689 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2690 if (optab != unknown_optab)
2691 {
2692 machine_mode vec_mode = TYPE_MODE (vectype);
2693 int icode = (int) optab_handler (optab, vec_mode);
2694 if (icode != CODE_FOR_nothing)
2695 return NULL;
2696 }
363477c0
JJ
2697 }
2698
079c527f
JJ
2699 prec = TYPE_PRECISION (itype);
2700 if (integer_pow2p (oprnd1))
363477c0 2701 {
079c527f
JJ
2702 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2703 return NULL;
363477c0 2704
079c527f 2705 /* Pattern detected. */
49d8df1b 2706 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
079c527f
JJ
2707
2708 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2709 build_int_cst (itype, 0));
eb592645
RB
2710 if (rhs_code == TRUNC_DIV_EXPR
2711 || rhs_code == EXACT_DIV_EXPR)
079c527f
JJ
2712 {
2713 tree var = vect_recog_temp_ssa_var (itype, NULL);
2714 tree shift;
2715 def_stmt
0d0e4a03
JJ
2716 = gimple_build_assign (var, COND_EXPR, cond,
2717 fold_build2 (MINUS_EXPR, itype, oprnd1,
2718 build_int_cst (itype, 1)),
2719 build_int_cst (itype, 0));
9c58fb7a 2720 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2721 var = vect_recog_temp_ssa_var (itype, NULL);
2722 def_stmt
0d0e4a03
JJ
2723 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2724 gimple_assign_lhs (def_stmt));
079c527f
JJ
2725 append_pattern_def_seq (stmt_vinfo, def_stmt);
2726
2727 shift = build_int_cst (itype, tree_log2 (oprnd1));
2728 pattern_stmt
0d0e4a03
JJ
2729 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2730 RSHIFT_EXPR, var, shift);
079c527f
JJ
2731 }
2732 else
2733 {
2734 tree signmask;
079c527f
JJ
2735 if (compare_tree_int (oprnd1, 2) == 0)
2736 {
2737 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2738 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2739 build_int_cst (itype, 1),
2740 build_int_cst (itype, 0));
079c527f
JJ
2741 append_pattern_def_seq (stmt_vinfo, def_stmt);
2742 }
2743 else
2744 {
2745 tree utype
2746 = build_nonstandard_integer_type (prec, 1);
2747 tree vecutype = get_vectype_for_scalar_type (utype);
2748 tree shift
7a504f33 2749 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
079c527f
JJ
2750 - tree_log2 (oprnd1));
2751 tree var = vect_recog_temp_ssa_var (utype, NULL);
2752
0d0e4a03
JJ
2753 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2754 build_int_cst (utype, -1),
2755 build_int_cst (utype, 0));
776bfcea 2756 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
079c527f 2757 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2758 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2759 gimple_assign_lhs (def_stmt),
2760 shift);
776bfcea 2761 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
079c527f
JJ
2762 signmask = vect_recog_temp_ssa_var (itype, NULL);
2763 def_stmt
0d0e4a03 2764 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2765 append_pattern_def_seq (stmt_vinfo, def_stmt);
2766 }
2767 def_stmt
0d0e4a03
JJ
2768 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2769 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2770 append_pattern_def_seq (stmt_vinfo, def_stmt);
2771 def_stmt
0d0e4a03
JJ
2772 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2773 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2774 fold_build2 (MINUS_EXPR, itype, oprnd1,
2775 build_int_cst (itype, 1)));
079c527f
JJ
2776 append_pattern_def_seq (stmt_vinfo, def_stmt);
2777
2778 pattern_stmt
0d0e4a03
JJ
2779 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2780 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2781 signmask);
079c527f
JJ
2782 }
2783
079c527f
JJ
2784 *type_out = vectype;
2785 return pattern_stmt;
363477c0 2786 }
079c527f 2787
6b58915b
RS
2788 if (prec > HOST_BITS_PER_WIDE_INT
2789 || integer_zerop (oprnd1))
079c527f
JJ
2790 return NULL;
2791
00f07b86
RH
2792 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2793 return NULL;
079c527f 2794
079c527f 2795 if (TYPE_UNSIGNED (itype))
363477c0 2796 {
079c527f
JJ
2797 unsigned HOST_WIDE_INT mh, ml;
2798 int pre_shift, post_shift;
6b58915b 2799 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
7a504f33 2800 & GET_MODE_MASK (itype_mode));
5deb57cb 2801 tree t1, t2, t3, t4;
079c527f 2802
fecfbfa4 2803 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
079c527f
JJ
2804 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2805 return NULL;
2806
2807 /* Find a suitable multiplier and right shift count
2808 instead of multiplying with D. */
2809 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2810
2811 /* If the suggested multiplier is more than SIZE bits, we can do better
2812 for even divisors, using an initial right shift. */
2813 if (mh != 0 && (d & 1) == 0)
363477c0 2814 {
146ec50f 2815 pre_shift = ctz_or_zero (d);
079c527f
JJ
2816 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2817 &ml, &post_shift, &dummy_int);
2818 gcc_assert (!mh);
2819 }
2820 else
2821 pre_shift = 0;
2822
2823 if (mh != 0)
2824 {
2825 if (post_shift - 1 >= prec)
2826 return NULL;
2827
5deb57cb
JJ
2828 /* t1 = oprnd0 h* ml;
2829 t2 = oprnd0 - t1;
2830 t3 = t2 >> 1;
2831 t4 = t1 + t3;
2832 q = t4 >> (post_shift - 1); */
2833 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2834 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2835 build_int_cst (itype, ml));
079c527f 2836 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2837
5deb57cb 2838 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2839 def_stmt
0d0e4a03 2840 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2841 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2842
2843 t3 = vect_recog_temp_ssa_var (itype, NULL);
2844 def_stmt
0d0e4a03 2845 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2846 append_pattern_def_seq (stmt_vinfo, def_stmt);
2847
5deb57cb 2848 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2849 def_stmt
0d0e4a03 2850 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2851
2852 if (post_shift != 1)
2853 {
2854 append_pattern_def_seq (stmt_vinfo, def_stmt);
2855
5deb57cb 2856 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2857 pattern_stmt
0d0e4a03
JJ
2858 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2859 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2860 }
2861 else
2862 {
5deb57cb 2863 q = t4;
079c527f
JJ
2864 pattern_stmt = def_stmt;
2865 }
363477c0
JJ
2866 }
2867 else
2868 {
079c527f
JJ
2869 if (pre_shift >= prec || post_shift >= prec)
2870 return NULL;
2871
2872 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2873 t2 = t1 h* ml;
2874 q = t2 >> post_shift; */
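	  /* Editorial worked instance, not part of the original source:
	     for d == 3 and prec == 32, choose_multiplier gives
	     pre_shift == 0, ml == 0xAAAAAAAB and post_shift == 1,
	     i.e. in C:

	       q = (unsigned int) (((unsigned long long) x
				    * 0xAAAAAAABu) >> 33);

	     which equals x / 3 for every 32-bit unsigned x.  */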
079c527f
JJ
2875 if (pre_shift)
2876 {
2877 t1 = vect_recog_temp_ssa_var (itype, NULL);
2878 def_stmt
0d0e4a03
JJ
2879 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2880 build_int_cst (NULL, pre_shift));
079c527f
JJ
2881 append_pattern_def_seq (stmt_vinfo, def_stmt);
2882 }
2883 else
2884 t1 = oprnd0;
363477c0 2885
5deb57cb 2886 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2887 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2888 build_int_cst (itype, ml));
079c527f 2889
5deb57cb
JJ
2890 if (post_shift)
2891 {
2892 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2893
5deb57cb
JJ
2894 q = vect_recog_temp_ssa_var (itype, NULL);
2895 def_stmt
0d0e4a03
JJ
2896 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2897 build_int_cst (itype, post_shift));
5deb57cb
JJ
2898 }
2899 else
2900 q = t2;
2901
2902 pattern_stmt = def_stmt;
079c527f
JJ
2903 }
2904 }
2905 else
2906 {
2907 unsigned HOST_WIDE_INT ml;
4ee4c52c 2908 int post_shift;
6b58915b 2909 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2910 unsigned HOST_WIDE_INT abs_d;
2911 bool add = false;
5deb57cb 2912 tree t1, t2, t3, t4;
079c527f
JJ
2913
2914 /* Give up for -1. */
2915 if (d == -1)
2916 return NULL;
2917
079c527f
JJ
2918 /* Since d might be INT_MIN, we have to cast to
2919 unsigned HOST_WIDE_INT before negating to avoid
2920 undefined signed overflow. */
2921 abs_d = (d >= 0
2922 ? (unsigned HOST_WIDE_INT) d
2923 : - (unsigned HOST_WIDE_INT) d);
2924
2925 /* n rem d = n rem -d */
2926 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2927 {
2928 d = abs_d;
2929 oprnd1 = build_int_cst (itype, abs_d);
2930 }
2931 else if (HOST_BITS_PER_WIDE_INT >= prec
fecfbfa4 2932 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
2933 /* This case is not handled correctly below. */
2934 return NULL;
2935
4ee4c52c 2936 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
fecfbfa4 2937 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
2938 {
2939 add = true;
dd4786fe 2940 ml |= HOST_WIDE_INT_M1U << (prec - 1);
079c527f
JJ
2941 }
2942 if (post_shift >= prec)
2943 return NULL;
2944
7abed779 2945 /* t1 = oprnd0 h* ml; */
5deb57cb 2946 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2947 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2948 build_int_cst (itype, ml));
079c527f
JJ
2949
2950 if (add)
2951 {
5deb57cb 2952 /* t2 = t1 + oprnd0; */
7abed779 2953 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2954 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2955 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
2956 }
2957 else
5deb57cb 2958 t2 = t1;
079c527f 2959
5deb57cb 2960 if (post_shift)
079c527f 2961 {
5deb57cb 2962 /* t3 = t2 >> post_shift; */
7abed779 2963 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2964 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2965 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2966 build_int_cst (itype, post_shift));
363477c0 2967 }
079c527f 2968 else
5deb57cb 2969 t3 = t2;
079c527f 2970
807e902e 2971 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
2972 int msb = 1;
2973 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2974 {
807e902e 2975 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 2976 msb = 0;
807e902e 2977 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
2978 msb = -1;
2979 }
079c527f 2980
7abed779
JJ
2981 if (msb == 0 && d >= 0)
2982 {
2983 /* q = t3; */
2984 q = t3;
2985 pattern_stmt = def_stmt;
2986 }
2987 else
2988 {
2989 /* t4 = oprnd0 >> (prec - 1);
2990 or if we know from VRP that oprnd0 >= 0
2991 t4 = 0;
2992 or if we know from VRP that oprnd0 < 0
2993 t4 = -1; */
2994 append_pattern_def_seq (stmt_vinfo, def_stmt);
2995 t4 = vect_recog_temp_ssa_var (itype, NULL);
2996 if (msb != 1)
0d0e4a03
JJ
2997 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2998 build_int_cst (itype, msb));
7abed779 2999 else
0d0e4a03
JJ
3000 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3001 build_int_cst (itype, prec - 1));
7abed779
JJ
3002 append_pattern_def_seq (stmt_vinfo, def_stmt);
3003
3004 /* q = t3 - t4; or q = t4 - t3; */
3005 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3006 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3007 d < 0 ? t3 : t4);
7abed779 3008 }
079c527f
JJ
3009 }
3010
3011 if (rhs_code == TRUNC_MOD_EXPR)
3012 {
3013 tree r, t1;
3014
3015 /* We divided. Now finish by:
3016 t1 = q * oprnd1;
3017 r = oprnd0 - t1; */
3018 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3019
3020 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3021 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 3022 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 3023
079c527f 3024 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3025 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
3026 }
3027
079c527f 3028 /* Pattern detected. */
49d8df1b 3029 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
363477c0 3030
363477c0
JJ
3031 *type_out = vectype;
3032 return pattern_stmt;
3033}
3034
69d2aade
JJ
3035/* Function vect_recog_mixed_size_cond_pattern
3036
3037 Try to find the following pattern:
3038
3039 type x_t, y_t;
3040 TYPE a_T, b_T, c_T;
3041 loop:
3042 S1 a_T = x_t CMP y_t ? b_T : c_T;
3043
3044 where type 'TYPE' is an integral type whose size differs
bc4fb355 3045 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 3046 than 'type', the constants need to fit into an integer type
bc4fb355 3047 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
3048
3049 Input:
3050
ba9728b0 3051 * STMT_VINFO: The stmt from which the pattern search begins.
69d2aade
JJ
3052
3053 Output:
3054
69d2aade
JJ
3055 * TYPE_OUT: The type of the output of this pattern.
3056
3057 * Return value: A new stmt that will be used to replace the pattern.
3058 Additionally a def_stmt is added.
3059
3060 a_it = x_t CMP y_t ? b_it : c_it;
3061 a_T = (TYPE) a_it; */
3062
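/* Editorial illustration, not part of the original source (the function
   name is hypothetical): selecting 64-bit values based on a 32-bit
   comparison:

     void
     sel (long long *restrict r, int *restrict x, int *restrict y, int n)
     {
       for (int i = 0; i < n; ++i)
         r[i] = x[i] < y[i] ? 1 : -1;   /* S1 with TYPE = long long */
     }

   Doing the COND_EXPR on an int-sized temporary and widening the
   selected value afterwards lets the comparison and the selection use
   the same vector element size.  */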
355fe088 3063static gimple *
ba9728b0 3064vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
69d2aade 3065{
ba9728b0 3066 gimple *last_stmt = stmt_vinfo->stmt;
69d2aade 3067 tree cond_expr, then_clause, else_clause;
bc4fb355 3068 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
355fe088 3069 gimple *pattern_stmt, *def_stmt;
bc4fb355 3070 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
355fe088 3071 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
bc4fb355
IR
3072 bool promotion;
3073 tree comp_scalar_type;
69d2aade
JJ
3074
3075 if (!is_gimple_assign (last_stmt)
3076 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3077 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3078 return NULL;
3079
3080 cond_expr = gimple_assign_rhs1 (last_stmt);
3081 then_clause = gimple_assign_rhs2 (last_stmt);
3082 else_clause = gimple_assign_rhs3 (last_stmt);
3083
87aab9b2
JJ
3084 if (!COMPARISON_CLASS_P (cond_expr))
3085 return NULL;
3086
bc4fb355
IR
3087 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3088 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 3089 if (comp_vectype == NULL_TREE)
69d2aade
JJ
3090 return NULL;
3091
3092 type = gimple_expr_type (last_stmt);
bc4fb355
IR
3093 if (types_compatible_p (type, comp_scalar_type)
3094 || ((TREE_CODE (then_clause) != INTEGER_CST
3095 || TREE_CODE (else_clause) != INTEGER_CST)
3096 && !INTEGRAL_TYPE_P (comp_scalar_type))
3097 || !INTEGRAL_TYPE_P (type))
3098 return NULL;
3099
3100 if ((TREE_CODE (then_clause) != INTEGER_CST
86a91c0a
RS
3101 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3102 &def_stmt0, &promotion))
bc4fb355 3103 || (TREE_CODE (else_clause) != INTEGER_CST
86a91c0a
RS
3104 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3105 &def_stmt1, &promotion)))
bc4fb355
IR
3106 return NULL;
3107
3108 if (orig_type0 && orig_type1
3109 && !types_compatible_p (orig_type0, orig_type1))
3110 return NULL;
3111
3112 if (orig_type0)
3113 {
3114 if (!types_compatible_p (orig_type0, comp_scalar_type))
3115 return NULL;
3116 then_clause = gimple_assign_rhs1 (def_stmt0);
3117 itype = orig_type0;
3118 }
3119
3120 if (orig_type1)
3121 {
3122 if (!types_compatible_p (orig_type1, comp_scalar_type))
3123 return NULL;
3124 else_clause = gimple_assign_rhs1 (def_stmt1);
3125 itype = orig_type1;
3126 }
3127
69d2aade 3128
6c825cd4
DS
3129 HOST_WIDE_INT cmp_mode_size
3130 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3131
7a504f33
RS
3132 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3133 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
69d2aade
JJ
3134 return NULL;
3135
3136 vectype = get_vectype_for_scalar_type (type);
3137 if (vectype == NULL_TREE)
3138 return NULL;
3139
96592eed 3140 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
69d2aade
JJ
3141 return NULL;
3142
bc4fb355 3143 if (itype == NULL_TREE)
6c825cd4 3144 itype = build_nonstandard_integer_type (cmp_mode_size,
bc4fb355
IR
3145 TYPE_UNSIGNED (type));
3146
69d2aade 3147 if (itype == NULL_TREE
b397965c 3148 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
69d2aade
JJ
3149 return NULL;
3150
3151 vecitype = get_vectype_for_scalar_type (itype);
3152 if (vecitype == NULL_TREE)
3153 return NULL;
3154
96592eed 3155 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
69d2aade
JJ
3156 return NULL;
3157
7a504f33 3158 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
69d2aade 3159 {
bc4fb355
IR
3160 if ((TREE_CODE (then_clause) == INTEGER_CST
3161 && !int_fits_type_p (then_clause, itype))
3162 || (TREE_CODE (else_clause) == INTEGER_CST
3163 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
3164 return NULL;
3165 }
3166
0d0e4a03
JJ
3167 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3168 COND_EXPR, unshare_expr (cond_expr),
3169 fold_convert (itype, then_clause),
3170 fold_convert (itype, else_clause));
3171 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3172 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 3173
776bfcea 3174 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
69d2aade
JJ
3175 *type_out = vectype;
3176
49d8df1b 3177 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
f5709183 3178
69d2aade
JJ
3179 return pattern_stmt;
3180}
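
/* For illustration, a minimal source-level trigger for the mixed-size
   COND_EXPR pattern above.  This is an editor's sketch, not part of the
   original file; the names and the bound N are hypothetical:

     #define N 1024
     short x[N], y[N];
     int out[N];

     void
     f (void)
     {
       for (int i = 0; i < N; ++i)
	 out[i] = x[i] < y[i] ? 17 : 42;
     }

   Here 'type' is short and 'TYPE' is int: the comparison is done on
   16-bit elements while the stored result is 32-bit, so the COND_EXPR
   is rewritten to select between 16-bit values and a separate widening
   conversion to int is emitted.  */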


/* Helper function of vect_recog_bool_pattern.  Called recursively; returns
   true if bool VAR can and should be optimized that way.  Assume it shouldn't
   in case it's a result of a comparison which can be directly vectorized into
   a vector comparison.  Fills in STMTS with all stmts visited during the
   walk.  */

static bool
check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
{
  tree rhs1;
  enum tree_code rhs_code;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info)
    return false;

  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
  if (!def_stmt)
    return false;

  if (stmts.contains (def_stmt))
    return true;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    CASE_CONVERT:
      if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	return false;
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    case BIT_NOT_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts)
	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
	return false;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (cfun, def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
	  if (mask_type
	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
	    return false;
	}
      else
	return false;
      break;
    }

  bool res = stmts.add (def_stmt);
  /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
  gcc_assert (!res);

  return true;
}
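
/* For illustration, an editor's sketch (hypothetical names) of a loop
   whose if-converted form gives check_bool_pattern a chain it can
   accept: every leaf of the bool chain is a comparison, combined with
   a bitwise AND:

     void
     f (int *a, int *b, int *c, int *out, int n)
     {
       for (int i = 0; i < n; ++i)
	 out[i] = a[i] < b[i] && b[i] != c[i];
     }

   Whether the function actually returns true also depends on the
   target: if the comparisons can be vectorized directly into vector
   comparisons producing masks, it deliberately returns false and the
   caller uses the mask-based route instead.  */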

/* Helper function of adjust_bool_pattern.  Add a cast of VAR (the result
   of SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast stmt to
   STMT_INFO's pattern definition sequence.  Return the result of the
   cast.  */

static tree
adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
{
  gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					   NOP_EXPR, var);
  append_pattern_def_seq (stmt_info, cast_stmt,
			  get_vectype_for_scalar_type (type));
  return gimple_assign_lhs (cast_stmt);
}

/* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   VAR is an SSA_NAME that should be transformed from bool to a wider integer
   type, OUT_TYPE is the desired final integer type of the whole pattern.
   STMT_INFO is the info of the pattern root, i.e. the stmt that pattern
   stmts should be associated with.  DEFS is a map of pattern defs.  */

static void
adjust_bool_pattern (tree var, tree out_type,
		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;
  tree trueval = NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 the BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks,
	 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
	 computes the comparison mask and ANDs it, in one case with
	 an all-ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs1 = *defs.get (rhs1);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs1;
	      goto do_compare;
	    }
	  else
	    irhs2 = *defs.get (rhs2);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs2 = *defs.get (rhs2);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs2;
	      goto do_compare;
	    }
	  else
	    irhs1 = *defs.get (rhs1);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = *defs.get (rhs1);
      irhs2 = *defs.get (rhs2);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
					      stmt_info);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
					      stmt_info);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
	      irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       rhs_code, irhs1, irhs2);
      break;

    default:
    do_compare:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       COND_EXPR, cond_expr, trueval,
			       build_int_cst (itype, 0));
      break;
    }

  gimple_set_location (pattern_stmt, loc);
  append_pattern_def_seq (stmt_info, pattern_stmt,
			  get_vectype_for_scalar_type (itype));
  defs.put (var, gimple_assign_lhs (pattern_stmt));
}

/* Comparison function for qsort, ordering gimple stmts by UID.  */

static int
sort_after_uid (const void *p1, const void *p2)
{
  const gimple *stmt1 = *(const gimple * const *)p1;
  const gimple *stmt2 = *(const gimple * const *)p2;
  return gimple_uid (stmt1) - gimple_uid (stmt2);
}

/* Create pattern stmts for all stmts participating in the bool pattern
   specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
   OUT_TYPE.  Return the def of the pattern root.  */

static tree
adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
		   tree out_type, stmt_vec_info stmt_info)
{
  /* Gather original stmts in the bool pattern in their order of appearance
     in the IL.  */
  auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
  for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
       i != bool_stmt_set.end (); ++i)
    bool_stmts.quick_push (*i);
  bool_stmts.qsort (sort_after_uid);

  /* Now process them in that order, producing pattern stmts.  */
  hash_map <tree, tree> defs;
  for (unsigned i = 0; i < bool_stmts.length (); ++i)
    adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
			 out_type, stmt_info, defs);

  /* Pop the last pattern seq stmt and install it as pattern root
     for STMT_INFO.  */
  gimple *pattern_stmt
    = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  return gimple_assign_lhs (pattern_stmt);
}

/* Helper for search_type_for_mask.  */

static tree
search_type_for_mask_1 (tree var, vec_info *vinfo,
			hash_map<gimple *, tree> &cache)
{
  tree rhs1;
  enum tree_code rhs_code;
  tree res = NULL_TREE, res2;

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL_TREE;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info)
    return NULL_TREE;

  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
  if (!def_stmt)
    return NULL_TREE;

  tree *c = cache.get (def_stmt);
  if (c)
    return *c;

  rhs_code = gimple_assign_rhs_code (def_stmt);
  rhs1 = gimple_assign_rhs1 (def_stmt);

  switch (rhs_code)
    {
    case SSA_NAME:
    case BIT_NOT_EXPR:
    CASE_CONVERT:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
				     cache);
      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
	res = res2;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree comp_vectype, mask_type;

	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    {
	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
					     vinfo, cache);
	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
		res = res2;
	      break;
	    }

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    {
	      res = NULL_TREE;
	      break;
	    }

	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
	  if (!mask_type
	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
	    {
	      res = NULL_TREE;
	      break;
	    }

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	    {
	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	    }
	  else
	    res = TREE_TYPE (rhs1);
	}
    }

  cache.put (def_stmt, res);
  return res;
}

/* Return the proper type for converting bool VAR into
   an integer value, or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
search_type_for_mask (tree var, vec_info *vinfo)
{
  hash_map<gimple *, tree> cache;
  return search_type_for_mask_1 (var, vinfo, cache);
}
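
/* For illustration (editor's sketch, hypothetical names): given

     _Bool b = (x < y) & (d1 < d2);   // x, y: int;  d1, d2: double

   search_type_for_mask follows the AND back to both comparisons and,
   on targets where both can produce masks, returns the candidate type
   with the smaller precision, here the unsigned 32-bit type derived
   from the int comparison rather than the 64-bit type from the double
   comparison, keeping more elements per vector for the mask.  */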

/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
		 search begins, i.e. cast of a bool to
		 an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create the related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T & b_T;
	but the above is more efficient.  */

static gimple *
vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL;

  hash_set<gimple *> bool_stmts;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	{
	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
	  else
	    pattern_stmt
	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
	}
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, tmp;

	  if (!type)
	    return NULL;

	  /* We may directly use cond with narrowed type to avoid
	     multiple cond exprs with following result packing and
	     perform single cond with packed mask instead.  In case
	     of widening we better make cond first and then extract
	     results.  */
	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
	    type = TREE_TYPE (lhs);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  tmp = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
	    {
	      tree new_vectype = get_vectype_for_scalar_type (type);
	      append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);

	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
	    }
	}

      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
			       TYPE_VECTOR_SUBPARTS (vectype));

      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, vinfo, bool_stmts))
	return NULL;

      rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	= gimple_build_assign (lhs, COND_EXPR,
			       build2 (NE_EXPR, boolean_type_node,
				       rhs, build_int_cst (type, 0)),
			       gimple_assign_rhs2 (last_stmt),
			       gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, new_vectype;

	  if (!type)
	    return NULL;

	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
	    type = TREE_TYPE (vectype);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  new_vectype = get_vectype_for_scalar_type (type);

	  rhs = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
	}

      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else
    return NULL;
}
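
/* For illustration, an editor's sketch (hypothetical names) matching
   the S1-S6 shape in the comment above, as it looks before
   if-conversion:

     void
     f (int *x1, int *x2, int *x3, int *res, int n)
     {
       for (int i = 0; i < n; ++i)
	 res[i] = (x1[i] > 0 && x2[i] > 0) || x3[i] > 0;
     }

   After if-conversion the bool chain feeds a cast to int, which is the
   CONVERT_EXPR entry point handled first above; depending on what
   check_bool_pattern reports, the chain is rewritten via
   adjust_bool_stmts or replaced by a single COND_EXPR on the mask.  */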

/* A helper for vect_recog_mask_conversion_pattern.  Build a conversion
   of MASK to a type suitable for masking VECTYPE.  The built statement
   gets the required vectype and is appended to the pattern sequence of
   STMT_VINFO.

   Return the converted mask.  */

static tree
build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
{
  gimple *stmt;
  tree masktype, tmp;

  masktype = build_same_sized_truth_vector_type (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  append_pattern_def_seq (stmt_vinfo, stmt, masktype);

  return tmp;
}

/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

   bool m_1, m_2, m_3;
   int i_4, i_5;
   double d_6, d_7;
   char c_1, c_2, c_3;

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3   m_3 = m_1 & m_2;
   S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

   S1   m_1 = i_4 > i_5;
   S2   m_2 = d_6 < d_7;
   S3'' m_2' = (_Bool[bitsize=32])m_2
   S3'  m_3' = m_1 & m_2';
   S4'' m_3'' = (_Bool[bitsize=8])m_3'
   S4'  c_1' = m_3'' ? c_2 : c_3;  */

static gimple *
vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info pattern_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt))
    {
      gcall *pattern_stmt;

      internal_fn ifn = gimple_call_internal_fn (last_stmt);
      int mask_argno = internal_fn_mask_index (ifn);
      if (mask_argno < 0)
	return NULL;

      bool store_p = internal_store_fn_p (ifn);
      if (store_p)
	{
	  int rhs_index = internal_fn_stored_value_index (ifn);
	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
	}
      else
	{
	  lhs = gimple_call_lhs (last_stmt);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
	}

      tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
      tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
      if (!mask_arg_type)
	return NULL;
      vectype2 = get_mask_type_for_scalar_type (mask_arg_type);

      if (!vectype1 || !vectype2
	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	return NULL;

      tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);

      auto_vec<tree, 8> args;
      unsigned int nargs = gimple_call_num_args (last_stmt);
      args.safe_grow (nargs);
      for (unsigned int i = 0; i < nargs; ++i)
	args[i] = ((int) i == mask_argno
		   ? tmp
		   : gimple_call_arg (last_stmt, i));
      pattern_stmt = gimple_build_call_internal_vec (ifn, args);

      if (!store_p)
	{
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple_call_set_lhs (pattern_stmt, lhs);
	}
      gimple_call_set_nothrow (pattern_stmt, true);

      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      if (STMT_VINFO_DATA_REF (stmt_vinfo))
	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  if (!is_gimple_assign (last_stmt))
    return NULL;

  gimple *pattern_stmt;
  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_type = search_type_for_mask (rhs1, vinfo);
	  if (!rhs1_type)
	    return NULL;
	}
      else if (COMPARISON_CLASS_P (rhs1))
	{
	  /* Check whether we're comparing scalar booleans and (if so)
	     whether a better mask type exists than the mask associated
	     with boolean-sized elements.  This avoids unnecessary packs
	     and unpacks if the booleans are set from comparisons of
	     wider types.  E.g. in:

	       int x1, x2, x3, x4, y1, y2;
	       ...
	       bool b1 = (x1 == x2);
	       bool b2 = (x3 == x4);
	       ... = b1 == b2 ? y1 : y2;

	     it is better for b1 and b2 to use the mask type associated
	     with int elements rather than bool (byte) elements.  */
	  rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
	  if (!rhs1_type)
	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
	}
      else
	return NULL;

      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2)
	return NULL;

      /* Continue if a conversion is needed.  Also continue if we have
	 a comparison whose vector type would normally be different from
	 VECTYPE2 when considered in isolation.  In that case we'll
	 replace the comparison with an SSA name (so that we can record
	 its vector type) and behave as though the comparison was an SSA
	 name from the outset.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2))
	  && (TREE_CODE (rhs1) == SSA_NAME
	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
	return NULL;

      /* If rhs1 is invariant and we can promote it, leave the COND_EXPR
	 in place; we can handle it in vectorizable_condition.  This avoids
	 unnecessary promotion stmts and increased vectorization factor.  */
      if (COMPARISON_CLASS_P (rhs1)
	  && INTEGRAL_TYPE_P (rhs1_type)
	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	{
	  enum vect_def_type dt;
	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
	      && dt == vect_external_def
	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
	      && (dt == vect_external_def
		  || dt == vect_constant_def))
	    {
	      tree wide_scalar_type = build_nonstandard_integer_type
		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
		 TYPE_UNSIGNED (rhs1_type));
	      tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
		return NULL;
	    }
	}

      /* If rhs1 is a comparison we need to move it into a
	 separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
	{
	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	  pattern_stmt = gimple_build_assign (tmp, rhs1);
	  rhs1 = tmp;
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
	}

      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2)))
	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
      else
	tmp = rhs1;

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
					  gimple_assign_rhs2 (last_stmt),
					  gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion for
     one of the operands.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = search_type_for_mask (rhs1, vinfo);
  rhs2_type = search_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (rhs1_type);
      if (!vectype1)
	return NULL;
      rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (rhs2_type);
      if (!vectype1)
	return NULL;
      rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

  return pattern_stmt;
}
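
/* For illustration, the example from the comment above as compilable
   source (editor's sketch, hypothetical names):

     void
     f (int *i4, int *i5, double *d6, double *d7,
	char *c1, char *c2, char *c3, int n)
     {
       for (int i = 0; i < n; ++i)
	 c1[i] = (i4[i] > i5[i]) & (d6[i] < d7[i]) ? c2[i] : c3[i];
     }

   The int and double comparisons produce masks with different numbers
   of elements per vector, and the char COND_EXPR needs yet another
   mask layout, so the pattern inserts the conversions S3'' and S4''
   shown above until each statement uses a single mask type.  */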

/* STMT_INFO is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.  */

static tree
vect_get_load_store_mask (stmt_vec_info stmt_info)
{
  if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (def_assign));
      return NULL_TREE;
    }

  if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (def_call);
      int mask_index = internal_fn_mask_index (ifn);
      return gimple_call_arg (def_call, mask_index);
    }

  gcc_unreachable ();
}

/* Return the scalar offset type that an internal gather/scatter function
   should use.  GS_INFO describes the gather/scatter operation.  */

static tree
vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
{
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));

  /* Enforced by vect_check_gather_scatter.  */
  unsigned int offset_bits = TYPE_PRECISION (offset_type);
  gcc_assert (element_bits >= offset_bits);

  /* If the offset is narrower than the elements, extend it according
     to its sign.  */
  if (element_bits > offset_bits)
    return build_nonstandard_integer_type (element_bits,
					   TYPE_UNSIGNED (offset_type));

  return offset_type;
}
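
/* For illustration (editor's sketch, hypothetical names): a gather of
   64-bit elements through 32-bit indices such as

     void
     f (double *restrict out, double *base, int *idx, int n)
     {
       for (int i = 0; i < n; ++i)
	 out[i] = base[idx[i]];
     }

   has element_bits == 64 and offset_bits == 32, so the function above
   returns a signed 64-bit type and the offset vector is sign-extended
   to element width before being passed to the internal function.  */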

/* Return MASK if MASK is suitable for masking an operation on vectors
   of type VECTYPE, otherwise convert it into such a form and return
   the result.  Associate any conversion statements with STMT_INFO's
   pattern.  */

static tree
vect_convert_mask_for_vectype (tree mask, tree vectype,
			       stmt_vec_info stmt_info, vec_info *vinfo)
{
  tree mask_type = search_type_for_mask (mask, vinfo);
  if (mask_type)
    {
      tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
      if (mask_vectype
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
	mask = build_mask_conversion (mask, vectype, stmt_info);
    }
  return mask;
}

/* Return the equivalent of:

     fold_convert (TYPE, VALUE)

   with the expectation that the operation will be vectorized.
   If new statements are needed, add them as pattern statements
   to STMT_INFO.  */

static tree
vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
{
  if (useless_type_conversion_p (type, TREE_TYPE (value)))
    return value;

  tree new_value = vect_recog_temp_ssa_var (type, NULL);
  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
  append_pattern_def_seq (stmt_info, conversion,
			  get_vectype_for_scalar_type (type));
  return new_value;
}

/* Try to convert STMT_INFO into a call to a gather load or scatter store
   internal function.  Return the final statement on success and set
   *TYPE_OUT to the vector type being loaded or stored.

   This function only handles gathers and scatters that were recognized
   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */

static gimple *
vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
{
  /* Currently we only support this for loop vectorization.  */
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
  if (!loop_vinfo)
    return NULL;

  /* Make sure that we're looking at a gather load or scatter store.  */
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return NULL;

  /* Get the boolean that controls whether the load or store happens.
     This is null if the operation is unconditional.  */
  tree mask = vect_get_load_store_mask (stmt_info);

  /* Make sure that the target supports an appropriate internal
     function for the gather/scatter operation.  */
  gather_scatter_info gs_info;
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
      || gs_info.decl)
    return NULL;

  /* Convert the mask to the right form.  */
  tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
  if (mask)
    mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
					  loop_vinfo);

  /* Get the invariant base and non-invariant offset, converting the
     latter to the same width as the vector elements.  */
  tree base = gs_info.base;
  tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
  tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
						stmt_info);

  /* Build the new pattern statement.  */
  tree scale = size_int (gs_info.scale);
  gcall *pattern_stmt;
  if (DR_IS_READ (dr))
    {
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
						   offset, scale, mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
						   offset, scale);
      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
      gimple_call_set_lhs (pattern_stmt, load_lhs);
    }
  else
    {
      tree rhs = vect_get_store_rhs (stmt_info);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
						   base, offset, scale, rhs,
						   mask);
      else
	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
						   base, offset, scale, rhs);
    }
  gimple_call_set_nothrow (pattern_stmt, true);

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
  loop_vinfo->move_dr (pattern_stmt_info, stmt_info);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  *type_out = vectype;
  vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);

  return pattern_stmt;
}
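
/* For illustration (editor's sketch): for a masked gather like

     for (int i = 0; i < n; ++i)
       if (cond[i])
	 out[i] = base[idx[i]];

   the if-converted load is replaced by a pattern statement of roughly
   the shape

     _1 = .MASK_GATHER_LOAD (base, idx_w, scale, mask_w);

   where idx_w is the offset after vect_add_conversion_to_pattern and
   mask_w the result of vect_convert_mask_for_vectype.  The exact
   internal function and operands come from gs_info and the target, so
   this is a shape, not a guaranteed dump.  */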

/* Return true if TYPE is a non-boolean integer type.  These are the types
   that we want to consider for narrowing.  */

static bool
vect_narrowable_type_p (tree type)
{
  return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
}

/* Return true if the operation given by CODE can be truncated to N bits
   when only N bits of the output are needed.  This is only true if bit N+1
   of the inputs has no effect on the low N bits of the result.  */

static bool
vect_truncatable_operation_p (tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case COND_EXPR:
      return true;

    default:
      return false;
    }
}
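
/* A worked instance of the rule above (editor's note): for N == 8,
   PLUS_EXPR is truncatable because

     (a + b) & 0xff == ((a & 0xff) + (b & 0xff)) & 0xff

   for arbitrarily wide a and b.  A right shift is not: the low 8 bits
   of (a >> 1) are bits 1..8 of a, so they depend on input bit 8.
   RSHIFT_EXPR is therefore absent from the switch and instead gets
   extra input bits in vect_determine_precisions_from_users below.  */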

/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
			 unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
	  || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}

/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   is an arbitrary bit precision; it might not be a whole number of bytes.  */

static void
vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
			      unsigned int min_input_precision)
{
  /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
     that MIN_INPUT_PRECISION is a natural precision for the chain
     as a whole.  E.g. consider something like:

       unsigned short *x, *y;
       *y = ((*x & 0xf0) >> 4) | (*y << 4);

     The right shift can be done on unsigned chars, and only requires the
     result of "*x & 0xf0" to be done on unsigned chars.  But taking that
     approach would mean turning a natural chain of single-vector unsigned
     short operations into one that truncates "*x" and then extends
     "(*x & 0xf0) >> 4", with two vectors for each unsigned short
     operation and one vector for each unsigned char operation.
     This would be a significant pessimization.

     Instead only propagate the maximum of this precision and the precision
     required by the users of the result.  This means that we don't pessimize
     the case above but continue to optimize things like:

       unsigned char *y;
       unsigned short *x;
       *y = ((*x & 0xf0) >> 4) | (*y << 4);

     Here we would truncate two vectors of *x to a single vector of
     unsigned chars and use single-vector unsigned char operations for
     everything else, rather than doing two unsigned short copies of
     "(*x & 0xf0) >> 4" and then truncating the result.  */
  min_input_precision = MAX (min_input_precision,
			     stmt_info->min_output_precision);

  if (min_input_precision < TYPE_PRECISION (type)
      && (!stmt_info->min_input_precision
	  || stmt_info->min_input_precision > min_input_precision))
    stmt_info->min_input_precision = min_input_precision;
}

/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  vec_info *vinfo = stmt_info->vinfo;
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      if (is_gimple_debug (use_stmt))
	continue;
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
	return false;
      /* The input precision recorded for COND_EXPRs applies only to the
	 "then" and "else" values.  */
      gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
      if (assign
	  && gimple_assign_rhs_code (assign) == COND_EXPR
	  && use->use != gimple_assign_rhs2_ptr (assign)
	  && use->use != gimple_assign_rhs3_ptr (assign))
	return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "only the low %d bits of %T are significant\n",
		     precision, lhs);
  stmt_info->min_output_precision = precision;
  return true;
}

/* Calculate min_output_precision for STMT_INFO.  */

static void
vect_determine_min_output_precision (stmt_vec_info stmt_info)
{
  /* We're only interested in statements with a narrowable result.  */
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    return;

  if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
    stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
}

/* Use range information to decide whether STMT (described by STMT_INFO)
   could be done in a narrower type.  This is effectively a forward
   propagation, since it uses context-independent information that applies
   to all users of an SSA name.  */

static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    /* Check that all relevant input operands are compatible, and update
       [MIN_VALUE, MAX_VALUE] to include their ranges.  */
    for (unsigned int i = 1; i < nops; ++i)
      {
	tree op = gimple_op (stmt, i);
	if (TREE_CODE (op) == INTEGER_CST)
	  {
	    /* Don't require the integer to have RHS_TYPE (which it might
	       not for things like shift amounts, etc.), but do require it
	       to fit the type.  */
	    if (!int_fits_type_p (op, type))
	      return;

	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
	  }
	else if (TREE_CODE (op) == SSA_NAME)
	  {
	    /* Ignore codes that don't take uniform arguments.  */
	    if (!types_compatible_p (TREE_TYPE (op), type))
	      return;

	    wide_int op_min_value, op_max_value;
	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
	      return;

	    min_value = wi::min (min_value, op_min_value, sign);
	    max_value = wi::max (max_value, op_max_value, sign);
	  }
	else
	  return;
      }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

       signed char c;
       int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

       signed char c;
       unsigned short res_1 = (unsigned short) c & 0xff00;
       int res = (int) res_1;

     where the intermediate result res_1 has unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
		     " without loss of precision: %G",
		     sign == SIGNED ? "signed" : "unsigned",
		     value_precision, stmt);

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
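
/* A worked instance of the above (editor's sketch): for unsigned int
   a and b, in

     unsigned int r = (a & 0x7f) + (b >> 25);

   both operands lie in [0, 127], so if earlier passes recorded the
   range [0, 254] for r, min_precision of both bounds is 8 and the
   statement is marked as narrowable to an unsigned 8-bit operation
   despite its declared 32-bit type.  Because PLUS_EXPR is truncatable,
   the operand ranges are not even consulted.  */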

/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
	 the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
	tree shift = gimple_assign_rhs2 (stmt);
	if (TREE_CODE (shift) != INTEGER_CST
	    || !wi::ltu_p (wi::to_widest (shift), precision))
	  return;
	unsigned int const_shift = TREE_INT_CST_LOW (shift);
	if (code == LSHIFT_EXPR)
	  {
	    /* We need CONST_SHIFT fewer bits of the input.  */
	    operation_precision = stmt_info->min_output_precision;
	    min_input_precision = (MAX (operation_precision, const_shift)
				   - const_shift);
	  }
	else
	  {
	    /* We need CONST_SHIFT extra bits to do the operation.  */
	    operation_precision = (stmt_info->min_output_precision
				   + const_shift);
	    min_input_precision = operation_precision;
	  }
	break;
      }

    default:
      if (vect_truncatable_operation_p (code))
	{
	  /* Input bit N has no effect on output bits N-1 and lower.  */
	  operation_precision = stmt_info->min_output_precision;
	  min_input_precision = operation_precision;
	  break;
	}
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
			 " without affecting users: %G",
			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
			 operation_precision, stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
			       TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
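
/* A worked instance of the shift cases above (editor's note): in

     unsigned short t = x >> 4;   // x: unsigned short
     unsigned char r = t;         // user needs only 8 bits of t

   the conversion gives t a min_output_precision of 8, so the
   RSHIFT_EXPR needs operation_precision = 8 + 4 = 12 bits, which
   vect_set_operation_type rounds up to 16.  A left shift by 4 with the
   same user would instead need only MAX (8, 4) - 4 = 4 input bits.  */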

/* Handle vect_determine_precisions for STMT_INFO, given that we
   have already done so for the users of its result.  */

void
vect_determine_stmt_precisions (stmt_vec_info stmt_info)
{
  vect_determine_min_output_precision (stmt_info);
  if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
    {
      vect_determine_precisions_from_range (stmt_info, stmt);
      vect_determine_precisions_from_users (stmt_info, stmt);
    }
}

/* Walk backwards through the vectorizable region to determine the
   values of these fields:

   - min_output_precision
   - min_input_precision
   - operation_precision
   - operation_sign.  */

void
vect_determine_precisions (vec_info *vinfo)
{
  DUMP_VECT_SCOPE ("vect_determine_precisions");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
      basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
      unsigned int nbbs = loop->num_nodes;

      for (unsigned int i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[nbbs - i - 1];
	  for (gimple_stmt_iterator si = gsi_last_bb (bb);
	       !gsi_end_p (si); gsi_prev (&si))
	    vect_determine_stmt_precisions
	      (vinfo->lookup_stmt (gsi_stmt (si)));
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      gimple_stmt_iterator si = bb_vinfo->region_end;
      gimple *stmt;
      do
	{
	  if (!gsi_stmt (si))
	    si = gsi_last_bb (bb_vinfo->bb);
	  else
	    gsi_prev (&si);
	  stmt = gsi_stmt (si);
	  stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
	  if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
	    vect_determine_stmt_precisions (stmt_info);
	}
      while (stmt != gsi_stmt (bb_vinfo->region_begin));
    }
}

typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);

struct vect_recog_func
{
  vect_recog_func_ptr fn;
  const char *name;
};

/* Note that ordering matters - the first pattern that matches a stmt is
   taken, which usually means the more complex ones need to precede the
   less complex ones (widen_sum only after dot_prod or sad, for example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4686
1107f3ae
IR
4687/* Mark statements that are involved in a pattern. */
4688
4689static inline void
cef6cac8 4690vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
1107f3ae
IR
4691 tree pattern_vectype)
4692{
41949de9 4693 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
1107f3ae 4694
  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
	 sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
	 old and new statements, so that the old one has a valid but
	 unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
		  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
	 !gsi_end_p (si); gsi_next (&si))
      {
	stmt_vec_info pattern_stmt_info
	  = vect_init_pattern_stmt (gsi_stmt (si),
				    orig_stmt_info, pattern_vectype);
	/* Stmts in the def sequence are not vectorizable cycle or
	   induction defs, instead they should all be vect_internal_def
	   feeding the main pattern stmt which retains this def type.  */
	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
}
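
/* A minimal sketch of the lhs swap performed above, assuming OLD_STMT
   currently defines the SSA name that all consumers use.  After the
   swap NEW_STMT defines that name, and OLD_STMT is left with a valid
   but unused lhs, so no use sites need updating.  The helper name is
   hypothetical; the logic simply mirrors vect_mark_pattern_stmts.  */

static void
swap_pattern_lhs (gimple *old_stmt, gimple *new_stmt)
{
  tree old_lhs = gimple_get_lhs (old_stmt);
  gimple_set_lhs (old_stmt, gimple_get_lhs (new_stmt));
  gimple_set_lhs (new_stmt, old_lhs);
}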

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */

static void
vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
	   !gsi_end_p (gsi); gsi_next (&gsi))
	vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  gcc_assert (pattern_vectype);

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "%s pattern recognized: %G",
		     recog_func->name, pattern_stmt);

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    {
      unsigned ix, ix2;
      stmt_vec_info *elem_ptr;
      VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
			     elem_ptr, *elem_ptr == stmt_info);
    }
}
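
/* To make the PATTERN_RECOG_FUNC contract above concrete: a hedged,
   purely hypothetical recognizer that emits one helper statement.
   The helper goes into STMT_VINFO_PATTERN_DEF_SEQ via
   append_pattern_def_seq; only the final statement is returned to
   vect_pattern_recog_1.  The name and the x * 3 -> (x << 1) + x
   rewrite are made up for illustration only.  */

static gimple *
vect_recog_mult3_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (stmt_vinfo->stmt);
  if (!last_stmt || gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;

  tree itype = TREE_TYPE (gimple_assign_lhs (last_stmt));
  tree oprnd0 = gimple_assign_rhs1 (last_stmt);
  tree oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!INTEGRAL_TYPE_P (itype)
      || !tree_fits_shwi_p (oprnd1)
      || tree_to_shwi (oprnd1) != 3)
    return NULL;

  *type_out = get_vectype_for_scalar_type (itype);
  if (!*type_out)
    return NULL;

  /* Helper statement "t = x << 1": recorded in the pattern definition
     sequence rather than returned.  */
  tree t = vect_recog_temp_ssa_var (itype, NULL);
  gimple *def_stmt
    = gimple_build_assign (t, LSHIFT_EXPR, oprnd0,
			   build_int_cst (itype, 1));
  append_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Final statement "res = t + x": returned and later installed by
     vect_mark_pattern_stmts.  */
  tree res = vect_recog_temp_ssa_var (itype, NULL);
  return gimple_build_assign (res, PLUS_EXPR, t, oprnd0);
}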

/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	  '---> S6: a_new = ....        -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	  '---> S6: a_new = ....        -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  struct loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
				      stmt_info);
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
	  if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
	}
    }
}
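
/* A worked example of the flow above, stated as user code for
   illustration (the exact idiom vect_recog_dot_prod_pattern looks for
   is documented at its definition earlier in this file): given

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < n; i++)
       sum += (int) a[i] * (int) b[i];

   the multiply-and-accumulate statement plays the role of S4 in the
   comment before vect_pattern_recog; the recognizer can replace it
   with a DOT_PROD_EXPR pattern statement (S6), and a target with a
   dot-product instruction then vectorizes S6 directly while S1-S3
   become irrelevant.  */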