/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006-2019 Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "ssa.h"
#include "expmed.h"
#include "optabs-tree.h"
#include "insn-config.h"
#include "recog.h"	/* FIXME: for insn_data */
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "cfgloop.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "builtins.h"
#include "internal-fn.h"
#include "case-cfn-macros.h"
#include "fold-const-call.h"
#include "attribs.h"
#include "cgraph.h"
#include "omp-simd-clone.h"
#include "predict.h"

/* Return true if we have a useful VR_RANGE range for VAR, storing it
   in *MIN_VALUE and *MAX_VALUE if so.  Note the range in the dump files.  */
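
/* For example (illustrative values only, not from the original sources):
   if VRP recorded the range [0, 255] for VAR and the nonzero bits are
   known to be 0xf0, the intersection below narrows the recorded range
   to [0, 0xf0].  */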

static bool
vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
{
  value_range_kind vr_type = get_range_info (var, min_value, max_value);
  wide_int nonzero = get_nonzero_bits (var);
  signop sgn = TYPE_SIGN (TREE_TYPE (var));
  if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
					 nonzero, sgn) == VR_RANGE)
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has range [");
	  dump_hex (MSG_NOTE, *min_value);
	  dump_printf (MSG_NOTE, ", ");
	  dump_hex (MSG_NOTE, *max_value);
	  dump_printf (MSG_NOTE, "]\n");
	}
      return true;
    }
  else
    {
      if (dump_enabled_p ())
	{
	  dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
	  dump_printf (MSG_NOTE, " has no range info\n");
	}
      return false;
    }
}

/* Report that we've found an instance of pattern NAME in
   statement STMT.  */

static void
vect_pattern_detected (const char *name, gimple *stmt)
{
  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
}

/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
   return the pattern statement's stmt_vec_info.  Set its vector type to
   VECTYPE if it doesn't have one already.  */

static stmt_vec_info
vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
			tree vectype)
{
  vec_info *vinfo = orig_stmt_info->vinfo;
  stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));

  pattern_stmt_info->pattern_stmt_p = true;
  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
  return pattern_stmt_info;
}

/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
   Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
   have one already.  */

static void
vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
		       tree vectype)
{
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info)
    = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
}

/* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
   is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
   be different from the vector type of the final pattern statement.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
			tree vectype = NULL_TREE)
{
  vec_info *vinfo = stmt_info->vinfo;
  if (vectype)
    {
      stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
      STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
    }
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      new_stmt);
}

/* The caller wants to perform new operations on vect_external variable
   VAR, so that the result of the operations would also be vect_external.
   Return the edge on which the operations can be performed, if one exists.
   Return null if the operations should instead be treated as part of
   the pattern that needs them.  */

static edge
vect_get_external_def_edge (vec_info *vinfo, tree var)
{
  edge e = NULL;
  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      e = loop_preheader_edge (loop_vinfo->loop);
      if (!SSA_NAME_IS_DEFAULT_DEF (var))
	{
	  basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
	  if (bb == NULL
	      || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
	    e = NULL;
	}
    }
  return e;
}

/* Return true if the target supports a vector version of CODE,
   where CODE is known to map to a direct optab.  ITYPE specifies
   the type of (some of) the scalar inputs and OTYPE specifies the
   type of the scalar result.

   If CODE allows the inputs and outputs to have different type
   (such as for WIDEN_SUM_EXPR), it is the input mode rather
   than the output mode that determines the appropriate target pattern.
   Operand 0 of the target pattern then specifies the mode that the output
   must have.

   When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
   Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
   is nonnull.  */
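
/* For example, vect_recog_widen_sum_pattern below calls this with
   CODE == WIDEN_SUM_EXPR, a narrow ITYPE and a wide OTYPE; the optab is
   looked up on the vector form of ITYPE, and operand 0 of the resulting
   instruction must have the mode of the vector form of OTYPE.  */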

static bool
vect_supportable_direct_optab_p (tree otype, tree_code code,
				 tree itype, tree *vecotype_out,
				 tree *vecitype_out = NULL)
{
  tree vecitype = get_vectype_for_scalar_type (itype);
  if (!vecitype)
    return false;

  tree vecotype = get_vectype_for_scalar_type (otype);
  if (!vecotype)
    return false;

  optab optab = optab_for_tree_code (code, vecitype, optab_default);
  if (!optab)
    return false;

  insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
  if (icode == CODE_FOR_nothing
      || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
    return false;

  *vecotype_out = vecotype;
  if (vecitype_out)
    *vecitype_out = vecitype;
  return true;
}

/* Round bit precision PRECISION up to a full element.  */
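
/* For example, with BITS_PER_UNIT == 8: precisions 3 and 7 round up
   to 8, 9 rounds up to 16, and 17 rounds up to 32.  */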

static unsigned int
vect_element_precision (unsigned int precision)
{
  precision = 1 << ceil_log2 (precision);
  return MAX (precision, BITS_PER_UNIT);
}

/* If OP is defined by a statement that's being considered for vectorization,
   return information about that statement, otherwise return NULL.  */

static stmt_vec_info
vect_get_internal_def (vec_info *vinfo, tree op)
{
  stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
  if (def_stmt_info
      && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
    return def_stmt_info;
  return NULL;
}

/* Check whether NAME, an ssa-name used in STMT_VINFO,
   is a result of a type promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */
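
/* An illustrative sketch of the interface: given

     short s = ...;
     int name = (int) s;

   type_conversion_p would return true with *ORIG_TYPE set to 'short',
   *DEF_STMT set to the conversion statement, and *PROMOTION set to true,
   since on typical targets TYPE_PRECISION (int) is at least double
   TYPE_PRECISION (short).  */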

static bool
type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
		   tree *orig_type, gimple **def_stmt, bool *promotion)
{
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;

  stmt_vec_info def_stmt_info;
  if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
			   def_stmt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else
    *promotion = false;

  if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
    return false;

  return true;
}

/* Holds information about an input operand after some sign changes
   and type promotions have been peeled away.  */
class vect_unpromoted_value {
public:
  vect_unpromoted_value ();

  void set_op (tree, vect_def_type, stmt_vec_info = NULL);

  /* The value obtained after peeling away zero or more casts.  */
  tree op;

  /* The type of OP.  */
  tree type;

  /* The definition type of OP.  */
  vect_def_type dt;

  /* If OP is the result of peeling at least one cast, and if the cast
     of OP itself is a vectorizable statement, CASTER identifies that
     statement, otherwise it is null.  */
  stmt_vec_info caster;
};

inline vect_unpromoted_value::vect_unpromoted_value ()
  : op (NULL_TREE),
    type (NULL_TREE),
    dt (vect_uninitialized_def),
    caster (NULL)
{
}

/* Set the operand to OP_IN, its definition type to DT_IN, and the
   statement that casts it to CASTER_IN.  */

inline void
vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
			       stmt_vec_info caster_in)
{
  op = op_in;
  type = TREE_TYPE (op);
  dt = dt_in;
  caster = caster_in;
}

/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
   to reach some vectorizable inner operand OP', continuing as long as it
   is possible to convert OP' back to OP using a possible sign change
   followed by a possible promotion P.  Return this OP', or null if OP is
   not a vectorizable SSA name.  If there is a promotion P, describe its
   input in UNPROM, otherwise describe OP' in UNPROM.  If SINGLE_USE_P
   is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
   have more than one user.

   A successful return means that it is possible to go from OP' to OP
   via UNPROM.  The cast from OP' to UNPROM is at most a sign change,
   whereas the cast from UNPROM to OP might be a promotion, a sign
   change, or a nop.

   E.g. say we have:

       signed short *ptr = ...;
       signed short C = *ptr;
       unsigned short B = (unsigned short) C;    // sign change
       signed int A = (signed int) B;            // unsigned promotion
       ...possible other uses of A...
       unsigned int OP = (unsigned int) A;       // sign change

   In this case it's possible to go directly from C to OP using:

       OP = (unsigned int) (unsigned short) C;
	    +------------+ +--------------+
	       promotion      sign change

   so OP' would be C.  The input to the promotion is B, so UNPROM
   would describe B.  */

static tree
vect_look_through_possible_promotion (vec_info *vinfo, tree op,
				      vect_unpromoted_value *unprom,
				      bool *single_use_p = NULL)
{
  tree res = NULL_TREE;
  tree op_type = TREE_TYPE (op);
  unsigned int orig_precision = TYPE_PRECISION (op_type);
  unsigned int min_precision = orig_precision;
  stmt_vec_info caster = NULL;
  while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
    {
      /* See whether OP is simple enough to vectorize.  */
      stmt_vec_info def_stmt_info;
      gimple *def_stmt;
      vect_def_type dt;
      if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
	break;

      /* If OP is the input of a demotion, skip over it to see whether
	 OP is itself the result of a promotion.  If so, the combined
	 effect of the promotion and the demotion might fit the required
	 pattern, otherwise neither operation fits.

	 This copes with cases such as the result of an arithmetic
	 operation being truncated before being stored, and where that
	 arithmetic operation has been recognized as an over-widened one.  */
      if (TYPE_PRECISION (op_type) <= min_precision)
	{
	  /* Use OP as the UNPROM described above if we haven't yet
	     found a promotion, or if using the new input preserves the
	     sign of the previous promotion.  */
	  if (!res
	      || TYPE_PRECISION (unprom->type) == orig_precision
	      || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
	    {
	      unprom->set_op (op, dt, caster);
	      min_precision = TYPE_PRECISION (op_type);
	    }
	  /* Stop if we've already seen a promotion and if this
	     conversion does more than change the sign.  */
	  else if (TYPE_PRECISION (op_type)
		   != TYPE_PRECISION (unprom->type))
	    break;

	  /* The sequence now extends to OP.  */
	  res = op;
	}

      /* See whether OP is defined by a cast.  Record it as CASTER if
	 the cast is potentially vectorizable.  */
      if (!def_stmt)
	break;
      caster = def_stmt_info;

      /* Ignore pattern statements, since we don't link uses for them.  */
      if (caster
	  && single_use_p
	  && !STMT_VINFO_RELATED_STMT (caster)
	  && !has_single_use (res))
	*single_use_p = false;

      gassign *assign = dyn_cast <gassign *> (def_stmt);
      if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
	break;

      /* Continue with the input to the cast.  */
      op = gimple_assign_rhs1 (def_stmt);
      op_type = TREE_TYPE (op);
    }
  return res;
}

/* OP is an integer operand to an operation that returns TYPE, and we
   want to treat the operation as a widening one.  So far we can treat
   it as widening from *COMMON_TYPE.

   Return true if OP is suitable for such a widening operation,
   either widening from *COMMON_TYPE or from some supertype of it.
   Update *COMMON_TYPE to the supertype in the latter case.

   SHIFT_P is true if OP is a shift amount.  */
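
/* An illustrative example: if TYPE is a 32-bit integer, *COMMON_TYPE is
   an unsigned 8-bit type and OP is the constant 1000, then OP needs 10
   bits, which vect_element_precision rounds up to 16.  16 * 2 does not
   exceed 32, so the function succeeds and widens *COMMON_TYPE to an
   unsigned 16-bit type.  A constant needing more than 16 bits would
   make the check fail.  */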

static bool
vect_joust_widened_integer (tree type, bool shift_p, tree op,
			    tree *common_type)
{
  /* Calculate the minimum precision required by OP, without changing
     the sign of either operand.  */
  unsigned int precision;
  if (shift_p)
    {
      if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
	return false;
      precision = TREE_INT_CST_LOW (op);
    }
  else
    {
      precision = wi::min_precision (wi::to_widest (op),
				     TYPE_SIGN (*common_type));
      if (precision * 2 > TYPE_PRECISION (type))
	return false;
    }

  /* If OP requires a wider type, switch to that type.  The checks
     above ensure that this is still narrower than the result.  */
  precision = vect_element_precision (precision);
  if (TYPE_PRECISION (*common_type) < precision)
    *common_type = build_nonstandard_integer_type
      (precision, TYPE_UNSIGNED (*common_type));
  return true;
}

/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
   is narrower than type, storing the supertype in *COMMON_TYPE if so.  */
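
/* An illustrative example: joining an unsigned 8-bit *COMMON_TYPE with
   a signed 8-bit NEW_TYPE falls through to the mismatched-signs case
   and requires a signed 16-bit supertype.  With a 32-bit TYPE this is
   still no wider than half the result, so the function succeeds and
   stores the 16-bit signed type in *COMMON_TYPE.  */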

static bool
vect_joust_widened_type (tree type, tree new_type, tree *common_type)
{
  if (types_compatible_p (*common_type, new_type))
    return true;

  /* See if *COMMON_TYPE can hold all values of NEW_TYPE.  */
  if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
      && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
    return true;

  /* See if NEW_TYPE can hold all values of *COMMON_TYPE.  */
  if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
      && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
    {
      *common_type = new_type;
      return true;
    }

  /* We have mismatched signs, with the signed type being
     no wider than the unsigned type.  In this case we need
     a wider signed type.  */
  unsigned int precision = MAX (TYPE_PRECISION (*common_type),
				TYPE_PRECISION (new_type));
  precision *= 2;
  if (precision * 2 > TYPE_PRECISION (type))
    return false;

  *common_type = build_nonstandard_integer_type (precision, false);
  return true;
}

/* Check whether STMT_INFO can be viewed as a tree of integer operations
   in which each node either performs CODE or WIDENED_CODE, and where
   each leaf operand is narrower than the result of STMT_INFO.  MAX_NOPS
   specifies the maximum number of leaf operands.  SHIFT_P says whether
   CODE and WIDENED_CODE are some sort of shift.

   If STMT_INFO is such a tree, return the number of leaf operands
   and describe them in UNPROM[0] onwards.  Also set *COMMON_TYPE
   to a type that (a) is narrower than the result of STMT_INFO and
   (b) can hold all leaf operand values.

   Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
   exists.  */
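
/* An illustrative example: with CODE == PLUS_EXPR, MAX_NOPS == 4 and
   char inputs a, b, c and d, a statement tree computing

     ((int) a + (int) b) + ((int) c + (int) d)

   has four leaf operands; a successful call describes a, b, c and d in
   UNPROM[0..3] and sets *COMMON_TYPE to the char type.  */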

static unsigned int
vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
		      tree_code widened_code, bool shift_p,
		      unsigned int max_nops,
		      vect_unpromoted_value *unprom, tree *common_type)
{
  /* Check for an integer operation with the right code.  */
  vec_info *vinfo = stmt_info->vinfo;
  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign)
    return 0;

  tree_code rhs_code = gimple_assign_rhs_code (assign);
  if (rhs_code != code && rhs_code != widened_code)
    return 0;

  tree type = gimple_expr_type (assign);
  if (!INTEGRAL_TYPE_P (type))
    return 0;

  /* Assume that both operands will be leaf operands.  */
  max_nops -= 2;

  /* Check the operands.  */
  unsigned int next_op = 0;
  for (unsigned int i = 0; i < 2; ++i)
    {
      vect_unpromoted_value *this_unprom = &unprom[next_op];
      unsigned int nops = 1;
      tree op = gimple_op (assign, i + 1);
      if (i == 1 && TREE_CODE (op) == INTEGER_CST)
	{
	  /* We already have a common type from earlier operands.
	     Update it to account for OP.  */
	  this_unprom->set_op (op, vect_constant_def);
	  if (!vect_joust_widened_integer (type, shift_p, op, common_type))
	    return 0;
	}
      else
	{
	  /* Only allow shifts by constants.  */
	  if (shift_p && i == 1)
	    return 0;

	  if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
						     this_unprom))
	    return 0;

	  if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
	    {
	      /* The operand isn't widened.  If STMT_INFO has the code
		 for an unwidened operation, recursively check whether
		 this operand is a node of the tree.  */
	      if (rhs_code != code
		  || max_nops == 0
		  || this_unprom->dt != vect_internal_def)
		return 0;

	      /* Give back the leaf slot allocated above now that we're
		 not treating this as a leaf operand.  */
	      max_nops += 1;

	      /* Recursively process the definition of the operand.  */
	      stmt_vec_info def_stmt_info
		= vinfo->lookup_def (this_unprom->op);
	      nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
					   shift_p, max_nops, this_unprom,
					   common_type);
	      if (nops == 0)
		return 0;

	      max_nops -= nops;
	    }
	  else
	    {
	      /* Make sure that the operand is narrower than the result.  */
	      if (TYPE_PRECISION (this_unprom->type) * 2
		  > TYPE_PRECISION (type))
		return 0;

	      /* Update COMMON_TYPE for the new operand.  */
	      if (i == 0)
		*common_type = this_unprom->type;
	      else if (!vect_joust_widened_type (type, this_unprom->type,
						 common_type))
		return 0;
	    }
	}
      next_op += nops;
    }
  return next_op;
}

/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple *stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}

/* STMT2_INFO describes a type conversion that could be split into STMT1
   followed by a version of STMT2_INFO that takes NEW_RHS as its first
   input.  Try to do this using pattern statements, returning true on
   success.  */
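
/* An illustrative example: if STMT2_INFO is the conversion

     int x = (int) c;   // c of type char

   and the caller wants to tap into a 'short' mid-way point, STMT1 could
   be "short t = (short) c" with NEW_RHS == t, turning STMT2_INFO into
   "int x = (int) t".  This is how vect_convert_input below splits an
   existing cast.  */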

static bool
vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
		      gimple *stmt1, tree vectype)
{
  if (is_pattern_stmt_p (stmt2_info))
    {
      /* STMT2_INFO is part of a pattern.  Get the statement to which
	 the pattern is attached.  */
      stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
      vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting pattern statement: %G", stmt2_info->stmt);

      /* Since STMT2_INFO is a pattern statement, we can change it
	 in-situ without worrying about changing the code for the
	 containing block.  */
      gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
			   stmt2_info->stmt);
	}

      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
      if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
	/* STMT2_INFO is the actual pattern statement.  Add STMT1
	   to the end of the definition sequence.  */
	gimple_seq_add_stmt_without_update (def_seq, stmt1);
      else
	{
	  /* STMT2_INFO belongs to the definition sequence.  Insert STMT1
	     before it.  */
	  gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
	  gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
	}
      return true;
    }
  else
    {
      /* STMT2_INFO doesn't yet have a pattern.  Try to create a
	 two-statement pattern now.  */
      gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
      tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
      tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
      if (!lhs_vectype)
	return false;

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "Splitting statement: %G", stmt2_info->stmt);

      /* Add STMT1 as a singleton pattern definition sequence.  */
      gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
      vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
      gimple_seq_add_stmt_without_update (def_seq, stmt1);

      /* Build the second of the two pattern statements.  */
      tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
      gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
      vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);

      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_NOTE, vect_location,
			   "into pattern statements: %G", stmt1);
	  dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
	}

      return true;
    }
}

/* Convert UNPROM to TYPE and return the result, adding new statements
   to STMT_INFO's pattern definition statements if no better way is
   available.  VECTYPE is the vector form of TYPE.  */

static tree
vect_convert_input (stmt_vec_info stmt_info, tree type,
		    vect_unpromoted_value *unprom, tree vectype)
{
  /* Check for a no-op conversion.  */
  if (types_compatible_p (type, TREE_TYPE (unprom->op)))
    return unprom->op;

  /* Allow the caller to create constant vect_unpromoted_values.  */
  if (TREE_CODE (unprom->op) == INTEGER_CST)
    return wide_int_to_tree (type, wi::to_widest (unprom->op));

  tree input = unprom->op;
  if (unprom->caster)
    {
      tree lhs = gimple_get_lhs (unprom->caster->stmt);
      tree lhs_type = TREE_TYPE (lhs);

      /* If the result of the existing cast is the right width, use it
	 instead of the source of the cast.  */
      if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
	input = lhs;
      /* If the precision we want is between the source and result
	 precisions of the existing cast, try splitting the cast into
	 two and tapping into a mid-way point.  */
      else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
	       && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
	{
	  /* In order to preserve the semantics of the original cast,
	     give the mid-way point the same signedness as the input value.

	     It would be possible to use a signed type here instead if
	     TYPE is signed and UNPROM->TYPE is unsigned, but that would
	     make the sign of the midtype sensitive to the order in
	     which we process the statements, since the signedness of
	     TYPE is the signedness required by just one of possibly
	     many users.  Also, unsigned promotions are usually as cheap
	     as or cheaper than signed ones, so it's better to keep an
	     unsigned promotion.  */
	  tree midtype = build_nonstandard_integer_type
	    (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
	  tree vec_midtype = get_vectype_for_scalar_type (midtype);
	  if (vec_midtype)
	    {
	      input = vect_recog_temp_ssa_var (midtype, NULL);
	      gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
						       unprom->op);
	      if (!vect_split_statement (unprom->caster, input, new_stmt,
					 vec_midtype))
		append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
	    }
	}

      /* See if we can reuse an existing result.  */
      if (types_compatible_p (type, TREE_TYPE (input)))
	return input;
    }

  /* We need a new conversion statement.  */
  tree new_op = vect_recog_temp_ssa_var (type, NULL);
  gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);

  /* If OP is an external value, see if we can insert the new statement
     on an incoming edge.  */
  if (input == unprom->op && unprom->dt == vect_external_def)
    if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
      {
	basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
	gcc_assert (!new_bb);
	return new_op;
      }

  /* As a (common) last resort, add the statement to the pattern itself.  */
  append_pattern_def_seq (stmt_info, new_stmt, vectype);
  return new_op;
}

/* Invoke vect_convert_input for N elements of UNPROM and store the
   result in the corresponding elements of RESULT.  */
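
/* Note that duplicate operands are converted only once: if UNPROM[J]
   and UNPROM[I] wrap the same value (as in a squaring "a * a"), the
   converted RESULT[J] is simply reused for RESULT[I].  */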

static void
vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
		     tree *result, tree type, vect_unpromoted_value *unprom,
		     tree vectype)
{
  for (unsigned int i = 0; i < n; ++i)
    {
      unsigned int j;
      for (j = 0; j < i; ++j)
	if (unprom[j].op == unprom[i].op)
	  break;
      if (j < i)
	result[i] = result[j];
      else
	result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
    }
}

/* The caller has created a (possibly empty) sequence of pattern definition
   statements followed by a single statement PATTERN_STMT.  Cast the result
   of this final statement to TYPE.  If a new statement is needed, add
   PATTERN_STMT to the end of STMT_INFO's pattern definition statements
   and return the new statement, otherwise return PATTERN_STMT as-is.
   VECITYPE is the vector form of PATTERN_STMT's result type.  */

static gimple *
vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
		     tree vecitype)
{
  tree lhs = gimple_get_lhs (pattern_stmt);
  if (!types_compatible_p (type, TREE_TYPE (lhs)))
    {
      append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
      tree cast_var = vect_recog_temp_ssa_var (type, NULL);
      pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
    }
  return pattern_stmt;
}

/* Return true if STMT_VINFO describes a reduction for which reassociation
   is allowed.  If STMT_INFO is part of a group, assume that it's part of
   a reduction chain and optimistically assume that all statements
   except the last allow reassociation.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
{
  return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
	  ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
	  : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
}

/* As above, but also require it to have code CODE and to be a reduction
   in the outermost loop.  When returning true, store the operands in
   *OP0_OUT and *OP1_OUT.  */

static bool
vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
				tree *op0_out, tree *op1_out)
{
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  if (!loop_info)
    return false;

  gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
  if (!assign || gimple_assign_rhs_code (assign) != code)
    return false;

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  class loop *loop = LOOP_VINFO_LOOP (loop_info);
  if (loop && nested_in_vect_loop_p (loop, stmt_info))
    return false;

  if (!vect_reassociating_reduction_p (stmt_info))
    return false;

  *op0_out = gimple_assign_rhs1 (assign);
  *op1_out = gimple_assign_rhs2 (assign);
  return true;
}

/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE1 prod;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  prod = x_T * y_T;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  tree oprnd0, oprnd1;
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type, half_type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DPROD = DX * DY;
	  DDPROD = (TYPE2) DPROD;
	  sum_1 = DDPROD + sum_0;
     In which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size as DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  vect_unpromoted_value unprom_mult;
  oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr.  */
  if (!oprnd0)
    return NULL;

  stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
  if (!mult_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom0[2];
  if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
			     false, 2, unprom0, &half_type))
    return NULL;

  /* If there are two widening operations, make sure they agree on
     the sign of the extension.  */
  if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
      && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs in the appropriate types.  */
  tree mult_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
		       unprom0, half_vectype);

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
				      mult_oprnd[0], mult_oprnd[1], oprnd1);

  return pattern_stmt;
}


/* Function vect_recog_sad_pattern

   Try to find the following Sum of Absolute Difference (SAD) pattern:

     type x_t, y_t;
     signed TYPE1 diff, abs_diff;
     TYPE2 sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = ...
     S2  y_t = ...
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     S5  diff = x_T - y_T;
     S6  abs_diff = ABS_EXPR <diff>;
     [S7  abs_diff = (TYPE2) abs_diff;  #optional]
     S8  sum_1 = abs_diff + sum_0;

   where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is
   the same size as 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S8, the pattern
   {S3,S4,S5,S6,S7,S8} will be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	SAD_EXPR <x_t, y_t, sum_0>
  */

static gimple *
vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree half_type;

  /* Look for the following pattern
	  DX = (TYPE1) X;
	  DY = (TYPE1) Y;
	  DDIFF = DX - DY;
	  DAD = ABS_EXPR <DDIFF>;
	  DDAD = (TYPE2) DAD;
	  sum_1 = DDAD + sum_0;
     In which
     - DX is at least double the size of X
     - DY is at least double the size of Y
     - DX, DY, DDIFF, DAD all have the same type
     - sum is the same size as DAD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD w+ sum_0;    #widen summation
     or
       DDIFF = X w- Y;          #widen sub
       DAD = ABS_EXPR <DDIFF>;
       sum_1 = DAD + sum_0;     #summation
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  tree plus_oprnd0, plus_oprnd1;
  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &plus_oprnd0, &plus_oprnd1))
    return NULL;

  tree sum_type = gimple_expr_type (last_stmt);

  /* Any non-truncating sequence of conversions is OK here, since
     with a successful match, the result of the ABS(U) is known to fit
     within the nonnegative range of the result type.  (It cannot be the
     negative of the minimum signed value due to the range of the widening
     MINUS_EXPR.)  */
  vect_unpromoted_value unprom_abs;
  plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
						      &unprom_abs);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that plus_oprnd1 is the reduction variable (defined by a
     loop-header phi), and plus_oprnd0 is an ssa-name defined by a stmt in
     the loop body.  Then check that plus_oprnd0 is defined by an abs_expr.  */

  if (!plus_oprnd0)
    return NULL;

  stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
  if (!abs_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
  if (!abs_stmt
      || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
	  && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
    return NULL;

  tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
  tree abs_type = TREE_TYPE (abs_oprnd);
  if (TYPE_UNSIGNED (abs_type))
    return NULL;

  /* Peel off conversions from the ABS input.  This can involve sign
     changes (e.g. from an unsigned subtraction to a signed ABS input)
     or signed promotion, but it can't include unsigned promotion.
     (Note that ABS of an unsigned promotion should have been folded
     away before now anyway.)  */
  vect_unpromoted_value unprom_diff;
  abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
						    &unprom_diff);
  if (!abs_oprnd)
    return NULL;
  if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
      && TYPE_UNSIGNED (unprom_diff.type))
    return NULL;

  /* We then detect if the operand of abs_expr is defined by a minus_expr.  */
  stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
  if (!diff_stmt_vinfo)
    return NULL;

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in
     a phi inside the loop (in case we are analyzing an outer-loop).  */
  vect_unpromoted_value unprom[2];
  if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
			     false, 2, unprom, &half_type))
    return NULL;

  vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);

  tree half_vectype;
  if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
					type_out, &half_vectype))
    return NULL;

  /* Get the inputs to the SAD_EXPR in the appropriate types.  */
  tree sad_oprnd[2];
  vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
		       unprom, half_vectype);

  tree var = vect_recog_temp_ssa_var (sum_type, NULL);
  gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
					      sad_oprnd[1], plus_oprnd1);

  return pattern_stmt;
}

/* Recognize an operation that performs ORIG_CODE on widened inputs,
   so that it can be treated as though it had the form:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | RES_TYPE a_extend = (RES_TYPE) a_cast;  // promotion from HALF_TYPE
    | RES_TYPE b_extend = (RES_TYPE) b_cast;  // promotion from HALF_TYPE
    | RES_TYPE res = a_extend ORIG_CODE b_extend;

   Try to replace the pattern with:

      A_TYPE a;
      B_TYPE b;
      HALF_TYPE a_cast = (HALF_TYPE) a;  // possible no-op
      HALF_TYPE b_cast = (HALF_TYPE) b;  // possible no-op
    | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
    | RES_TYPE res = (EXT_TYPE) ext;  // possible no-op

   where EXT_TYPE is wider than HALF_TYPE but has the same signedness.

   SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts.  NAME is the
   name of the pattern being matched, for dump purposes.  */
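
/* An illustrative instance (the widen_mult case below): with short
   inputs a and b,

     int res = (int) a * (int) b;

   can be replaced by the pattern statement

     int res = WIDEN_MULT_EXPR <a, b>;

   when the target supports a widening multiply on the vector form of
   'short'.  */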
20f06221 1185
355fe088 1186static gimple *
ba9728b0 1187vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
00347934
RS
1188 tree_code orig_code, tree_code wide_code,
1189 bool shift_p, const char *name)
20f06221 1190{
ba9728b0 1191 gimple *last_stmt = last_stmt_info->stmt;
89d67cca 1192
00347934
RS
1193 vect_unpromoted_value unprom[2];
1194 tree half_type;
1195 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1196 shift_p, 2, unprom, &half_type))
89d67cca
DN
1197 return NULL;
1198
00347934
RS
1199 /* Pattern detected. */
1200 vect_pattern_detected (name, last_stmt);
89d67cca 1201
00347934 1202 tree type = gimple_expr_type (last_stmt);
d367387c 1203 tree itype = type;
00347934
RS
1204 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1205 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1206 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1207 TYPE_UNSIGNED (half_type));
89d67cca
DN
1208
1209 /* Check target support */
00347934
RS
1210 tree vectype = get_vectype_for_scalar_type (half_type);
1211 tree vecitype = get_vectype_for_scalar_type (itype);
1212 enum tree_code dummy_code;
1213 int dummy_int;
1214 auto_vec<tree> dummy_vec;
03d3e953 1215 if (!vectype
d367387c 1216 || !vecitype
86a91c0a 1217 || !supportable_widening_operation (wide_code, last_stmt_info,
d367387c 1218 vecitype, vectype,
a86ec597
RH
1219 &dummy_code, &dummy_code,
1220 &dummy_int, &dummy_vec))
89d67cca
DN
1221 return NULL;
1222
d367387c 1223 *type_out = get_vectype_for_scalar_type (type);
1cbfeccc
RS
1224 if (!*type_out)
1225 return NULL;
89d67cca 1226
00347934
RS
1227 tree oprnd[2];
1228 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
d367387c 1229
00347934
RS
1230 tree var = vect_recog_temp_ssa_var (itype, NULL);
1231 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1232 oprnd[0], oprnd[1]);
d367387c 1233
00347934 1234 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
20f06221
DN
1235}
1236
00347934
RS
1237/* Try to detect multiplication on widened inputs, converting MULT_EXPR
1238 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1239
1240static gimple *
ba9728b0 1241vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
00347934 1242{
ba9728b0 1243 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
00347934
RS
1244 WIDEN_MULT_EXPR, false,
1245 "vect_recog_widen_mult_pattern");
1246}
20f06221 1247
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	x = x * x
   or
	x = sqrt (x)
*/

static gimple *
vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree base, exp;
  gimple *stmt;
  tree var;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
    return NULL;

  switch (gimple_call_combined_fn (last_stmt))
    {
    CASE_CFN_POW:
    CASE_CFN_POWI:
      break;

    default:
      return NULL;
    }

  base = gimple_call_arg (last_stmt, 0);
  exp = gimple_call_arg (last_stmt, 1);
  if (TREE_CODE (exp) != REAL_CST
      && TREE_CODE (exp) != INTEGER_CST)
    {
      if (flag_unsafe_math_optimizations
	  && TREE_CODE (base) == REAL_CST
	  && gimple_call_builtin_p (last_stmt, BUILT_IN_NORMAL))
	{
	  combined_fn log_cfn;
	  built_in_function exp_bfn;
	  switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
	    {
	    case BUILT_IN_POW:
	      log_cfn = CFN_BUILT_IN_LOG;
	      exp_bfn = BUILT_IN_EXP;
	      break;
	    case BUILT_IN_POWF:
	      log_cfn = CFN_BUILT_IN_LOGF;
	      exp_bfn = BUILT_IN_EXPF;
	      break;
	    case BUILT_IN_POWL:
	      log_cfn = CFN_BUILT_IN_LOGL;
	      exp_bfn = BUILT_IN_EXPL;
	      break;
	    default:
	      return NULL;
	    }
	  tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
	  tree exp_decl = builtin_decl_implicit (exp_bfn);
	  /* Optimize pow (C, x) as exp (log (C) * x).  Normally match.pd
	     does that, but if C is a power of 2, we want to use
	     exp2 (log2 (C) * x) in the non-vectorized version, but for
	     vectorization we don't have vectorized exp2.  */
	  if (logc
	      && TREE_CODE (logc) == REAL_CST
	      && exp_decl
	      && lookup_attribute ("omp declare simd",
				   DECL_ATTRIBUTES (exp_decl)))
	    {
	      cgraph_node *node = cgraph_node::get_create (exp_decl);
	      if (node->simd_clones == NULL)
		{
		  if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
		      || node->definition)
		    return NULL;
		  expand_simd_clones (node);
		  if (node->simd_clones == NULL)
		    return NULL;
		}
	      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
	      if (!*type_out)
		return NULL;
	      tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
	      append_pattern_def_seq (stmt_vinfo, g);
	      tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
	      g = gimple_build_call (exp_decl, 1, def);
	      gimple_call_set_lhs (g, res);
	      return g;
	    }
	}

      return NULL;
    }

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  /* Catch squaring.  */
  if ((tree_fits_shwi_p (exp)
       && tree_to_shwi (exp) == 2)
      || (TREE_CODE (exp) == REAL_CST
	  && real_equal (&TREE_REAL_CST (exp), &dconst2)))
    {
      if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
					    TREE_TYPE (base), type_out))
	return NULL;

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign (var, MULT_EXPR, base, base);
      return stmt;
    }

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
    {
      *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
      if (*type_out
	  && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
					     OPTIMIZE_FOR_SPEED))
	{
	  gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
	  var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
	  gimple_call_set_lhs (stmt, var);
	  gimple_call_set_nothrow (stmt, true);
	  return stmt;
	}
    }

  return NULL;
}


/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     type x_t;
     TYPE x_T, sum = init;
   loop:
     sum_0 = phi <init, sum_1>
     S1  x_t = *p;
     S2  x_T = (TYPE) x_t;
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This
   is a special case of a reduction computation.

   Input:

   * STMT_VINFO: The stmt from which the pattern search begins.  In the
   example, when this function is called with S3, the pattern {S2,S3} will
   be detected.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
	WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
	 vectorized without preserving all the intermediate results.  It
	 produces only N/2 (widened) results (by summing up pairs of
	 intermediate results) rather than all N results.  Therefore, we
	 cannot allow this pattern when we want to get all the results and in
	 the correct order (as is the case when this computation is in an
	 inner-loop nested in an outer-loop that is being vectorized).  */

static gimple *
vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree oprnd0, oprnd1;
  vec_info *vinfo = stmt_vinfo->vinfo;
  tree type;
  gimple *pattern_stmt;
  tree var;

  /* Look for the following pattern
	  DX = (TYPE) X;
	  sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.
   */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
				       &oprnd0, &oprnd1))
    return NULL;

  type = gimple_expr_type (last_stmt);

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  vect_unpromoted_value unprom0;
  if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
      || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
    return NULL;

  vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);

  if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
					type_out))
    return NULL;

  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);

  return pattern_stmt;
}

/* Recognize cases in which an operation is performed in one type WTYPE
   but could be done more efficiently in a narrower type NTYPE.  For example,
   if we have:

     ATYPE a;  // narrower than NTYPE
     BTYPE b;  // narrower than NTYPE
     WTYPE aw = (WTYPE) a;
     WTYPE bw = (WTYPE) b;
     WTYPE res = aw + bw;  // only uses of aw and bw

   then it would be more efficient to do:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   Other situations include things like:

     ATYPE a;  // NTYPE or narrower
     WTYPE aw = (WTYPE) a;
     WTYPE res = aw + b;

   when only "(NTYPE) res" is significant.  In that case it's more efficient
   to truncate "b" and do the operation on NTYPE instead:

     NTYPE an = (NTYPE) a;
     NTYPE bn = (NTYPE) b;  // truncation
     NTYPE resn = an + bn;
     WTYPE res = (WTYPE) resn;

   All users of "res" should then use "resn" instead, making the final
   statement dead (not marked as relevant).  The final statement is still
   needed to maintain the type correctness of the IR.

   vect_determine_precisions has already determined the minimum
   precision of the operation and the minimum precision required
   by users of the result.  */

static gimple *
vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
{
  gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
  if (!last_stmt)
    return NULL;

  /* See whether we have found that this operation can be done on a
     narrower type without changing its semantics.  */
  unsigned int new_precision = last_stmt_info->operation_precision;
  if (!new_precision)
    return NULL;

  vec_info *vinfo = last_stmt_info->vinfo;
  tree lhs = gimple_assign_lhs (last_stmt);
  tree type = TREE_TYPE (lhs);
  tree_code code = gimple_assign_rhs_code (last_stmt);

  /* Keep the first operand of a COND_EXPR as-is: only the other two
     operands are interesting.  */
  unsigned int first_op = (code == COND_EXPR ? 2 : 1);

  /* Check the operands.  */
  unsigned int nops = gimple_num_ops (last_stmt) - first_op;
  auto_vec <vect_unpromoted_value, 3> unprom (nops);
  unprom.quick_grow (nops);
  unsigned int min_precision = 0;
  bool single_use_p = false;
  for (unsigned int i = 0; i < nops; ++i)
    {
      tree op = gimple_op (last_stmt, first_op + i);
      if (TREE_CODE (op) == INTEGER_CST)
	unprom[i].set_op (op, vect_constant_def);
      else if (TREE_CODE (op) == SSA_NAME)
	{
	  bool op_single_use_p = true;
	  if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
						     &op_single_use_p))
	    return NULL;
	  /* If:

	     (1) N bits of the result are needed;
	     (2) all inputs are widened from M<N bits; and
	     (3) one operand OP is a single-use SSA name

	     we can shift the M->N widening from OP to the output
	     without changing the number or type of extensions involved.
	     This then reduces the number of copies of STMT_INFO.

	     If instead of (3) more than one operand is a single-use SSA name,
	     shifting the extension to the output is even more of a win.

	     If instead:

	     (1) N bits of the result are needed;
	     (2) one operand OP2 is widened from M2<N bits;
	     (3) another operand OP1 is widened from M1<M2 bits; and
	     (4) both OP1 and OP2 are single-use

	     the choice is between:

	     (a) truncating OP2 to M1, doing the operation on M1,
		 and then widening the result to N

	     (b) widening OP1 to M2, doing the operation on M2, and then
		 widening the result to N

	     Both shift the M2->N widening of the inputs to the output.
	     (a) additionally shifts the M1->M2 widening to the output;
	     it requires fewer copies of STMT_INFO but requires an extra
	     M2->M1 truncation.

	     Which is better will depend on the complexity and cost of
	     STMT_INFO, which is hard to predict at this stage.  However,
	     a clear tie-breaker in favor of (b) is the fact that the
	     truncation in (a) increases the length of the operation chain.

	     If instead of (4) only one of OP1 or OP2 is single-use,
	     (b) is still a win over doing the operation in N bits:
	     it still shifts the M2->N widening on the single-use operand
	     to the output and reduces the number of STMT_INFO copies.

	     If neither operand is single-use then operating on fewer than
	     N bits might lead to more extensions overall.  Whether it does
	     or not depends on global information about the vectorization
	     region, and whether that's a good trade-off would again
	     depend on the complexity and cost of the statements involved,
	     as well as things like register pressure that are not normally
	     modelled at this stage.  We therefore ignore these cases
	     and just optimize the clear single-use wins above.

	     Thus we take the maximum precision of the unpromoted operands
	     and record whether any operand is single-use.  */
	  if (unprom[i].dt == vect_internal_def)
	    {
	      min_precision = MAX (min_precision,
				   TYPE_PRECISION (unprom[i].type));
	      single_use_p |= op_single_use_p;
	    }
	}
    }

  /* Although the operation could be done in operation_precision, we have
1626 to balance that against introducing extra truncations or extensions.
1627 Calculate the minimum precision that can be handled efficiently.
1628
1629 The loop above determined that the operation could be handled
1630 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1631 extension from the inputs to the output without introducing more
1632 instructions, and would reduce the number of instructions required
1633 for STMT_INFO itself.
1634
1635 vect_determine_precisions has also determined that the result only
1636 needs min_output_precision bits. Truncating by a factor of N times
1637 requires a tree of N - 1 instructions, so if TYPE is N times wider
1638 than min_output_precision, doing the operation in TYPE and truncating
1639 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1640 In contrast:
1641
1642 - truncating the input to a unary operation and doing the operation
1643 in the new type requires at most N - 1 + 1 = N instructions per
1644 output vector
1645
1646 - doing the same for a binary operation requires at most
1647 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1648
1649 Both unary and binary operations require fewer instructions than
1650 this if the operands were extended from a suitable truncated form.
1651 Thus there is usually nothing to lose by doing operations in
1652 min_output_precision bits, but there can be something to gain. */
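
  /* A hypothetical worked instance of the counts above: if TYPE is
     32 bits and min_output_precision is 8 then N = 4, so doing the
     operation in TYPE and truncating costs 4 + 3 = 7 instructions per
     output vector, while truncating the inputs of a unary operation
     first costs at most 3 + 1 = 4.  */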
1653 if (!single_use_p)
1654 min_precision = last_stmt_info->min_output_precision;
1107f3ae 1655 else
370c2ebe
RS
1656 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1657
1658 /* Apply the minimum efficient precision we just calculated. */
1659 if (new_precision < min_precision)
1660 new_precision = min_precision;
1661 if (new_precision >= TYPE_PRECISION (type))
1662 return NULL;
1663
1664 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1665
1666 *type_out = get_vectype_for_scalar_type (type);
1667 if (!*type_out)
1668 return NULL;
1669
1670 /* We've found a viable pattern. Get the new type of the operation. */
1671 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1672 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1673
a1ff7f14
RS
1674 /* If we're truncating an operation, we need to make sure that we
1675 don't introduce new undefined overflow. The codes tested here are
1676 a subset of those accepted by vect_truncatable_operation_p. */
1677 tree op_type = new_type;
1678 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1679 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1680 op_type = build_nonstandard_integer_type (new_precision, true);
1681
370c2ebe
RS
1682 /* We specifically don't check here whether the target supports the
1683 new operation, since it might be something that a later pattern
1684 wants to rewrite anyway. If targets have a minimum element size
1685 for some optabs, we should pattern-match smaller ops to larger ops
1686 where beneficial. */
1687 tree new_vectype = get_vectype_for_scalar_type (new_type);
a1ff7f14
RS
1688 tree op_vectype = get_vectype_for_scalar_type (op_type);
1689 if (!new_vectype || !op_vectype)
370c2ebe 1690 return NULL;
1107f3ae 1691
370c2ebe 1692 if (dump_enabled_p ())
3c2a8ed0
DM
1693 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1694 type, new_type);
1107f3ae 1695
a1ff7f14 1696 /* Calculate the rhs operands for an operation on OP_TYPE. */
370c2ebe
RS
1697 tree ops[3] = {};
1698 for (unsigned int i = 1; i < first_op; ++i)
1699 ops[i - 1] = gimple_op (last_stmt, i);
1700 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
a1ff7f14 1701 op_type, &unprom[0], op_vectype);
370c2ebe 1702
a1ff7f14
RS
1703 /* Use the operation to produce a result of type OP_TYPE. */
1704 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
370c2ebe
RS
1705 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1706 ops[0], ops[1], ops[2]);
1707 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1708
1709 if (dump_enabled_p ())
3c2a8ed0
DM
1710 dump_printf_loc (MSG_NOTE, vect_location,
1711 "created pattern stmt: %G", pattern_stmt);
1107f3ae 1712
a1ff7f14
RS
1713 /* Convert back to the original signedness, if OP_TYPE is different
1714 from NEW_TYPE. */
1715 if (op_type != new_type)
1716 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1717 pattern_stmt, op_vectype);
1718
1719 /* Promote the result to the original type. */
370c2ebe
RS
1720 pattern_stmt = vect_convert_output (last_stmt_info, type,
1721 pattern_stmt, new_vectype);
1107f3ae 1722
370c2ebe 1723 return pattern_stmt;
1107f3ae
IR
1724}
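
/* Illustrative only (not from the original sources): with 128-bit
   vectors and

     unsigned char a, b;
     int res = (int) a + (int) b;   // only 8 bits of res are needed

   the pattern above emits

     unsigned char resn = a + b;
     int res = (int) resn;

   processing 16 elements per vector operation instead of 4.  */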
1725
0267732b
RS
1726/* Recognize the patterns:
1727
1728 ATYPE a; // narrower than TYPE
1729 BTYPE b; // narrower than TYPE
1730 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1731 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1732
1733 where only the bottom half of avg is used. Try to transform them into:
1734
1735 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1736 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1737
1738 followed by:
1739
1740 TYPE avg = (TYPE) avg';
1741
1742 where NTYPE is no wider than half of TYPE. Since only the bottom half
1743 of avg is used, all or part of the cast of avg' should become redundant. */
1744
1745static gimple *
ba9728b0 1746vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
0267732b
RS
1747{
1748 /* Check for a shift right by one bit. */
ba9728b0
RS
1749 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1750 vec_info *vinfo = last_stmt_info->vinfo;
0267732b
RS
1751 if (!last_stmt
1752 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1753 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1754 return NULL;
1755
0267732b
RS
1756 /* Check that the shift result is wider than the users of the
1757 result need (i.e. that narrowing would be a natural choice). */
1758 tree lhs = gimple_assign_lhs (last_stmt);
1759 tree type = TREE_TYPE (lhs);
1760 unsigned int target_precision
1761 = vect_element_precision (last_stmt_info->min_output_precision);
1762 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1763 return NULL;
1764
a1ff7f14 1765 /* Look through any change in sign on the shift input. */
0267732b 1766 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
a1ff7f14
RS
1767 vect_unpromoted_value unprom_plus;
1768 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1769 &unprom_plus);
1770 if (!rshift_rhs
1771 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1772 return NULL;
1773
1774 /* Get the definition of the shift input. */
0267732b
RS
1775 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1776 if (!plus_stmt_info)
1777 return NULL;
1778
1779 /* Check whether the shift input can be seen as a tree of additions on
1780 2 or 3 widened inputs.
1781
1782 Note that the pattern should be a win even if the result of one or
1783 more additions is reused elsewhere: if the pattern matches, we'd be
1784 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1785 internal_fn ifn = IFN_AVG_FLOOR;
1786 vect_unpromoted_value unprom[3];
1787 tree new_type;
1788 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1789 PLUS_EXPR, false, 3,
1790 unprom, &new_type);
1791 if (nops == 0)
1792 return NULL;
1793 if (nops == 3)
1794 {
1795 /* Check that one operand is 1. */
1796 unsigned int i;
1797 for (i = 0; i < 3; ++i)
1798 if (integer_onep (unprom[i].op))
1799 break;
1800 if (i == 3)
1801 return NULL;
1802 /* Throw away the 1 operand and keep the other two. */
1803 if (i < 2)
1804 unprom[i] = unprom[2];
1805 ifn = IFN_AVG_CEIL;
1806 }
1807
1808 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1809
1810 /* We know that:
1811
1812 (a) the operation can be viewed as:
1813
1814 TYPE widened0 = (TYPE) UNPROM[0];
1815 TYPE widened1 = (TYPE) UNPROM[1];
1816 TYPE tmp1 = widened0 + widened1 {+ 1};
1817 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1818
1819 (b) the first two statements are equivalent to:
1820
1821 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1822 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1823
1824 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1825 where sensible;
1826
1827 (d) all the operations can be performed correctly at twice the width of
1828 NEW_TYPE, due to the nature of the average operation; and
1829
1830 (e) users of the result of the right shift need only TARGET_PRECISION
1831 bits, where TARGET_PRECISION is no more than half of TYPE's
1832 precision.
1833
1834 Under these circumstances, the only situation in which NEW_TYPE
1835 could be narrower than TARGET_PRECISION is if widened0, widened1
1836 and an addition result are all used more than once. Thus we can
1837 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1838 as "free", whereas widening the result of the average instruction
1839 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1840 therefore better not to go narrower than TARGET_PRECISION. */
1841 if (TYPE_PRECISION (new_type) < target_precision)
1842 new_type = build_nonstandard_integer_type (target_precision,
1843 TYPE_UNSIGNED (new_type));
1844
1845 /* Check for target support. */
1846 tree new_vectype = get_vectype_for_scalar_type (new_type);
1847 if (!new_vectype
1848 || !direct_internal_fn_supported_p (ifn, new_vectype,
1849 OPTIMIZE_FOR_SPEED))
1850 return NULL;
1851
1852 /* The IR requires a valid vector type for the cast result, even though
1853 it's likely to be discarded. */
1854 *type_out = get_vectype_for_scalar_type (type);
1855 if (!*type_out)
1856 return NULL;
1857
1858 /* Generate the IFN_AVG* call. */
1859 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1860 tree new_ops[2];
1861 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1862 unprom, new_vectype);
1863 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1864 new_ops[1]);
1865 gimple_call_set_lhs (average_stmt, new_var);
1866 gimple_set_location (average_stmt, gimple_location (last_stmt));
1867
1868 if (dump_enabled_p ())
3c2a8ed0
DM
1869 dump_printf_loc (MSG_NOTE, vect_location,
1870 "created pattern stmt: %G", average_stmt);
0267732b 1871
0267732b
RS
1872 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1873}
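
/* Illustrative only: for

     unsigned char a, b;
     unsigned char avg = (a + b + 1) >> 1;   // additions promoted to int

   the pattern above produces

     avg' = .AVG_CEIL (a, b);
     avg = (unsigned char) avg';

   which maps to instructions such as AArch64 URHADD or x86 PAVGB and
   avoids the widening to int entirely.  */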
1874
370c2ebe
RS
1875/* Recognize cases in which the input to a cast is wider than its
1876 output, and the input is fed by a widening operation. Fold this
1877 by removing the unnecessary intermediate widening. E.g.:
1107f3ae 1878
370c2ebe
RS
1879 unsigned char a;
1880 unsigned int b = (unsigned int) a;
1881 unsigned short c = (unsigned short) b;
1107f3ae 1882
370c2ebe 1883 -->
1107f3ae 1884
370c2ebe 1885 unsigned short c = (unsigned short) a;
1107f3ae 1886
370c2ebe
RS
1887 Although this is rare in input IR, it is an expected side-effect
1888 of the over-widening pattern above.
1107f3ae 1889
370c2ebe
RS
1890 This is beneficial also for integer-to-float conversions, if the
1891 widened integer has more bits than the float, and if the unwidened
1892 input doesn't. */
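
/* For instance (a hypothetical example): in

     unsigned short a;
     unsigned long long b = (unsigned long long) a;
     float f = (float) b;

   the 64-bit intermediate has more bits than float, but the 16-bit
   input does not, so "f = (float) a" yields the same value with a
   cheaper vector conversion.  */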
1107f3ae 1893
370c2ebe 1894static gimple *
ba9728b0 1895vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
370c2ebe
RS
1896{
1897 /* Check for a cast, including an integer-to-float conversion. */
ba9728b0 1898 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
370c2ebe
RS
1899 if (!last_stmt)
1900 return NULL;
1901 tree_code code = gimple_assign_rhs_code (last_stmt);
1902 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1903 return NULL;
1107f3ae 1904
370c2ebe
RS
1905 /* Make sure that the rhs is a scalar with a natural bitsize. */
1906 tree lhs = gimple_assign_lhs (last_stmt);
1907 if (!lhs)
1908 return NULL;
1909 tree lhs_type = TREE_TYPE (lhs);
1910 scalar_mode lhs_mode;
1911 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1912 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1913 return NULL;
1107f3ae 1914
370c2ebe
RS
1915 /* Check for a narrowing operation (from a vector point of view). */
1916 tree rhs = gimple_assign_rhs1 (last_stmt);
1917 tree rhs_type = TREE_TYPE (rhs);
1918 if (!INTEGRAL_TYPE_P (rhs_type)
1919 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1920 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1921 return NULL;
82db3d43 1922
370c2ebe 1923 /* Try to find an unpromoted input. */
370c2ebe
RS
1924 vec_info *vinfo = last_stmt_info->vinfo;
1925 vect_unpromoted_value unprom;
1926 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1927 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1928 return NULL;
1cbfeccc 1929
370c2ebe
RS
1930 /* If the bits above RHS_TYPE matter, make sure that they're the
1931 same when extending from UNPROM as they are when extending from RHS. */
1932 if (!INTEGRAL_TYPE_P (lhs_type)
1933 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1934 return NULL;
1107f3ae 1935
370c2ebe
RS
1936 /* We can get the same result by casting UNPROM directly, to avoid
1937 the unnecessary widening and narrowing. */
1938 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
1107f3ae 1939
370c2ebe
RS
1940 *type_out = get_vectype_for_scalar_type (lhs_type);
1941 if (!*type_out)
1107f3ae
IR
1942 return NULL;
1943
370c2ebe
RS
1944 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1945 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1946 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1107f3ae
IR
1947
1948 return pattern_stmt;
1949}
1950
00347934
RS
1951/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1952 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
36ba4aae 1953
355fe088 1954static gimple *
ba9728b0 1955vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
36ba4aae 1956{
ba9728b0 1957 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
00347934 1958 WIDEN_LSHIFT_EXPR, true,
d379ac22 1959 "vect_recog_widen_shift_pattern");
7e9a3abb
JJ
1960}
1961
 1962/* Detect a rotate pattern that wouldn't be otherwise vectorized:
1963
1964 type a_t, b_t, c_t;
1965
1966 S0 a_t = b_t r<< c_t;
1967
1968 Input/Output:
1969
ba9728b0 1970 * STMT_VINFO: The stmt from which the pattern search begins,
7e9a3abb
JJ
1971 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1972 with a sequence:
1973
1974 S1 d_t = -c_t;
1975 S2 e_t = d_t & (B - 1);
1976 S3 f_t = b_t << c_t;
1977 S4 g_t = b_t >> e_t;
1978 S0 a_t = f_t | g_t;
1979
 1980 where B is the element bitsize of type.
1981
1982 Output:
1983
7e9a3abb
JJ
1984 * TYPE_OUT: The type of the output of this pattern.
1985
1986 * Return value: A new stmt that will be used to replace the rotate
1987 S0 stmt. */
1988
355fe088 1989static gimple *
ba9728b0 1990vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
7e9a3abb 1991{
ba9728b0 1992 gimple *last_stmt = stmt_vinfo->stmt;
7e9a3abb 1993 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
355fe088 1994 gimple *pattern_stmt, *def_stmt;
7e9a3abb 1995 enum tree_code rhs_code;
310213d4 1996 vec_info *vinfo = stmt_vinfo->vinfo;
7e9a3abb
JJ
1997 enum vect_def_type dt;
1998 optab optab1, optab2;
68119618 1999 edge ext_def = NULL;
7e9a3abb
JJ
2000
2001 if (!is_gimple_assign (last_stmt))
2002 return NULL;
2003
2004 rhs_code = gimple_assign_rhs_code (last_stmt);
2005 switch (rhs_code)
2006 {
2007 case LROTATE_EXPR:
2008 case RROTATE_EXPR:
2009 break;
2010 default:
2011 return NULL;
2012 }
2013
7e9a3abb
JJ
2014 lhs = gimple_assign_lhs (last_stmt);
2015 oprnd0 = gimple_assign_rhs1 (last_stmt);
2016 type = TREE_TYPE (oprnd0);
2017 oprnd1 = gimple_assign_rhs2 (last_stmt);
2018 if (TREE_CODE (oprnd0) != SSA_NAME
2019 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2020 || !INTEGRAL_TYPE_P (type)
2021 || !TYPE_UNSIGNED (type))
2022 return NULL;
2023
fef96d8e
RS
2024 stmt_vec_info def_stmt_info;
2025 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
7e9a3abb
JJ
2026 return NULL;
2027
2028 if (dt != vect_internal_def
2029 && dt != vect_constant_def
2030 && dt != vect_external_def)
2031 return NULL;
2032
2033 vectype = get_vectype_for_scalar_type (type);
2034 if (vectype == NULL_TREE)
2035 return NULL;
2036
2037 /* If vector/vector or vector/scalar rotate is supported by the target,
2038 don't do anything here. */
2039 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2040 if (optab1
2041 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2042 return NULL;
2043
310213d4 2044 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
7e9a3abb
JJ
2045 {
2046 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2047 if (optab2
2048 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2049 return NULL;
2050 }
2051
2052 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2053 don't do anything here either. */
2054 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2055 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2056 if (!optab1
2057 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2058 || !optab2
2059 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2060 {
310213d4 2061 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
7e9a3abb
JJ
2062 return NULL;
2063 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2064 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2065 if (!optab1
2066 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2067 || !optab2
2068 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2069 return NULL;
2070 }
2071
7e9a3abb 2072 *type_out = vectype;
7e9a3abb 2073
68119618 2074 if (dt == vect_external_def
3330053e
RS
2075 && TREE_CODE (oprnd1) == SSA_NAME)
2076 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
68119618 2077
7e9a3abb 2078 def = NULL_TREE;
7a504f33 2079 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
7e9a3abb 2080 if (TREE_CODE (oprnd1) == INTEGER_CST
7a504f33 2081 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
7e9a3abb
JJ
2082 def = oprnd1;
2083 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2084 {
2085 tree rhs1 = gimple_assign_rhs1 (def_stmt);
7a504f33 2086 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
7e9a3abb
JJ
2087 && TYPE_PRECISION (TREE_TYPE (rhs1))
2088 == TYPE_PRECISION (type))
2089 def = rhs1;
2090 }
2091
7e9a3abb
JJ
2092 if (def == NULL_TREE)
2093 {
2094 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2095 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
2096 if (ext_def)
2097 {
2098 basic_block new_bb
2099 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2100 gcc_assert (!new_bb);
2101 }
2102 else
2103 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
2104 }
2105 stype = TREE_TYPE (def);
7a504f33 2106 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
7e9a3abb
JJ
2107
2108 if (TREE_CODE (def) == INTEGER_CST)
2109 {
cc269bb6 2110 if (!tree_fits_uhwi_p (def)
7a504f33 2111 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
7e9a3abb
JJ
2112 || integer_zerop (def))
2113 return NULL;
2114 def2 = build_int_cst (stype,
7a504f33 2115 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
7e9a3abb
JJ
2116 }
2117 else
2118 {
2119 tree vecstype = get_vectype_for_scalar_type (stype);
7e9a3abb
JJ
2120
2121 if (vecstype == NULL_TREE)
2122 return NULL;
2123 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 2124 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
2125 if (ext_def)
2126 {
2127 basic_block new_bb
2128 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2129 gcc_assert (!new_bb);
2130 }
2131 else
776bfcea 2132 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e9a3abb
JJ
2133
2134 def2 = vect_recog_temp_ssa_var (stype, NULL);
7a504f33 2135 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
0d0e4a03
JJ
2136 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2137 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
2138 if (ext_def)
2139 {
2140 basic_block new_bb
2141 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2142 gcc_assert (!new_bb);
2143 }
2144 else
776bfcea 2145 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e9a3abb
JJ
2146 }
2147
2148 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2149 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2150 ? LSHIFT_EXPR : RSHIFT_EXPR,
2151 oprnd0, def);
7e9a3abb
JJ
2152 append_pattern_def_seq (stmt_vinfo, def_stmt);
2153
2154 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2155 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2156 ? RSHIFT_EXPR : LSHIFT_EXPR,
2157 oprnd0, def2);
7e9a3abb
JJ
2158 append_pattern_def_seq (stmt_vinfo, def_stmt);
2159
2160 /* Pattern detected. */
49d8df1b 2161 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
7e9a3abb
JJ
2162
2163 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2164 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2165 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb 2166
36ba4aae
IR
2167 return pattern_stmt;
2168}
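
/* A hypothetical trace of the rotate expansion above, for a 32-bit
   "x r<< n" with variable n:

     d = -n;
     e = d & 31;        // e == (32 - n) & 31
     f = x << n;
     g = x >> e;
     res = f | g;

   Masking with B - 1 keeps the right-shift amount in [0, 31], so
   n == 0 gives e = 0 and res = x rather than an undefined shift
   by 32.  */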
1107f3ae 2169
732a0ad3
JJ
2170/* Detect a vector by vector shift pattern that wouldn't be otherwise
2171 vectorized:
2172
2173 type a_t;
2174 TYPE b_T, res_T;
2175
2176 S1 a_t = ;
2177 S2 b_T = ;
2178 S3 res_T = b_T op a_t;
2179
 2180 where type 'TYPE' is a type with a different size from 'type',
2181 and op is <<, >> or rotate.
2182
2183 Also detect cases:
2184
2185 type a_t;
2186 TYPE b_T, c_T, res_T;
2187
2188 S0 c_T = ;
2189 S1 a_t = (type) c_T;
2190 S2 b_T = ;
2191 S3 res_T = b_T op a_t;
2192
2193 Input/Output:
2194
ba9728b0 2195 * STMT_VINFO: The stmt from which the pattern search begins,
732a0ad3
JJ
2196 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
 2197 with a shift/rotate which has the same type on both operands, in the
 2198 second case just b_T op c_T, in the first case with an added cast
363477c0 2199 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2200
2201 Output:
2202
732a0ad3
JJ
2203 * TYPE_OUT: The type of the output of this pattern.
2204
2205 * Return value: A new stmt that will be used to replace the shift/rotate
2206 S3 stmt. */
2207
355fe088 2208static gimple *
ba9728b0
RS
2209vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2210 tree *type_out)
732a0ad3 2211{
ba9728b0 2212 gimple *last_stmt = stmt_vinfo->stmt;
732a0ad3 2213 tree oprnd0, oprnd1, lhs, var;
25927307 2214 gimple *pattern_stmt;
732a0ad3 2215 enum tree_code rhs_code;
310213d4 2216 vec_info *vinfo = stmt_vinfo->vinfo;
732a0ad3
JJ
2217
2218 if (!is_gimple_assign (last_stmt))
2219 return NULL;
2220
2221 rhs_code = gimple_assign_rhs_code (last_stmt);
2222 switch (rhs_code)
2223 {
2224 case LSHIFT_EXPR:
2225 case RSHIFT_EXPR:
2226 case LROTATE_EXPR:
2227 case RROTATE_EXPR:
2228 break;
2229 default:
2230 return NULL;
2231 }
2232
732a0ad3
JJ
2233 lhs = gimple_assign_lhs (last_stmt);
2234 oprnd0 = gimple_assign_rhs1 (last_stmt);
2235 oprnd1 = gimple_assign_rhs2 (last_stmt);
2236 if (TREE_CODE (oprnd0) != SSA_NAME
2237 || TREE_CODE (oprnd1) != SSA_NAME
2238 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2be65d9e 2239 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
732a0ad3
JJ
2240 || TYPE_PRECISION (TREE_TYPE (lhs))
2241 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2242 return NULL;
2243
25927307
RS
2244 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2245 if (!def_vinfo)
732a0ad3
JJ
2246 return NULL;
2247
1cbfeccc
RS
2248 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2249 if (*type_out == NULL_TREE)
732a0ad3
JJ
2250 return NULL;
2251
81c40241 2252 tree def = NULL_TREE;
25927307 2253 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
0f8c840c 2254 if (def_stmt && gimple_assign_cast_p (def_stmt))
732a0ad3
JJ
2255 {
2256 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2257 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2258 && TYPE_PRECISION (TREE_TYPE (rhs1))
2259 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
0179520a
JJ
2260 {
2261 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2262 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2263 def = rhs1;
2264 else
2265 {
2266 tree mask
2267 = build_low_bits_mask (TREE_TYPE (rhs1),
2268 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2269 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2270 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
776bfcea
RS
2271 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2272 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
0179520a
JJ
2273 }
2274 }
732a0ad3
JJ
2275 }
2276
2277 if (def == NULL_TREE)
2278 {
2279 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2280 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
9c58fb7a 2281 append_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2282 }
2283
2284 /* Pattern detected. */
49d8df1b 2285 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
732a0ad3
JJ
2286
2287 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2288 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2289 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2290
732a0ad3
JJ
2291 return pattern_stmt;
2292}
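
/* Illustrative only: given

     long long b;
     int n;
     long long r = b << n;

   the shift amount has a different mode from b, so a vector shift
   would mix vector types.  The pattern appends n' = (long long) n to
   the def sequence and rewrites the statement as r = b << n', giving
   both vector operands the same number and size of elements.  */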
2293
53109ba8
KT
2294/* Return true iff the target has a vector optab implementing the operation
2295 CODE on type VECTYPE. */
47486460 2296
53109ba8
KT
2297static bool
2298target_has_vecop_for_code (tree_code code, tree vectype)
2299{
2300 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2301 return voptab
2302 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2303}
47486460 2304
53109ba8
KT
2305/* Verify that the target has optabs of VECTYPE to perform all the steps
2306 needed by the multiplication-by-immediate synthesis algorithm described by
 2307 ALG and VAR. If SYNTH_SHIFT_P is true, ensure that vector addition is
2308 present. Return true iff the target supports all the steps. */
2309
2310static bool
2311target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2312 tree vectype, bool synth_shift_p)
2313{
2314 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2315 return false;
2316
2317 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2318 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2319
2320 if (var == negate_variant
2321 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2322 return false;
2323
 2324 /* If we must synthesize shifts with additions, make sure that vector
2325 addition is available. */
2326 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2327 return false;
2328
2329 for (int i = 1; i < alg->ops; i++)
2330 {
2331 switch (alg->op[i])
2332 {
2333 case alg_shift:
2334 break;
2335 case alg_add_t_m2:
2336 case alg_add_t2_m:
2337 case alg_add_factor:
2338 if (!supports_vplus)
2339 return false;
2340 break;
2341 case alg_sub_t_m2:
2342 case alg_sub_t2_m:
2343 case alg_sub_factor:
2344 if (!supports_vminus)
2345 return false;
2346 break;
2347 case alg_unknown:
2348 case alg_m:
2349 case alg_zero:
2350 case alg_impossible:
2351 return false;
2352 default:
2353 gcc_unreachable ();
2354 }
2355 }
2356
2357 return true;
2358}
2359
 2360/* Synthesize a left shift of OP by AMNT bits using a series of additions,
 2361 putting the final result in DEST. Append all statements but the last into
2362 VINFO. Return the last statement. */
2363
2364static gimple *
2365synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2366 stmt_vec_info vinfo)
2367{
2368 HOST_WIDE_INT i;
2369 tree itype = TREE_TYPE (op);
2370 tree prev_res = op;
2371 gcc_assert (amnt >= 0);
2372 for (i = 0; i < amnt; i++)
2373 {
2374 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2375 : dest;
2376 gimple *stmt
2377 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2378 prev_res = tmp_var;
2379 if (i < amnt - 1)
2380 append_pattern_def_seq (vinfo, stmt);
2381 else
2382 return stmt;
2383 }
2384 gcc_unreachable ();
2385 return NULL;
2386}
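
/* Illustrative only: synth_lshift_by_additions (dest, op, 3, vinfo)
   emits

     t1 = op + op;      // op << 1
     t2 = t1 + t1;      // op << 2
     dest = t2 + t2;    // op << 3

   appending the first two statements and returning the last.  */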
2387
2388/* Helper for vect_synth_mult_by_constant. Apply a binary operation
2389 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2390 the process if necessary. Append the resulting assignment statements
2391 to the sequence in STMT_VINFO. Return the SSA variable that holds the
 2392 result of the binary operation. If SYNTH_SHIFT_P is true, synthesize
2393 left shifts using additions. */
2394
2395static tree
2396apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2397 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2398{
2399 if (integer_zerop (op2)
2400 && (code == LSHIFT_EXPR
2401 || code == PLUS_EXPR))
2402 {
2403 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2404 return op1;
2405 }
2406
2407 gimple *stmt;
2408 tree itype = TREE_TYPE (op1);
2409 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2410
2411 if (code == LSHIFT_EXPR
2412 && synth_shift_p)
2413 {
2414 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2415 stmt_vinfo);
2416 append_pattern_def_seq (stmt_vinfo, stmt);
2417 return tmp_var;
2418 }
2419
2420 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2421 append_pattern_def_seq (stmt_vinfo, stmt);
2422 return tmp_var;
2423}
2424
2425/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2426 and simple arithmetic operations to be vectorized. Record the statements
2427 produced in STMT_VINFO and return the last statement in the sequence or
2428 NULL if it's not possible to synthesize such a multiplication.
2429 This function mirrors the behavior of expand_mult_const in expmed.c but
2430 works on tree-ssa form. */
2431
2432static gimple *
2433vect_synth_mult_by_constant (tree op, tree val,
2434 stmt_vec_info stmt_vinfo)
2435{
2436 tree itype = TREE_TYPE (op);
2437 machine_mode mode = TYPE_MODE (itype);
2438 struct algorithm alg;
2439 mult_variant variant;
2440 if (!tree_fits_shwi_p (val))
2441 return NULL;
2442
2443 /* Multiplication synthesis by shifts, adds and subs can introduce
2444 signed overflow where the original operation didn't. Perform the
2445 operations on an unsigned type and cast back to avoid this.
2446 In the future we may want to relax this for synthesis algorithms
2447 that we can prove do not cause unexpected overflow. */
2448 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2449
2450 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
47486460 2451
53109ba8
KT
2452 /* Targets that don't support vector shifts but support vector additions
2453 can synthesize shifts that way. */
2454 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2455
2456 HOST_WIDE_INT hwval = tree_to_shwi (val);
2457 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2458 The vectorizer's benefit analysis will decide whether it's beneficial
2459 to do this. */
2460 bool possible = choose_mult_variant (mode, hwval, &alg,
2461 &variant, MAX_COST);
2462 if (!possible)
2463 return NULL;
2464
2465 tree vectype = get_vectype_for_scalar_type (multtype);
2466
2467 if (!vectype
2468 || !target_supports_mult_synth_alg (&alg, variant,
2469 vectype, synth_shift_p))
2470 return NULL;
2471
2472 tree accumulator;
2473
 2474 /* Start with an empty statement and populate the sequence below. */
53109ba8
KT
2475 gimple *stmt = NULL;
2476
2477 if (cast_to_unsigned_p)
2478 {
2479 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2480 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2481 append_pattern_def_seq (stmt_vinfo, stmt);
2482 op = tmp_op;
2483 }
2484
2485 if (alg.op[0] == alg_zero)
2486 accumulator = build_int_cst (multtype, 0);
2487 else
2488 accumulator = op;
2489
2490 bool needs_fixup = (variant == negate_variant)
2491 || (variant == add_variant);
2492
2493 for (int i = 1; i < alg.ops; i++)
2494 {
2495 tree shft_log = build_int_cst (multtype, alg.log[i]);
2496 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2497 tree tmp_var = NULL_TREE;
2498
2499 switch (alg.op[i])
2500 {
2501 case alg_shift:
2502 if (synth_shift_p)
2503 stmt
2504 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2505 stmt_vinfo);
2506 else
2507 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2508 shft_log);
2509 break;
2510 case alg_add_t_m2:
2511 tmp_var
2512 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2513 stmt_vinfo, synth_shift_p);
2514 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2515 tmp_var);
2516 break;
2517 case alg_sub_t_m2:
2518 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2519 shft_log, stmt_vinfo,
2520 synth_shift_p);
2521 /* In some algorithms the first step involves zeroing the
 2522 accumulator. If subtracting from such an accumulator,
 2523 just emit the negation directly. */
2524 if (integer_zerop (accumulator))
2525 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2526 else
2527 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2528 tmp_var);
2529 break;
2530 case alg_add_t2_m:
2531 tmp_var
2532 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2533 stmt_vinfo, synth_shift_p);
2534 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2535 break;
2536 case alg_sub_t2_m:
2537 tmp_var
2538 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2539 stmt_vinfo, synth_shift_p);
2540 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2541 break;
2542 case alg_add_factor:
2543 tmp_var
2544 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2545 stmt_vinfo, synth_shift_p);
2546 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2547 tmp_var);
2548 break;
2549 case alg_sub_factor:
2550 tmp_var
2551 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2552 stmt_vinfo, synth_shift_p);
2553 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2554 accumulator);
2555 break;
2556 default:
2557 gcc_unreachable ();
2558 }
2559 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2560 but rather return it directly. */
2561
2562 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2563 append_pattern_def_seq (stmt_vinfo, stmt);
2564 accumulator = accum_tmp;
2565 }
2566 if (variant == negate_variant)
2567 {
2568 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2569 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2570 accumulator = accum_tmp;
2571 if (cast_to_unsigned_p)
2572 append_pattern_def_seq (stmt_vinfo, stmt);
2573 }
2574 else if (variant == add_variant)
2575 {
2576 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2577 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2578 accumulator = accum_tmp;
2579 if (cast_to_unsigned_p)
2580 append_pattern_def_seq (stmt_vinfo, stmt);
2581 }
 2582 /* Move back to a signed type if needed. */
2583 if (cast_to_unsigned_p)
2584 {
2585 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2586 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2587 }
2588
2589 return stmt;
2590}
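
/* Illustrative only: for VAL == 10 one possible synthesis chosen by
   choose_mult_variant is

     t1 = op << 2;      // 4 * op
     t2 = t1 + op;      // 5 * op
     res = t2 << 1;     // 10 * op

   with the shifts themselves expanded into additions when the target
   lacks vector shifts (SYNTH_SHIFT_P), and the whole sequence done in
   an unsigned type when the original type has undefined overflow.  */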
2591
 2592/* Detect multiplication by a constant and convert it into a sequence of
 2593 shifts, additions, subtractions and negations. We reuse the
 2594 choose_mult_variant algorithms from expmed.c.
47486460
VK
2595
2596 Input/Output:
2597
ba9728b0 2598 STMT_VINFO: The stmt from which the pattern search begins,
53109ba8 2599 i.e. the mult stmt.
47486460
VK
2600
2601 Output:
2602
47486460
VK
2603 * TYPE_OUT: The type of the output of this pattern.
2604
53109ba8
KT
2605 * Return value: A new stmt that will be used to replace
2606 the multiplication. */
47486460 2607
355fe088 2608static gimple *
ba9728b0 2609vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
47486460 2610{
ba9728b0 2611 gimple *last_stmt = stmt_vinfo->stmt;
47486460 2612 tree oprnd0, oprnd1, vectype, itype;
53109ba8 2613 gimple *pattern_stmt;
47486460
VK
2614
2615 if (!is_gimple_assign (last_stmt))
2616 return NULL;
2617
2618 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2619 return NULL;
2620
2621 oprnd0 = gimple_assign_rhs1 (last_stmt);
2622 oprnd1 = gimple_assign_rhs2 (last_stmt);
2623 itype = TREE_TYPE (oprnd0);
2624
2625 if (TREE_CODE (oprnd0) != SSA_NAME
2626 || TREE_CODE (oprnd1) != INTEGER_CST
2627 || !INTEGRAL_TYPE_P (itype)
2be65d9e 2628 || !type_has_mode_precision_p (itype))
47486460
VK
2629 return NULL;
2630
2631 vectype = get_vectype_for_scalar_type (itype);
2632 if (vectype == NULL_TREE)
2633 return NULL;
2634
2635 /* If the target can handle vectorized multiplication natively,
2636 don't attempt to optimize this. */
53109ba8
KT
2637 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2638 if (mul_optab != unknown_optab)
47486460
VK
2639 {
2640 machine_mode vec_mode = TYPE_MODE (vectype);
53109ba8 2641 int icode = (int) optab_handler (mul_optab, vec_mode);
47486460 2642 if (icode != CODE_FOR_nothing)
53109ba8 2643 return NULL;
47486460
VK
2644 }
2645
53109ba8
KT
2646 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2647 if (!pattern_stmt)
47486460
VK
2648 return NULL;
2649
2650 /* Pattern detected. */
49d8df1b 2651 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
47486460 2652
47486460
VK
2653 *type_out = vectype;
2654
2655 return pattern_stmt;
2656}
2657
079c527f 2658/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
2659 otherwise vectorized:
2660
2661 type a_t, b_t;
2662
2663 S1 a_t = b_t / N;
2664
079c527f 2665 where type 'type' is an integral type and N is a constant.
363477c0 2666
079c527f 2667 Similarly handle modulo by a constant:
363477c0
JJ
2668
2669 S4 a_t = b_t % N;
2670
2671 Input/Output:
2672
ba9728b0 2673 * STMT_VINFO: The stmt from which the pattern search begins,
079c527f
JJ
2674 i.e. the division stmt. S1 is replaced by if N is a power
2675 of two constant and type is signed:
363477c0
JJ
2676 S3 y_t = b_t < 0 ? N - 1 : 0;
2677 S2 x_t = b_t + y_t;
2678 S1' a_t = x_t >> log2 (N);
2679
079c527f
JJ
2680 S4 is replaced if N is a power of two constant and
2681 type is signed by (where *_T temporaries have unsigned type):
363477c0
JJ
2682 S9 y_T = b_t < 0 ? -1U : 0U;
2683 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2684 S7 z_t = (type) z_T;
2685 S6 w_t = b_t + z_t;
2686 S5 x_t = w_t & (N - 1);
2687 S4' a_t = x_t - z_t;
2688
2689 Output:
2690
363477c0
JJ
2691 * TYPE_OUT: The type of the output of this pattern.
2692
2693 * Return value: A new stmt that will be used to replace the division
2694 S1 or modulo S4 stmt. */
2695
355fe088 2696static gimple *
ba9728b0 2697vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
363477c0 2698{
ba9728b0 2699 gimple *last_stmt = stmt_vinfo->stmt;
5deb57cb 2700 tree oprnd0, oprnd1, vectype, itype, cond;
355fe088 2701 gimple *pattern_stmt, *def_stmt;
363477c0 2702 enum tree_code rhs_code;
363477c0 2703 optab optab;
00f07b86 2704 tree q;
079c527f 2705 int dummy_int, prec;
363477c0
JJ
2706
2707 if (!is_gimple_assign (last_stmt))
2708 return NULL;
2709
2710 rhs_code = gimple_assign_rhs_code (last_stmt);
2711 switch (rhs_code)
2712 {
2713 case TRUNC_DIV_EXPR:
eb592645 2714 case EXACT_DIV_EXPR:
363477c0
JJ
2715 case TRUNC_MOD_EXPR:
2716 break;
2717 default:
2718 return NULL;
2719 }
2720
363477c0
JJ
2721 oprnd0 = gimple_assign_rhs1 (last_stmt);
2722 oprnd1 = gimple_assign_rhs2 (last_stmt);
2723 itype = TREE_TYPE (oprnd0);
2724 if (TREE_CODE (oprnd0) != SSA_NAME
2725 || TREE_CODE (oprnd1) != INTEGER_CST
2726 || TREE_CODE (itype) != INTEGER_TYPE
2be65d9e 2727 || !type_has_mode_precision_p (itype))
363477c0
JJ
2728 return NULL;
2729
7a504f33 2730 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
363477c0
JJ
2731 vectype = get_vectype_for_scalar_type (itype);
2732 if (vectype == NULL_TREE)
2733 return NULL;
2734
8f76f377 2735 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
363477c0 2736 {
8f76f377
RS
2737 /* If the target can handle vectorized division or modulo natively,
2738 don't attempt to optimize this, since native division is likely
2739 to give smaller code. */
2740 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2741 if (optab != unknown_optab)
2742 {
2743 machine_mode vec_mode = TYPE_MODE (vectype);
2744 int icode = (int) optab_handler (optab, vec_mode);
2745 if (icode != CODE_FOR_nothing)
2746 return NULL;
2747 }
363477c0
JJ
2748 }
2749
079c527f
JJ
2750 prec = TYPE_PRECISION (itype);
2751 if (integer_pow2p (oprnd1))
363477c0 2752 {
079c527f
JJ
2753 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2754 return NULL;
363477c0 2755
079c527f 2756 /* Pattern detected. */
49d8df1b 2757 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
079c527f
JJ
2758
2759 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2760 build_int_cst (itype, 0));
eb592645
RB
2761 if (rhs_code == TRUNC_DIV_EXPR
2762 || rhs_code == EXACT_DIV_EXPR)
079c527f
JJ
2763 {
2764 tree var = vect_recog_temp_ssa_var (itype, NULL);
2765 tree shift;
2766 def_stmt
0d0e4a03
JJ
2767 = gimple_build_assign (var, COND_EXPR, cond,
2768 fold_build2 (MINUS_EXPR, itype, oprnd1,
2769 build_int_cst (itype, 1)),
2770 build_int_cst (itype, 0));
9c58fb7a 2771 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2772 var = vect_recog_temp_ssa_var (itype, NULL);
2773 def_stmt
0d0e4a03
JJ
2774 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2775 gimple_assign_lhs (def_stmt));
079c527f
JJ
2776 append_pattern_def_seq (stmt_vinfo, def_stmt);
2777
2778 shift = build_int_cst (itype, tree_log2 (oprnd1));
2779 pattern_stmt
0d0e4a03
JJ
2780 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2781 RSHIFT_EXPR, var, shift);
079c527f
JJ
2782 }
2783 else
2784 {
2785 tree signmask;
079c527f
JJ
2786 if (compare_tree_int (oprnd1, 2) == 0)
2787 {
2788 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2789 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2790 build_int_cst (itype, 1),
2791 build_int_cst (itype, 0));
079c527f
JJ
2792 append_pattern_def_seq (stmt_vinfo, def_stmt);
2793 }
2794 else
2795 {
2796 tree utype
2797 = build_nonstandard_integer_type (prec, 1);
2798 tree vecutype = get_vectype_for_scalar_type (utype);
2799 tree shift
7a504f33 2800 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
079c527f
JJ
2801 - tree_log2 (oprnd1));
2802 tree var = vect_recog_temp_ssa_var (utype, NULL);
2803
0d0e4a03
JJ
2804 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2805 build_int_cst (utype, -1),
2806 build_int_cst (utype, 0));
776bfcea 2807 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
079c527f 2808 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2809 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2810 gimple_assign_lhs (def_stmt),
2811 shift);
776bfcea 2812 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
079c527f
JJ
2813 signmask = vect_recog_temp_ssa_var (itype, NULL);
2814 def_stmt
0d0e4a03 2815 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2816 append_pattern_def_seq (stmt_vinfo, def_stmt);
2817 }
2818 def_stmt
0d0e4a03
JJ
2819 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2820 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2821 append_pattern_def_seq (stmt_vinfo, def_stmt);
2822 def_stmt
0d0e4a03
JJ
2823 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2824 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2825 fold_build2 (MINUS_EXPR, itype, oprnd1,
2826 build_int_cst (itype, 1)));
079c527f
JJ
2827 append_pattern_def_seq (stmt_vinfo, def_stmt);
2828
2829 pattern_stmt
0d0e4a03
JJ
2830 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2831 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2832 signmask);
079c527f
JJ
2833 }
2834
079c527f
JJ
2835 *type_out = vectype;
2836 return pattern_stmt;
363477c0 2837 }
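
  /* A hypothetical trace of the power-of-two path above: for
     int a = b / 4 with b == -7 we get y = 3 (because b < 0),
     x = -7 + 3 = -4 and a = -4 >> 2 = -1, matching truncating
     division, where a plain arithmetic shift would give -2.  */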
079c527f 2838
6b58915b
RS
2839 if (prec > HOST_BITS_PER_WIDE_INT
2840 || integer_zerop (oprnd1))
079c527f
JJ
2841 return NULL;
2842
00f07b86
RH
2843 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2844 return NULL;
079c527f 2845
079c527f 2846 if (TYPE_UNSIGNED (itype))
363477c0 2847 {
079c527f
JJ
2848 unsigned HOST_WIDE_INT mh, ml;
2849 int pre_shift, post_shift;
6b58915b 2850 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
7a504f33 2851 & GET_MODE_MASK (itype_mode));
5deb57cb 2852 tree t1, t2, t3, t4;
079c527f 2853
fecfbfa4 2854 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
079c527f
JJ
2855 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2856 return NULL;
2857
2858 /* Find a suitable multiplier and right shift count
2859 instead of multiplying with D. */
2860 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2861
2862 /* If the suggested multiplier is more than SIZE bits, we can do better
2863 for even divisors, using an initial right shift. */
2864 if (mh != 0 && (d & 1) == 0)
363477c0 2865 {
146ec50f 2866 pre_shift = ctz_or_zero (d);
079c527f
JJ
2867 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2868 &ml, &post_shift, &dummy_int);
2869 gcc_assert (!mh);
2870 }
2871 else
2872 pre_shift = 0;
2873
2874 if (mh != 0)
2875 {
2876 if (post_shift - 1 >= prec)
2877 return NULL;
2878
5deb57cb
JJ
2879 /* t1 = oprnd0 h* ml;
2880 t2 = oprnd0 - t1;
2881 t3 = t2 >> 1;
2882 t4 = t1 + t3;
2883 q = t4 >> (post_shift - 1); */
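	  /* A hypothetical instance: for a 32-bit unsigned division by 7
	     the standard multiplier construction gives ml = 0x24924925
	     and post_shift = 3, so the code below computes
	     q = (t1 + ((oprnd0 - t1) >> 1)) >> 2.  */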
2884 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2885 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2886 build_int_cst (itype, ml));
079c527f 2887 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2888
5deb57cb 2889 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2890 def_stmt
0d0e4a03 2891 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2892 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2893
2894 t3 = vect_recog_temp_ssa_var (itype, NULL);
2895 def_stmt
0d0e4a03 2896 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2897 append_pattern_def_seq (stmt_vinfo, def_stmt);
2898
5deb57cb 2899 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2900 def_stmt
0d0e4a03 2901 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2902
2903 if (post_shift != 1)
2904 {
2905 append_pattern_def_seq (stmt_vinfo, def_stmt);
2906
5deb57cb 2907 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2908 pattern_stmt
0d0e4a03
JJ
2909 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2910 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2911 }
2912 else
2913 {
5deb57cb 2914 q = t4;
079c527f
JJ
2915 pattern_stmt = def_stmt;
2916 }
363477c0
JJ
2917 }
2918 else
2919 {
079c527f
JJ
2920 if (pre_shift >= prec || post_shift >= prec)
2921 return NULL;
2922
2923 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2924 t2 = t1 h* ml;
2925 q = t2 >> post_shift; */
079c527f
JJ
2926 if (pre_shift)
2927 {
2928 t1 = vect_recog_temp_ssa_var (itype, NULL);
2929 def_stmt
0d0e4a03
JJ
2930 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2931 build_int_cst (NULL, pre_shift));
079c527f
JJ
2932 append_pattern_def_seq (stmt_vinfo, def_stmt);
2933 }
2934 else
2935 t1 = oprnd0;
363477c0 2936
5deb57cb 2937 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2938 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2939 build_int_cst (itype, ml));
079c527f 2940
5deb57cb
JJ
2941 if (post_shift)
2942 {
2943 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2944
5deb57cb
JJ
2945 q = vect_recog_temp_ssa_var (itype, NULL);
2946 def_stmt
0d0e4a03
JJ
2947 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2948 build_int_cst (itype, post_shift));
5deb57cb
JJ
2949 }
2950 else
2951 q = t2;
2952
2953 pattern_stmt = def_stmt;
079c527f
JJ
2954 }
2955 }
2956 else
2957 {
2958 unsigned HOST_WIDE_INT ml;
4ee4c52c 2959 int post_shift;
6b58915b 2960 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2961 unsigned HOST_WIDE_INT abs_d;
2962 bool add = false;
5deb57cb 2963 tree t1, t2, t3, t4;
079c527f
JJ
2964
2965 /* Give up for -1. */
2966 if (d == -1)
2967 return NULL;
2968
079c527f
JJ
2969 /* Since d might be INT_MIN, we have to cast to
2970 unsigned HOST_WIDE_INT before negating to avoid
2971 undefined signed overflow. */
2972 abs_d = (d >= 0
2973 ? (unsigned HOST_WIDE_INT) d
2974 : - (unsigned HOST_WIDE_INT) d);
2975
2976 /* n rem d = n rem -d */
2977 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2978 {
2979 d = abs_d;
2980 oprnd1 = build_int_cst (itype, abs_d);
2981 }
2982 else if (HOST_BITS_PER_WIDE_INT >= prec
fecfbfa4 2983 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
2984 /* This case is not handled correctly below. */
2985 return NULL;
2986
4ee4c52c 2987 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
fecfbfa4 2988 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
079c527f
JJ
2989 {
2990 add = true;
dd4786fe 2991 ml |= HOST_WIDE_INT_M1U << (prec - 1);
079c527f
JJ
2992 }
2993 if (post_shift >= prec)
2994 return NULL;
2995
7abed779 2996 /* t1 = oprnd0 h* ml; */
5deb57cb 2997 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2998 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2999 build_int_cst (itype, ml));
079c527f
JJ
3000
3001 if (add)
3002 {
5deb57cb 3003 /* t2 = t1 + oprnd0; */
7abed779 3004 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 3005 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3006 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
3007 }
3008 else
5deb57cb 3009 t2 = t1;
079c527f 3010
5deb57cb 3011 if (post_shift)
079c527f 3012 {
5deb57cb 3013 /* t3 = t2 >> post_shift; */
7abed779 3014 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 3015 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3016 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3017 build_int_cst (itype, post_shift));
363477c0 3018 }
079c527f 3019 else
5deb57cb 3020 t3 = t2;
079c527f 3021
807e902e 3022 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
3023 int msb = 1;
3024 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3025 {
807e902e 3026 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 3027 msb = 0;
807e902e 3028 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
3029 msb = -1;
3030 }
079c527f 3031
7abed779
JJ
3032 if (msb == 0 && d >= 0)
3033 {
3034 /* q = t3; */
3035 q = t3;
3036 pattern_stmt = def_stmt;
3037 }
3038 else
3039 {
3040 /* t4 = oprnd0 >> (prec - 1);
3041 or if we know from VRP that oprnd0 >= 0
3042 t4 = 0;
3043 or if we know from VRP that oprnd0 < 0
3044 t4 = -1; */
3045 append_pattern_def_seq (stmt_vinfo, def_stmt);
3046 t4 = vect_recog_temp_ssa_var (itype, NULL);
3047 if (msb != 1)
0d0e4a03
JJ
3048 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3049 build_int_cst (itype, msb));
7abed779 3050 else
0d0e4a03
JJ
3051 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3052 build_int_cst (itype, prec - 1));
7abed779
JJ
3053 append_pattern_def_seq (stmt_vinfo, def_stmt);
3054
3055 /* q = t3 - t4; or q = t4 - t3; */
3056 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
3057 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3058 d < 0 ? t3 : t4);
7abed779 3059 }
079c527f
JJ
3060 }
3061
3062 if (rhs_code == TRUNC_MOD_EXPR)
3063 {
3064 tree r, t1;
3065
3066 /* We divided. Now finish by:
3067 t1 = q * oprnd1;
3068 r = oprnd0 - t1; */
3069 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3070
3071 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3072 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 3073 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 3074
079c527f 3075 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 3076 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
3077 }
3078
079c527f 3079 /* Pattern detected. */
49d8df1b 3080 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
363477c0 3081
363477c0
JJ
3082 *type_out = vectype;
3083 return pattern_stmt;
3084}
3085
69d2aade
JJ
3086/* Function vect_recog_mixed_size_cond_pattern
3087
3088 Try to find the following pattern:
3089
3090 type x_t, y_t;
3091 TYPE a_T, b_T, c_T;
3092 loop:
3093 S1 a_T = x_t CMP y_t ? b_T : c_T;
3094
 3095 where type 'TYPE' is an integral type which has a different size
bc4fb355 3096 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 3097 than 'type', the constants need to fit into an integer type
bc4fb355 3098 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
3099
3100 Input:
3101
ba9728b0 3102 * STMT_VINFO: The stmt from which the pattern search begins.
69d2aade
JJ
3103
3104 Output:
3105
69d2aade
JJ
3106 * TYPE_OUT: The type of the output of this pattern.
3107
3108 * Return value: A new stmt that will be used to replace the pattern.
3109 Additionally a def_stmt is added.
3110
3111 a_it = x_t CMP y_t ? b_it : c_it;
3112 a_T = (TYPE) a_it; */
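
/* Illustrative only: for

     short x, y;
     int r = x < y ? 17 : 42;

   the comparison is naturally done on shorts and both constants fit
   in a short, so the pattern builds

     short r' = x < y ? 17 : 42;
     int r = (int) r';

   keeping the VEC_COND_EXPR and its comparison in vector types with
   the same number of elements.  */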

static gimple *
vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  tree cond_expr, then_clause, else_clause;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  gimple *pattern_stmt, *def_stmt;
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
			      &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
	  && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
				 &def_stmt1, &promotion)))
    return NULL;

  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  HOST_WIDE_INT cmp_mode_size
    = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));

  scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
  if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
    return NULL;

  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (cmp_mode_size,
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
    return NULL;

  if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
				  COND_EXPR, unshare_expr (cond_expr),
				  fold_convert (itype, then_clause),
				  fold_convert (itype, else_clause));
  pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
				      NOP_EXPR, gimple_assign_lhs (def_stmt));

  append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
  *type_out = vectype;

  vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);

  return pattern_stmt;
}


/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can and should be optimized that way.  Assume it shouldn't
   in case it's a result of a comparison which can be directly vectorized into
   a vector comparison.  Fills in STMTS with all stmts visited during the
   walk.  */

static bool
check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
{
  tree rhs1;
  enum tree_code rhs_code;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info)
    return false;

  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
  if (!def_stmt)
    return false;

  if (stmts.contains (def_stmt))
    return true;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    CASE_CONVERT:
      if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	return false;
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    case BIT_NOT_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts))
	return false;
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      if (! check_bool_pattern (rhs1, vinfo, stmts)
	  || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
	return false;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (cfun, def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
	  if (mask_type
	      && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
	    return false;
	}
      else
	return false;
      break;
    }

  bool res = stmts.add (def_stmt);
  /* We can't end up recursing when just visiting SSA defs but not PHIs.  */
  gcc_assert (!res);

  return true;
}
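
/* Illustrative example (editorial addition): for

     a_b = x_1 < y_2;
     b_b = x_3 < y_4;
     c_b = a_b & b_b;

   check_bool_pattern (c_b, ...) recursively visits the definitions of
   c_b, a_b and b_b; if the comparisons cannot be vectorized directly
   as mask operations, it records all three statements in STMTS, ready
   for adjust_bool_stmts to rewrite them using COND_EXPRs.  */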


/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR), adding the cast to STMT_INFO's
   pattern sequence.  */

static tree
adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
{
  gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
					   NOP_EXPR, var);
  append_pattern_def_seq (stmt_info, cast_stmt,
			  get_vectype_for_scalar_type (type));
  return gimple_assign_lhs (cast_stmt);
}

/* Helper function of vect_recog_bool_pattern.  Do the actual transformations.
   VAR is an SSA_NAME that should be transformed from bool to a wider integer
   type, OUT_TYPE is the desired final integer type of the whole pattern.
   STMT_INFO is the info of the pattern root and is where pattern stmts should
   be associated with.  DEFS is a map of pattern defs.  */

static void
adjust_bool_pattern (tree var, tree out_type,
		     stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
{
  gimple *stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple *pattern_stmt, *def_stmt;
  tree trueval = NULL_TREE;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       SSA_NAME, irhs1);
      break;

    case BIT_NOT_EXPR:
      irhs1 = *defs.get (rhs1);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 the BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  d_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs1 = *defs.get (rhs1);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs1;
	      goto do_compare;
	    }
	  else
	    irhs2 = *defs.get (rhs2);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  irhs2 = *defs.get (rhs2);
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      rhs_code = def_rhs_code;
	      rhs1 = def_rhs1;
	      rhs2 = gimple_assign_rhs2 (def_stmt);
	      trueval = irhs2;
	      goto do_compare;
	    }
	  else
	    irhs1 = *defs.get (rhs1);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = *defs.get (rhs1);
      irhs2 = *defs.get (rhs2);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
					      stmt_info);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
					      stmt_info);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
	      irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       rhs_code, irhs1, irhs2);
      break;

    default:
    do_compare:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
		       GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
			       COND_EXPR, cond_expr, trueval,
			       build_int_cst (itype, 0));
      break;
    }

  gimple_set_location (pattern_stmt, loc);
  append_pattern_def_seq (stmt_info, pattern_stmt,
			  get_vectype_for_scalar_type (itype));
  defs.put (var, gimple_assign_lhs (pattern_stmt));
}

/* Comparison function to qsort a vector of gimple stmts after UID.  */

static int
sort_after_uid (const void *p1, const void *p2)
{
  const gimple *stmt1 = *(const gimple * const *)p1;
  const gimple *stmt2 = *(const gimple * const *)p2;
  return gimple_uid (stmt1) - gimple_uid (stmt2);
}

/* Create pattern stmts for all stmts participating in the bool pattern
   specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
   OUT_TYPE.  Return the def of the pattern root.  */

static tree
adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
		   tree out_type, stmt_vec_info stmt_info)
{
  /* Gather original stmts in the bool pattern in their order of appearance
     in the IL.  */
  auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
  for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
       i != bool_stmt_set.end (); ++i)
    bool_stmts.quick_push (*i);
  bool_stmts.qsort (sort_after_uid);

  /* Now process them in that order, producing pattern stmts.  */
  hash_map <tree, tree> defs;
  for (unsigned i = 0; i < bool_stmts.length (); ++i)
    adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
			 out_type, stmt_info, defs);

  /* Pop the last pattern seq stmt and install it as pattern root for STMT.  */
  gimple *pattern_stmt
    = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  return gimple_assign_lhs (pattern_stmt);
}

/* Helper for search_type_for_mask.  */

static tree
search_type_for_mask_1 (tree var, vec_info *vinfo,
			hash_map<gimple *, tree> &cache)
{
  tree rhs1;
  enum tree_code rhs_code;
  tree res = NULL_TREE, res2;

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL_TREE;

  stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
  if (!def_stmt_info)
    return NULL_TREE;

  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
  if (!def_stmt)
    return NULL_TREE;

  tree *c = cache.get (def_stmt);
  if (c)
    return *c;

  rhs_code = gimple_assign_rhs_code (def_stmt);
  rhs1 = gimple_assign_rhs1 (def_stmt);

  switch (rhs_code)
    {
    case SSA_NAME:
    case BIT_NOT_EXPR:
    CASE_CONVERT:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      res = search_type_for_mask_1 (rhs1, vinfo, cache);
      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
				     cache);
      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
	res = res2;
      break;

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree comp_vectype, mask_type;

	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
	    {
	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
					     vinfo, cache);
	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
		res = res2;
	      break;
	    }

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    {
	      res = NULL_TREE;
	      break;
	    }

	  mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
	  if (!mask_type
	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
	    {
	      res = NULL_TREE;
	      break;
	    }

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	    {
	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	    }
	  else
	    res = TREE_TYPE (rhs1);
	}
    }

  cache.put (def_stmt, res);
  return res;
}

/* Return the proper type for converting bool VAR into
   an integer value, or NULL_TREE if no such type exists.
   The type is chosen so that the converted value has the
   same number of elements as VAR's vector type.  */

static tree
search_type_for_mask (tree var, vec_info *vinfo)
{
  hash_map<gimple *, tree> cache;
  return search_type_for_mask_1 (var, vinfo, cache);
}
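
/* Illustrative example (editorial addition): if VAR is defined as

     b_1 = x_2 == y_3;

   with 32-bit integer operands, search_type_for_mask returns a 32-bit
   unsigned type, so that a conversion of b_1 can keep one vector
   element per comparison lane instead of packing down to byte-sized
   boolean elements.  */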

/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.  Or a similar pattern
   ending in

     S6  f_Y = e_b ? r_Y : s_Y;

   as results from if-conversion of a complex condition.

   Input:

   * STMT_VINFO: The stmt at the end from which the pattern
		 search begins, i.e. cast of a bool to
		 an integer type.

   Output:

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */

static gimple *
vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  vec_info *vinfo = stmt_vinfo->vinfo;
  gimple *pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
    return NULL;

  hash_set<gimple *> bool_stmts;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	{
	  rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	    pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
	  else
	    pattern_stmt
	      = gimple_build_assign (lhs, NOP_EXPR, rhs);
	}
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, tmp;

	  if (!type)
	    return NULL;

	  /* We may directly use cond with narrowed type to avoid
	     multiple cond exprs with following result packing and
	     perform single cond with packed mask instead.  In case
	     of widening we better make cond first and then extract
	     results.  */
	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
	    type = TREE_TYPE (lhs);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  tmp = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);

	  if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
	    {
	      tree new_vectype = get_vectype_for_scalar_type (type);
	      append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);

	      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	      pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
	    }
	}

      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else if (rhs_code == COND_EXPR
	   && TREE_CODE (var) == SSA_NAME)
    {
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      /* Build a scalar type for the boolean result that when
	 vectorized matches the vector type of the result in
	 size and number of elements.  */
      unsigned prec
	= vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
			       TYPE_VECTOR_SUBPARTS (vectype));

      tree type
	= build_nonstandard_integer_type (prec,
					  TYPE_UNSIGNED (TREE_TYPE (var)));
      if (get_vectype_for_scalar_type (type) == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, vinfo, bool_stmts))
	return NULL;

      rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt
	= gimple_build_assign (lhs, COND_EXPR,
			       build2 (NE_EXPR, boolean_type_node,
				       rhs, build_int_cst (type, 0)),
			       gimple_assign_rhs2 (last_stmt),
			       gimple_assign_rhs3 (last_stmt));
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;

      if (check_bool_pattern (var, vinfo, bool_stmts))
	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
      else
	{
	  tree type = search_type_for_mask (var, vinfo);
	  tree cst0, cst1, new_vectype;

	  if (!type)
	    return NULL;

	  if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
	    type = TREE_TYPE (vectype);

	  cst0 = build_int_cst (type, 0);
	  cst1 = build_int_cst (type, 1);
	  new_vectype = get_vectype_for_scalar_type (type);

	  rhs = vect_recog_temp_ssa_var (type, NULL);
	  pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
	}

      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
	  append_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
      *type_out = vectype;
      vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);

      return pattern_stmt;
    }
  else
    return NULL;
}


/* A helper for vect_recog_mask_conversion_pattern.  Build a
   conversion of MASK to a type suitable for masking VECTYPE.
   The built statement gets the required vectype and is appended to
   STMT_VINFO's pattern sequence.

   Return the converted mask.  */

static tree
build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
{
  gimple *stmt;
  tree masktype, tmp;

  masktype = build_same_sized_truth_vector_type (vectype);
  tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
  stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
  append_pattern_def_seq (stmt_vinfo, stmt, masktype);

  return tmp;
}


/* Function vect_recog_mask_conversion_pattern

   Try to find statements which require boolean type
   conversion.  Additional conversion statements are
   added to handle such cases.  For example:

     bool m_1, m_2, m_3;
     int i_4, i_5;
     double d_6, d_7;
     char c_1, c_2, c_3;

     S1   m_1 = i_4 > i_5;
     S2   m_2 = d_6 < d_7;
     S3   m_3 = m_1 & m_2;
     S4   c_1 = m_3 ? c_2 : c_3;

   Will be transformed into:

     S1   m_1 = i_4 > i_5;
     S2   m_2 = d_6 < d_7;
     S3'' m_2' = (_Bool[bitsize=32])m_2
     S3'  m_3' = m_1 & m_2';
     S4'' m_3'' = (_Bool[bitsize=8])m_3'
     S4'  c_1' = m_3'' ? c_2 : c_3;  */

static gimple *
vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
{
  gimple *last_stmt = stmt_vinfo->stmt;
  enum tree_code rhs_code;
  tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
  tree vectype1, vectype2;
  stmt_vec_info pattern_stmt_info;
  vec_info *vinfo = stmt_vinfo->vinfo;

  /* Check for MASK_LOAD and MASK_STORE calls requiring mask conversion.  */
  if (is_gimple_call (last_stmt)
      && gimple_call_internal_p (last_stmt))
    {
      gcall *pattern_stmt;

      internal_fn ifn = gimple_call_internal_fn (last_stmt);
      int mask_argno = internal_fn_mask_index (ifn);
      if (mask_argno < 0)
	return NULL;

      bool store_p = internal_store_fn_p (ifn);
      if (store_p)
	{
	  int rhs_index = internal_fn_stored_value_index (ifn);
	  tree rhs = gimple_call_arg (last_stmt, rhs_index);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
	}
      else
	{
	  lhs = gimple_call_lhs (last_stmt);
	  vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
	}

      tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
      tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
      if (!mask_arg_type)
	return NULL;
      vectype2 = get_mask_type_for_scalar_type (mask_arg_type);

      if (!vectype1 || !vectype2
	  || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	return NULL;

      tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);

      auto_vec<tree, 8> args;
      unsigned int nargs = gimple_call_num_args (last_stmt);
      args.safe_grow (nargs);
      for (unsigned int i = 0; i < nargs; ++i)
	args[i] = ((int) i == mask_argno
		   ? tmp
		   : gimple_call_arg (last_stmt, i));
      pattern_stmt = gimple_build_call_internal_vec (ifn, args);

      if (!store_p)
	{
	  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple_call_set_lhs (pattern_stmt, lhs);
	}
      gimple_call_set_nothrow (pattern_stmt, true);

      pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
      if (STMT_VINFO_DATA_REF (stmt_vinfo))
	vinfo->move_dr (pattern_stmt_info, stmt_vinfo);

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  if (!is_gimple_assign (last_stmt))
    return NULL;

  gimple *pattern_stmt;
  lhs = gimple_assign_lhs (last_stmt);
  rhs1 = gimple_assign_rhs1 (last_stmt);
  rhs_code = gimple_assign_rhs_code (last_stmt);

  /* Check for cond expression requiring mask conversion.  */
  if (rhs_code == COND_EXPR)
    {
      vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));

      if (TREE_CODE (rhs1) == SSA_NAME)
	{
	  rhs1_type = search_type_for_mask (rhs1, vinfo);
	  if (!rhs1_type)
	    return NULL;
	}
      else if (COMPARISON_CLASS_P (rhs1))
	{
	  /* Check whether we're comparing scalar booleans and (if so)
	     whether a better mask type exists than the mask associated
	     with boolean-sized elements.  This avoids unnecessary packs
	     and unpacks if the booleans are set from comparisons of
	     wider types.  E.g. in:

	       int x1, x2, x3, x4, y1, y2;
	       ...
	       bool b1 = (x1 == x2);
	       bool b2 = (x3 == x4);
	       ... = b1 == b2 ? y1 : y2;

	     it is better for b1 and b2 to use the mask type associated
	     with int elements rather than bool (byte) elements.  */
	  rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
	  if (!rhs1_type)
	    rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
	}
      else
	return NULL;

      vectype2 = get_mask_type_for_scalar_type (rhs1_type);

      if (!vectype1 || !vectype2)
	return NULL;

      /* Continue if a conversion is needed.  Also continue if we have
	 a comparison whose vector type would normally be different from
	 VECTYPE2 when considered in isolation.  In that case we'll
	 replace the comparison with an SSA name (so that we can record
	 its vector type) and behave as though the comparison was an SSA
	 name from the outset.  */
      if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2))
	  && (TREE_CODE (rhs1) == SSA_NAME
	      || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
	return NULL;

      /* If rhs1 is invariant and we can promote it leave the COND_EXPR
	 in place, we can handle it in vectorizable_condition.  This avoids
	 unnecessary promotion stmts and increased vectorization factor.  */
      if (COMPARISON_CLASS_P (rhs1)
	  && INTEGRAL_TYPE_P (rhs1_type)
	  && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
		       TYPE_VECTOR_SUBPARTS (vectype2)))
	{
	  enum vect_def_type dt;
	  if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
	      && dt == vect_external_def
	      && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
	      && (dt == vect_external_def
		  || dt == vect_constant_def))
	    {
	      tree wide_scalar_type = build_nonstandard_integer_type
		(tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
		 TYPE_UNSIGNED (rhs1_type));
	      tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
	      if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
		return NULL;
	    }
	}

      /* If rhs1 is a comparison we need to move it into a
	 separate statement.  */
      if (TREE_CODE (rhs1) != SSA_NAME)
	{
	  tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
	  pattern_stmt = gimple_build_assign (tmp, rhs1);
	  rhs1 = tmp;
	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
	}

      if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
		    TYPE_VECTOR_SUBPARTS (vectype2)))
	tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
      else
	tmp = rhs1;

      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
					  gimple_assign_rhs2 (last_stmt),
					  gimple_assign_rhs3 (last_stmt));

      *type_out = vectype1;
      vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

      return pattern_stmt;
    }

  /* Now check for binary boolean operations requiring conversion for
     one of the operands.  */
  if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
    return NULL;

  if (rhs_code != BIT_IOR_EXPR
      && rhs_code != BIT_XOR_EXPR
      && rhs_code != BIT_AND_EXPR
      && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
    return NULL;

  rhs2 = gimple_assign_rhs2 (last_stmt);

  rhs1_type = search_type_for_mask (rhs1, vinfo);
  rhs2_type = search_type_for_mask (rhs2, vinfo);

  if (!rhs1_type || !rhs2_type
      || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
    return NULL;

  if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
    {
      vectype1 = get_mask_type_for_scalar_type (rhs1_type);
      if (!vectype1)
	return NULL;
      rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
    }
  else
    {
      vectype1 = get_mask_type_for_scalar_type (rhs2_type);
      if (!vectype1)
	return NULL;
      rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
    }

  lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
  pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);

  *type_out = vectype1;
  vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);

  return pattern_stmt;
}

/* STMT_INFO is a load or store.  If the load or store is conditional, return
   the boolean condition under which it occurs, otherwise return null.  */

static tree
vect_get_load_store_mask (stmt_vec_info stmt_info)
{
  if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
    {
      gcc_assert (gimple_assign_single_p (def_assign));
      return NULL_TREE;
    }

  if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
    {
      internal_fn ifn = gimple_call_internal_fn (def_call);
      int mask_index = internal_fn_mask_index (ifn);
      return gimple_call_arg (def_call, mask_index);
    }

  gcc_unreachable ();
}

/* Return the scalar offset type that an internal gather/scatter function
   should use.  GS_INFO describes the gather/scatter operation.  */

static tree
vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
{
  tree offset_type = TREE_TYPE (gs_info->offset);
  unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));

  /* Enforced by vect_check_gather_scatter.  */
  unsigned int offset_bits = TYPE_PRECISION (offset_type);
  gcc_assert (element_bits >= offset_bits);

  /* If the offset is narrower than the elements, extend it according
     to its sign.  */
  if (element_bits > offset_bits)
    return build_nonstandard_integer_type (element_bits,
					   TYPE_UNSIGNED (offset_type));

  return offset_type;
}

/* Return MASK if MASK is suitable for masking an operation on vectors
   of type VECTYPE, otherwise convert it into such a form and return
   the result.  Associate any conversion statements with STMT_INFO's
   pattern.  */

static tree
vect_convert_mask_for_vectype (tree mask, tree vectype,
			       stmt_vec_info stmt_info, vec_info *vinfo)
{
  tree mask_type = search_type_for_mask (mask, vinfo);
  if (mask_type)
    {
      tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
      if (mask_vectype
	  && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
		       TYPE_VECTOR_SUBPARTS (mask_vectype)))
	mask = build_mask_conversion (mask, vectype, stmt_info);
    }
  return mask;
}

/* Return the equivalent of:

     fold_convert (TYPE, VALUE)

   with the expectation that the operation will be vectorized.
   If new statements are needed, add them as pattern statements
   to STMT_INFO.  */

static tree
vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
{
  if (useless_type_conversion_p (type, TREE_TYPE (value)))
    return value;

  tree new_value = vect_recog_temp_ssa_var (type, NULL);
  gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
  append_pattern_def_seq (stmt_info, conversion,
			  get_vectype_for_scalar_type (type));
  return new_value;
}

/* Try to convert STMT_INFO into a call to a gather load or scatter store
   internal function.  Return the final statement on success and set
   *TYPE_OUT to the vector type being loaded or stored.

   This function only handles gathers and scatters that were recognized
   as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P).  */

static gimple *
vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
{
  /* Currently we only support this for loop vectorization.  */
  loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
  if (!loop_vinfo)
    return NULL;

  /* Make sure that we're looking at a gather load or scatter store.  */
  data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
  if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
    return NULL;

  /* Get the boolean that controls whether the load or store happens.
     This is null if the operation is unconditional.  */
  tree mask = vect_get_load_store_mask (stmt_info);

  /* Make sure that the target supports an appropriate internal
     function for the gather/scatter operation.  */
  gather_scatter_info gs_info;
  if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
      || gs_info.decl)
    return NULL;

  /* Convert the mask to the right form.  */
  tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
  if (mask)
    mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
					  loop_vinfo);

  /* Get the invariant base and non-invariant offset, converting the
     latter to the same width as the vector elements.  */
  tree base = gs_info.base;
  tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
  tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
						stmt_info);

  /* Build the new pattern statement.  */
  tree scale = size_int (gs_info.scale);
  gcall *pattern_stmt;
  if (DR_IS_READ (dr))
    {
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
						   offset, scale, mask);
      else
	pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
						   offset, scale);
      tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
      gimple_call_set_lhs (pattern_stmt, load_lhs);
    }
  else
    {
      tree rhs = vect_get_store_rhs (stmt_info);
      if (mask != NULL)
	pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
						   base, offset, scale, rhs,
						   mask);
      else
	pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
						   base, offset, scale, rhs);
    }
  gimple_call_set_nothrow (pattern_stmt, true);

  /* Copy across relevant vectorization info and associate DR with the
     new pattern statement instead of the original statement.  */
  stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
  loop_vinfo->move_dr (pattern_stmt_info, stmt_info);

  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  *type_out = vectype;
  vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);

  return pattern_stmt;
}
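
/* Illustrative example (editorial addition): an if-converted loop like

     for (i = 0; i < n; i++)
       if (c[i])
	 res[i] = data[idx[i]];

   reaches this function as a conditional gather load, which is rewritten
   into a call such as IFN_MASK_GATHER_LOAD (base, offset, scale, mask)
   when the target provides the corresponding internal function.  */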

/* Return true if TYPE is a non-boolean integer type.  These are the types
   that we want to consider for narrowing.  */

static bool
vect_narrowable_type_p (tree type)
{
  return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
}

/* Return true if the operation given by CODE can be truncated to N bits
   when only N bits of the output are needed.  This is only true if bit N+1
   of the inputs has no effect on the low N bits of the result.  */

static bool
vect_truncatable_operation_p (tree_code code)
{
  switch (code)
    {
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
    case COND_EXPR:
      return true;

    default:
      return false;
    }
}
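
/* For example (editorial addition): PLUS_EXPR is truncatable because
   the low 8 bits of x + y depend only on the low 8 bits of x and y,
   whereas TRUNC_DIV_EXPR is not, since the low bits of x / y can
   depend on all bits of both inputs.  Shifts are also excluded here;
   they are instead handled specially in
   vect_determine_precisions_from_users.  */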

/* Record that STMT_INFO could be changed from operating on TYPE to
   operating on a type with the precision and sign given by PRECISION
   and SIGN respectively.  PRECISION is an arbitrary bit precision;
   it might not be a whole number of bytes.  */

static void
vect_set_operation_type (stmt_vec_info stmt_info, tree type,
			 unsigned int precision, signop sign)
{
  /* Round the precision up to a whole number of bytes.  */
  precision = vect_element_precision (precision);
  if (precision < TYPE_PRECISION (type)
      && (!stmt_info->operation_precision
	  || stmt_info->operation_precision > precision))
    {
      stmt_info->operation_precision = precision;
      stmt_info->operation_sign = sign;
    }
}

/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
   non-boolean inputs, all of which have type TYPE.  MIN_INPUT_PRECISION
   is an arbitrary bit precision; it might not be a whole number of bytes.  */

static void
vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
			      unsigned int min_input_precision)
{
  /* This operation in isolation only requires the inputs to have
     MIN_INPUT_PRECISION of precision.  However, that doesn't mean
     that MIN_INPUT_PRECISION is a natural precision for the chain
     as a whole.  E.g. consider something like:

       unsigned short *x, *y;
       *y = ((*x & 0xf0) >> 4) | (*y << 4);

     The right shift can be done on unsigned chars, and only requires the
     result of "*x & 0xf0" to be done on unsigned chars.  But taking that
     approach would mean turning a natural chain of single-vector unsigned
     short operations into one that truncates "*x" and then extends
     "(*x & 0xf0) >> 4", with two vectors for each unsigned short
     operation and one vector for each unsigned char operation.
     This would be a significant pessimization.

     Instead only propagate the maximum of this precision and the precision
     required by the users of the result.  This means that we don't pessimize
     the case above but continue to optimize things like:

       unsigned char *y;
       unsigned short *x;
       *y = ((*x & 0xf0) >> 4) | (*y << 4);

     Here we would truncate two vectors of *x to a single vector of
     unsigned chars and use single-vector unsigned char operations for
     everything else, rather than doing two unsigned short copies of
     "(*x & 0xf0) >> 4" and then truncating the result.  */
  min_input_precision = MAX (min_input_precision,
			     stmt_info->min_output_precision);

  if (min_input_precision < TYPE_PRECISION (type)
      && (!stmt_info->min_input_precision
	  || stmt_info->min_input_precision > min_input_precision))
    stmt_info->min_input_precision = min_input_precision;
}

/* Subroutine of vect_determine_min_output_precision.  Return true if
   we can calculate a reduced number of output bits for STMT_INFO,
   whose result is LHS.  */

static bool
vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
{
  /* Take the maximum precision required by users of the result.  */
  vec_info *vinfo = stmt_info->vinfo;
  unsigned int precision = 0;
  imm_use_iterator iter;
  use_operand_p use;
  FOR_EACH_IMM_USE_FAST (use, iter, lhs)
    {
      gimple *use_stmt = USE_STMT (use);
      if (is_gimple_debug (use_stmt))
	continue;
      stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
      if (!use_stmt_info || !use_stmt_info->min_input_precision)
	return false;
      /* The input precision recorded for COND_EXPRs applies only to the
	 "then" and "else" values.  */
      gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
      if (assign
	  && gimple_assign_rhs_code (assign) == COND_EXPR
	  && use->use != gimple_assign_rhs2_ptr (assign)
	  && use->use != gimple_assign_rhs3_ptr (assign))
	return false;
      precision = MAX (precision, use_stmt_info->min_input_precision);
    }

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location,
		     "only the low %d bits of %T are significant\n",
		     precision, lhs);
  stmt_info->min_output_precision = precision;
  return true;
}

/* Calculate min_output_precision for STMT_INFO.  */

static void
vect_determine_min_output_precision (stmt_vec_info stmt_info)
{
  /* We're only interested in statements with a narrowable result.  */
  tree lhs = gimple_get_lhs (stmt_info->stmt);
  if (!lhs
      || TREE_CODE (lhs) != SSA_NAME
      || !vect_narrowable_type_p (TREE_TYPE (lhs)))
    return;

  if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
    stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
}

/* Use range information to decide whether STMT (described by STMT_INFO)
   could be done in a narrower type.  This is effectively a forward
   propagation, since it uses context-independent information that applies
   to all users of an SSA name.  */

static void
vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
{
  tree lhs = gimple_assign_lhs (stmt);
  if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    return;

  tree type = TREE_TYPE (lhs);
  if (!vect_narrowable_type_p (type))
    return;

  /* First see whether we have any useful range information for the result.  */
  unsigned int precision = TYPE_PRECISION (type);
  signop sign = TYPE_SIGN (type);
  wide_int min_value, max_value;
  if (!vect_get_range_info (lhs, &min_value, &max_value))
    return;

  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int nops = gimple_num_ops (stmt);

  if (!vect_truncatable_operation_p (code))
    /* Check that all relevant input operands are compatible, and update
       [MIN_VALUE, MAX_VALUE] to include their ranges.  */
    for (unsigned int i = 1; i < nops; ++i)
      {
	tree op = gimple_op (stmt, i);
	if (TREE_CODE (op) == INTEGER_CST)
	  {
	    /* Don't require the integer to have RHS_TYPE (which it might
	       not for things like shift amounts, etc.), but do require it
	       to fit the type.  */
	    if (!int_fits_type_p (op, type))
	      return;

	    min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
	    max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
	  }
	else if (TREE_CODE (op) == SSA_NAME)
	  {
	    /* Ignore codes that don't take uniform arguments.  */
	    if (!types_compatible_p (TREE_TYPE (op), type))
	      return;

	    wide_int op_min_value, op_max_value;
	    if (!vect_get_range_info (op, &op_min_value, &op_max_value))
	      return;

	    min_value = wi::min (min_value, op_min_value, sign);
	    max_value = wi::max (max_value, op_max_value, sign);
	  }
	else
	  return;
      }

  /* Try to switch signed types for unsigned types if we can.
     This is better for two reasons.  First, unsigned ops tend
     to be cheaper than signed ops.  Second, it means that we can
     handle things like:

       signed char c;
       int res = (int) c & 0xff00; // range [0x0000, 0xff00]

     as:

       signed char c;
       unsigned short res_1 = (unsigned short) c & 0xff00;
       int res = (int) res_1;

     where the intermediate result res_1 has unsigned rather than
     signed type.  */
  if (sign == SIGNED && !wi::neg_p (min_value))
    sign = UNSIGNED;

  /* See what precision is required for MIN_VALUE and MAX_VALUE.  */
  unsigned int precision1 = wi::min_precision (min_value, sign);
  unsigned int precision2 = wi::min_precision (max_value, sign);
  unsigned int value_precision = MAX (precision1, precision2);
  if (value_precision >= precision)
    return;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
		     " without loss of precision: %G",
		     sign == SIGNED ? "signed" : "unsigned",
		     value_precision, stmt);

  vect_set_operation_type (stmt_info, type, value_precision, sign);
  vect_set_min_input_precision (stmt_info, type, value_precision);
}
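
/* Illustrative example (editorial addition): if range information
   shows that the result of a 32-bit addition is always in [0, 200],
   then VALUE_PRECISION is 8 and the statement is recorded as being
   computable on unsigned chars, subject to the byte rounding done by
   vect_set_operation_type.  */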

/* Use information about the users of STMT's result to decide whether
   STMT (described by STMT_INFO) could be done in a narrower type.
   This is effectively a backward propagation.  */

static void
vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
{
  tree_code code = gimple_assign_rhs_code (stmt);
  unsigned int opno = (code == COND_EXPR ? 2 : 1);
  tree type = TREE_TYPE (gimple_op (stmt, opno));
  if (!vect_narrowable_type_p (type))
    return;

  unsigned int precision = TYPE_PRECISION (type);
  unsigned int operation_precision, min_input_precision;
  switch (code)
    {
    CASE_CONVERT:
      /* Only the bits that contribute to the output matter.  Don't change
	 the precision of the operation itself.  */
      operation_precision = precision;
      min_input_precision = stmt_info->min_output_precision;
      break;

    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      {
	tree shift = gimple_assign_rhs2 (stmt);
	if (TREE_CODE (shift) != INTEGER_CST
	    || !wi::ltu_p (wi::to_widest (shift), precision))
	  return;
	unsigned int const_shift = TREE_INT_CST_LOW (shift);
	if (code == LSHIFT_EXPR)
	  {
	    /* We need CONST_SHIFT fewer bits of the input.  */
	    operation_precision = stmt_info->min_output_precision;
	    min_input_precision = (MAX (operation_precision, const_shift)
				   - const_shift);
	  }
	else
	  {
	    /* We need CONST_SHIFT extra bits to do the operation.  */
	    operation_precision = (stmt_info->min_output_precision
				   + const_shift);
	    min_input_precision = operation_precision;
	  }
	break;
      }

    default:
      if (vect_truncatable_operation_p (code))
	{
	  /* Input bit N has no effect on output bits N-1 and lower.  */
	  operation_precision = stmt_info->min_output_precision;
	  min_input_precision = operation_precision;
	  break;
	}
      return;
    }

  if (operation_precision < precision)
    {
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
			 " without affecting users: %G",
			 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
			 operation_precision, stmt);
      vect_set_operation_type (stmt_info, type, operation_precision,
			       TYPE_SIGN (type));
    }
  vect_set_min_input_precision (stmt_info, type, min_input_precision);
}
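
/* Illustrative example (editorial addition): if only the low 8 bits
   of the result of x >> 3 are needed, then OPERATION_PRECISION is
   8 + 3 = 11 and the low 11 bits of x are required, so the shift can
   be recorded as a 16-bit operation once the precision is rounded up
   to a whole number of bytes.  */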
4637
4638/* Handle vect_determine_precisions for STMT_INFO, given that we
4639 have already done so for the users of its result. */
4640
4641void
4642vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4643{
4644 vect_determine_min_output_precision (stmt_info);
4645 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4646 {
4647 vect_determine_precisions_from_range (stmt_info, stmt);
4648 vect_determine_precisions_from_users (stmt_info, stmt);
4649 }
4650}
4651
4652/* Walk backwards through the vectorizable region to determine the
4653 values of these fields:
4654
4655 - min_output_precision
4656 - min_input_precision
4657 - operation_precision
4658 - operation_sign. */
4659
4660void
4661vect_determine_precisions (vec_info *vinfo)
4662{
4663 DUMP_VECT_SCOPE ("vect_determine_precisions");
4664
4665 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4666 {
99b1c316 4667 class loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
370c2ebe
RS
4668 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4669 unsigned int nbbs = loop->num_nodes;
4670
4671 for (unsigned int i = 0; i < nbbs; i++)
4672 {
4673 basic_block bb = bbs[nbbs - i - 1];
4674 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4675 !gsi_end_p (si); gsi_prev (&si))
6585ff8f
RS
4676 vect_determine_stmt_precisions
4677 (vinfo->lookup_stmt (gsi_stmt (si)));
370c2ebe
RS
4678 }
4679 }
4680 else
4681 {
4682 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4683 gimple_stmt_iterator si = bb_vinfo->region_end;
4684 gimple *stmt;
4685 do
4686 {
4687 if (!gsi_stmt (si))
4688 si = gsi_last_bb (bb_vinfo->bb);
4689 else
4690 gsi_prev (&si);
4691 stmt = gsi_stmt (si);
6585ff8f 4692 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
370c2ebe
RS
4693 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4694 vect_determine_stmt_precisions (stmt_info);
4695 }
4696 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4697 }
4698}
4699
ba9728b0 4700typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
1cbfeccc
RS
4701
4702struct vect_recog_func
4703{
4704 vect_recog_func_ptr fn;
4705 const char *name;
4706};
4707
/* Note that ordering matters - the first pattern matching on a stmt is
   taken, which means that usually the more complex ones need to precede
   the less complex ones (widen_sum only after dot_prod or sad, for
   example).  */
static vect_recog_func vect_vect_recog_func_ptrs[] = {
  { vect_recog_over_widening_pattern, "over_widening" },
  /* Must come after over_widening, which narrows the shift as much as
     possible beforehand.  */
  { vect_recog_average_pattern, "average" },
  { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
  { vect_recog_widen_mult_pattern, "widen_mult" },
  { vect_recog_dot_prod_pattern, "dot_prod" },
  { vect_recog_sad_pattern, "sad" },
  { vect_recog_widen_sum_pattern, "widen_sum" },
  { vect_recog_pow_pattern, "pow" },
  { vect_recog_widen_shift_pattern, "widen_shift" },
  { vect_recog_rotate_pattern, "rotate" },
  { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
  { vect_recog_divmod_pattern, "divmod" },
  { vect_recog_mult_pattern, "mult" },
  { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
  { vect_recog_bool_pattern, "bool" },
  /* This must come before mask conversion, and includes the parts
     of mask conversion that are needed for gather and scatter
     internal functions.  */
  { vect_recog_gather_scatter_pattern, "gather_scatter" },
  { vect_recog_mask_conversion_pattern, "mask_conversion" }
};

const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);

/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
			 tree pattern_vectype)
{
  gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);

  gimple *orig_pattern_stmt = NULL;
  if (is_pattern_stmt_p (orig_stmt_info))
    {
      /* We're replacing a statement in an existing pattern definition
	 sequence.  */
      orig_pattern_stmt = orig_stmt_info->stmt;
      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location,
			 "replacing earlier pattern %G", orig_pattern_stmt);

      /* To keep the book-keeping simple, just swap the lhs of the
	 old and new statements, so that the old one has a valid but
	 unused lhs.  */
      tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
      gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
      gimple_set_lhs (pattern_stmt, old_lhs);
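      /* For example (illustration only): if the earlier pattern statement
	 was x = a + b and the new one computed y, the new statement now
	 defines x, so existing uses stay valid, while the earlier
	 statement is left defining the unused y.  */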

      if (dump_enabled_p ())
	dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);

      /* Switch to the statement that ORIG replaces.  */
      orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);

      /* We shouldn't be replacing the main pattern statement.  */
      gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
		  != orig_pattern_stmt);
    }

  if (def_seq)
    for (gimple_stmt_iterator si = gsi_start (def_seq);
	 !gsi_end_p (si); gsi_next (&si))
      {
	stmt_vec_info pattern_stmt_info
	  = vect_init_pattern_stmt (gsi_stmt (si),
				    orig_stmt_info, pattern_vectype);
	/* Stmts in the def sequence are not vectorizable cycle or
	   induction defs, instead they should all be vect_internal_def
	   feeding the main pattern stmt which retains this def type.  */
	STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
      }

  if (orig_pattern_stmt)
    {
      vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);

      /* Insert all the new pattern statements before the original one.  */
      gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
      gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
					       orig_def_seq);
      gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
      gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);

      /* Remove the pattern statement that this new pattern replaces.  */
      gsi_remove (&gsi, false);
    }
  else
    vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
}

/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT_INFO: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
   a sequence of statements that has the same functionality and can be
   used to replace STMT_INFO.  It returns the last statement in the sequence
   and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
   PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
   statement, having first checked that the target supports the new operation
   in that type.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */

static void
vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
{
  vec_info *vinfo = stmt_info->vinfo;
  gimple *pattern_stmt;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;

  /* If this statement has already been replaced with pattern statements,
     leave the original statement alone, since the first match wins.
     Instead try to match against the definition statements that feed
     the main pattern statement.  */
  if (STMT_VINFO_IN_PATTERN_P (stmt_info))
    {
      gimple_stmt_iterator gsi;
      for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
	   !gsi_end_p (gsi); gsi_next (&gsi))
	vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
      return;
    }

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
  pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
  if (!pattern_stmt)
    {
      /* Clear any half-formed pattern definition sequence.  */
      STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
      return;
    }

  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
  gcc_assert (pattern_vectype);

20f06221 4856 /* Found a vectorizable pattern. */
73fbfcad 4857 if (dump_enabled_p ())
3c2a8ed0
DM
4858 dump_printf_loc (MSG_NOTE, vect_location,
4859 "%s pattern recognized: %G",
4860 recog_func->name, pattern_stmt);
b8698a0f 4861
726a989a 4862 /* Mark the stmts that are involved in the pattern. */
cef6cac8 4863 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
20f06221 4864
b5aeb3bb
IR
4865 /* Patterns cannot be vectorized using SLP, because they change the order of
4866 computation. */
f5709183 4867 if (loop_vinfo)
b94c2dc1
TV
4868 {
4869 unsigned ix, ix2;
32c91dfc 4870 stmt_vec_info *elem_ptr;
b94c2dc1 4871 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
32c91dfc 4872 elem_ptr, *elem_ptr == stmt_info);
b94c2dc1 4873 }
20f06221
DN
4874}


/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                 -             -          -
	 S2: a_2 = ..use(a_i)..         -             -          -
	 S3: a_1 = ..use(a_2)..         -             -          -
	 S4: a_0 = ..use(a_1)..         -             -          -
	 S5: ... = ..use(a_0)..         -             -          -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

	 stmt                     in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                 -             -          -
	 S2: a_2 = ..use(a_i)..         -             -          -
	 S3: a_1 = ..use(a_2)..         -             -          -
	 S4: a_0 = ..use(a_1)..         true          S6         -
	  '---> S6: a_new = ....        -             S4         -
	 S5: ... = ..use(a_0)..         -             -          -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

	 stmt                     in_pattern_p  related_stmt  vec_stmt
	 S1: a_i = ....                 -             -          -
	 S2: a_2 = ..use(a_i)..         -             -          -
	 S3: a_1 = ..use(a_2)..         -             -          -
	 > VS6: va_new = ....           -             -          -
	 S4: a_0 = ..use(a_1)..         true          S6         VS6
	  '---> S6: a_new = ....        -             S4         VS6
	 > VS5: ... = ..vuse(va_new)..  -             -          -
	 S5: ... = ..use(a_0)..         -             -          -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	 VS6: va_new = ....
	 VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */

void
vect_pattern_recog (vec_info *vinfo)
{
  class loop *loop;
  basic_block *bbs;
  unsigned int nbbs;
  gimple_stmt_iterator si;
  unsigned int i, j;

  vect_determine_precisions (vinfo);

  DUMP_VECT_SCOPE ("vect_pattern_recog");

  if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
    {
      loop = LOOP_VINFO_LOOP (loop_vinfo);
      bbs = LOOP_VINFO_BBS (loop_vinfo);
      nbbs = loop->num_nodes;

      /* Scan through the loop stmts, applying the pattern recognition
	 functions starting at each stmt visited:  */
      for (i = 0; i < nbbs; i++)
	{
	  basic_block bb = bbs[i];
	  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	    {
	      stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
	      /* Scan over all generic vect_recog_xxx_pattern functions.  */
	      for (j = 0; j < NUM_PATTERNS; j++)
		vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
				      stmt_info);
	    }
	}
    }
  else
    {
      bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
      for (si = bb_vinfo->region_begin;
	   gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
	{
	  gimple *stmt = gsi_stmt (si);
	  stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
	  if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
	    continue;

	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
	}
    }
}