4a61a337 1/* Analysis Utilities for Loop Vectorization.
fbd26352 2 Copyright (C) 2006-2019 Free Software Foundation, Inc.
4a61a337 3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
8c4c00c1 9Software Foundation; either version 3, or (at your option) any later
4a61a337 10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
8c4c00c1 18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
4a61a337 20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
9ef16211 24#include "backend.h"
7c29e30e 25#include "rtl.h"
4a61a337 26#include "tree.h"
9ef16211 27#include "gimple.h"
9ef16211 28#include "ssa.h"
7c29e30e 29#include "expmed.h"
30#include "optabs-tree.h"
31#include "insn-config.h"
32#include "recog.h" /* FIXME: for insn_data */
b20a8bb4 33#include "fold-const.h"
9ed99284 34#include "stor-layout.h"
bc61cadb 35#include "tree-eh.h"
a8783bee 36#include "gimplify.h"
dcf1a1ec 37#include "gimple-iterator.h"
4a61a337 38#include "cfgloop.h"
4a61a337 39#include "tree-vectorizer.h"
b9ed1410 40#include "dumpfile.h"
f7715905 41#include "builtins.h"
901db6c4 42#include "internal-fn.h"
62dbd209 43#include "case-cfn-macros.h"
a35ca444 44#include "fold-const-call.h"
45#include "attribs.h"
46#include "cgraph.h"
47#include "omp-simd-clone.h"
1c91e6c2 48#include "predict.h"
4a61a337 49
18bbd2f1 50/* Return true if we have a useful VR_RANGE range for VAR, storing it
51 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
52
53static bool
54vect_get_range_info (tree var, wide_int *min_value, wide_int *max_value)
55{
be44111e 56 value_range_kind vr_type = get_range_info (var, min_value, max_value);
18bbd2f1 57 wide_int nonzero = get_nonzero_bits (var);
58 signop sgn = TYPE_SIGN (TREE_TYPE (var));
59 if (intersect_range_with_nonzero_bits (vr_type, min_value, max_value,
60 nonzero, sgn) == VR_RANGE)
61 {
62 if (dump_enabled_p ())
63 {
64 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
65 dump_printf (MSG_NOTE, " has range [");
66 dump_hex (MSG_NOTE, *min_value);
67 dump_printf (MSG_NOTE, ", ");
68 dump_hex (MSG_NOTE, *max_value);
69 dump_printf (MSG_NOTE, "]\n");
70 }
71 return true;
72 }
73 else
74 {
75 if (dump_enabled_p ())
76 {
77 dump_generic_expr_loc (MSG_NOTE, vect_location, TDF_SLIM, var);
78 dump_printf (MSG_NOTE, " has no range info\n");
79 }
80 return false;
81 }
82}
83
972163bf 84/* Report that we've found an instance of pattern NAME in
85 statement STMT. */
86
87static void
88vect_pattern_detected (const char *name, gimple *stmt)
89{
90 if (dump_enabled_p ())
a4e972e3 91 dump_printf_loc (MSG_NOTE, vect_location, "%s: detected: %G", name, stmt);
972163bf 92}
93
aebdbd31 94/* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
95 return the pattern statement's stmt_vec_info. Set its vector type to
96 VECTYPE if it doesn't have one already. */
d5b5c2c5 97
aebdbd31 98static stmt_vec_info
d5b5c2c5 99vect_init_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
100 tree vectype)
101{
03c0d666 102 vec_info *vinfo = orig_stmt_info->vinfo;
103 stmt_vec_info pattern_stmt_info = vinfo->lookup_stmt (pattern_stmt);
a477acc5 104 if (pattern_stmt_info == NULL)
04b2391d 105 pattern_stmt_info = orig_stmt_info->vinfo->add_stmt (pattern_stmt);
d5b5c2c5 106 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt_info->stmt));
107
e05b01ad 108 pattern_stmt_info->pattern_stmt_p = true;
aebdbd31 109 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt_info;
d5b5c2c5 110 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
111 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
112 if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
113 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
aebdbd31 114 return pattern_stmt_info;
d5b5c2c5 115}
116
117/* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
118 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
119 have one already. */
120
121static void
122vect_set_pattern_stmt (gimple *pattern_stmt, stmt_vec_info orig_stmt_info,
123 tree vectype)
124{
125 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
aebdbd31 126 STMT_VINFO_RELATED_STMT (orig_stmt_info)
127 = vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, vectype);
d5b5c2c5 128}
129
e10293de 130/* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
131 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
132 be different from the vector type of the final pattern statement. */
133
6d741312 134static inline void
e10293de 135append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
136 tree vectype = NULL_TREE)
6d741312 137{
e10293de 138 vec_info *vinfo = stmt_info->vinfo;
139 if (vectype)
140 {
04b2391d 141 stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
e10293de 142 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
143 }
e0d98d5f 144 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
e10293de 145 new_stmt);
6d741312 146}
147
57abb697 148/* The caller wants to perform new operations on vect_external variable
149 VAR, so that the result of the operations would also be vect_external.
150 Return the edge on which the operations can be performed, if one exists.
151 Return null if the operations should instead be treated as part of
152 the pattern that needs them. */
153
154static edge
155vect_get_external_def_edge (vec_info *vinfo, tree var)
156{
157 edge e = NULL;
158 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
159 {
160 e = loop_preheader_edge (loop_vinfo->loop);
161 if (!SSA_NAME_IS_DEFAULT_DEF (var))
162 {
163 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (var));
164 if (bb == NULL
165 || !dominated_by_p (CDI_DOMINATORS, e->dest, bb))
166 e = NULL;
167 }
168 }
169 return e;
170}
171
1ba54d4e 172/* Return true if the target supports a vector version of CODE,
173 where CODE is known to map to a direct optab. ITYPE specifies
174 the type of (some of) the scalar inputs and OTYPE specifies the
175 type of the scalar result.
176
 177 If CODE allows the inputs and outputs to have different types
178 (such as for WIDEN_SUM_EXPR), it is the input mode rather
179 than the output mode that determines the appropriate target pattern.
180 Operand 0 of the target pattern then specifies the mode that the output
181 must have.
182
183 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
184 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
185 is nonnull. */
186
187static bool
188vect_supportable_direct_optab_p (tree otype, tree_code code,
189 tree itype, tree *vecotype_out,
190 tree *vecitype_out = NULL)
191{
192 tree vecitype = get_vectype_for_scalar_type (itype);
193 if (!vecitype)
194 return false;
195
196 tree vecotype = get_vectype_for_scalar_type (otype);
197 if (!vecotype)
198 return false;
199
200 optab optab = optab_for_tree_code (code, vecitype, optab_default);
201 if (!optab)
202 return false;
203
204 insn_code icode = optab_handler (optab, TYPE_MODE (vecitype));
205 if (icode == CODE_FOR_nothing
206 || insn_data[icode].operand[0].mode != TYPE_MODE (vecotype))
207 return false;
208
209 *vecotype_out = vecotype;
210 if (vecitype_out)
211 *vecitype_out = vecitype;
212 return true;
213}
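
/* For example, the dot-product recognizer below uses this helper to ask
   whether the target has a vector DOT_PROD_EXPR taking HALF_TYPE inputs
   and producing TYPE outputs:

     tree half_vectype;
     if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
                                           type_out, &half_vectype))
       return NULL;

   On success *TYPE_OUT and HALF_VECTYPE hold the corresponding vector
   types.  */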
214
e10293de 215/* Round bit precision PRECISION up to a full element. */
216
217static unsigned int
218vect_element_precision (unsigned int precision)
219{
220 precision = 1 << ceil_log2 (precision);
221 return MAX (precision, BITS_PER_UNIT);
222}
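
/* For example, vect_element_precision (17) returns 32 (the next power of
   two), while vect_element_precision (3) returns 8 rather than 4, since
   the result is never narrower than BITS_PER_UNIT (assumed to be 8 here
   for illustration).  */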
223
c32b0020 224/* If OP is defined by a statement that's being considered for vectorization,
225 return information about that statement, otherwise return NULL. */
226
227static stmt_vec_info
228vect_get_internal_def (vec_info *vinfo, tree op)
229{
9cfd4e76 230 stmt_vec_info def_stmt_info = vinfo->lookup_def (op);
231 if (def_stmt_info
232 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def)
233 return def_stmt_info;
234 return NULL;
c32b0020 235}
236
ecc42a77 237/* Check whether NAME, an ssa-name used in STMT_VINFO,
a2287001 238 is a result of a type promotion, such that:
4a61a337 239 DEF_STMT: NAME = NOP (name0)
0eee81bc 240 If CHECK_SIGN is TRUE, check that either both types are signed or both are
241 unsigned. */
4a61a337 242
243static bool
ecc42a77 244type_conversion_p (tree name, stmt_vec_info stmt_vinfo, bool check_sign,
42acab1c 245 tree *orig_type, gimple **def_stmt, bool *promotion)
4a61a337 246{
4a61a337 247 tree type = TREE_TYPE (name);
248 tree oprnd0;
249 enum vect_def_type dt;
4a61a337 250
bfa5bad6 251 stmt_vec_info def_stmt_info;
252 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, &dt, &def_stmt_info,
253 def_stmt))
4a61a337 254 return false;
255
f083cd24 256 if (dt != vect_internal_def
257 && dt != vect_external_def && dt != vect_constant_def)
4a61a337 258 return false;
259
087903db 260 if (!*def_stmt)
4a61a337 261 return false;
262
75a70cf9 263 if (!is_gimple_assign (*def_stmt))
4a61a337 264 return false;
265
087903db 266 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
4a61a337 267 return false;
268
75a70cf9 269 oprnd0 = gimple_assign_rhs1 (*def_stmt);
4a61a337 270
087903db 271 *orig_type = TREE_TYPE (oprnd0);
272 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
273 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
274 return false;
275
276 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
277 *promotion = true;
087903db 278 else
a2287001 279 *promotion = false;
4a61a337 280
bf8b3614 281 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dt))
4a61a337 282 return false;
283
4a61a337 284 return true;
285}
286
e10293de 287/* Holds information about an input operand after some sign changes
288 and type promotions have been peeled away. */
289struct vect_unpromoted_value {
290 vect_unpromoted_value ();
291
292 void set_op (tree, vect_def_type, stmt_vec_info = NULL);
293
294 /* The value obtained after peeling away zero or more casts. */
295 tree op;
296
297 /* The type of OP. */
298 tree type;
299
300 /* The definition type of OP. */
301 vect_def_type dt;
302
303 /* If OP is the result of peeling at least one cast, and if the cast
304 of OP itself is a vectorizable statement, CASTER identifies that
305 statement, otherwise it is null. */
306 stmt_vec_info caster;
307};
308
309inline vect_unpromoted_value::vect_unpromoted_value ()
310 : op (NULL_TREE),
311 type (NULL_TREE),
312 dt (vect_uninitialized_def),
313 caster (NULL)
314{
315}
316
317/* Set the operand to OP_IN, its definition type to DT_IN, and the
318 statement that casts it to CASTER_IN. */
319
320inline void
321vect_unpromoted_value::set_op (tree op_in, vect_def_type dt_in,
322 stmt_vec_info caster_in)
323{
324 op = op_in;
325 type = TREE_TYPE (op);
326 dt = dt_in;
327 caster = caster_in;
328}
329
330/* If OP is a vectorizable SSA name, strip a sequence of integer conversions
331 to reach some vectorizable inner operand OP', continuing as long as it
332 is possible to convert OP' back to OP using a possible sign change
333 followed by a possible promotion P. Return this OP', or null if OP is
334 not a vectorizable SSA name. If there is a promotion P, describe its
18bbd2f1 335 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
336 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
337 have more than one user.
e10293de 338
339 A successful return means that it is possible to go from OP' to OP
340 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
341 whereas the cast from UNPROM to OP might be a promotion, a sign
342 change, or a nop.
343
344 E.g. say we have:
345
346 signed short *ptr = ...;
347 signed short C = *ptr;
348 unsigned short B = (unsigned short) C; // sign change
349 signed int A = (signed int) B; // unsigned promotion
350 ...possible other uses of A...
351 unsigned int OP = (unsigned int) A; // sign change
352
353 In this case it's possible to go directly from C to OP using:
354
355 OP = (unsigned int) (unsigned short) C;
356 +------------+ +--------------+
357 promotion sign change
358
359 so OP' would be C. The input to the promotion is B, so UNPROM
360 would describe B. */
361
362static tree
363vect_look_through_possible_promotion (vec_info *vinfo, tree op,
18bbd2f1 364 vect_unpromoted_value *unprom,
365 bool *single_use_p = NULL)
e10293de 366{
367 tree res = NULL_TREE;
368 tree op_type = TREE_TYPE (op);
369 unsigned int orig_precision = TYPE_PRECISION (op_type);
d45dad8d 370 unsigned int min_precision = orig_precision;
e10293de 371 stmt_vec_info caster = NULL;
372 while (TREE_CODE (op) == SSA_NAME && INTEGRAL_TYPE_P (op_type))
373 {
374 /* See whether OP is simple enough to vectorize. */
bfa5bad6 375 stmt_vec_info def_stmt_info;
e10293de 376 gimple *def_stmt;
377 vect_def_type dt;
bfa5bad6 378 if (!vect_is_simple_use (op, vinfo, &dt, &def_stmt_info, &def_stmt))
e10293de 379 break;
380
381 /* If OP is the input of a demotion, skip over it to see whether
382 OP is itself the result of a promotion. If so, the combined
383 effect of the promotion and the demotion might fit the required
384 pattern, otherwise neither operation fits.
385
386 This copes with cases such as the result of an arithmetic
387 operation being truncated before being stored, and where that
388 arithmetic operation has been recognized as an over-widened one. */
d45dad8d 389 if (TYPE_PRECISION (op_type) <= min_precision)
e10293de 390 {
391 /* Use OP as the UNPROM described above if we haven't yet
392 found a promotion, or if using the new input preserves the
393 sign of the previous promotion. */
394 if (!res
395 || TYPE_PRECISION (unprom->type) == orig_precision
396 || TYPE_SIGN (unprom->type) == TYPE_SIGN (op_type))
d45dad8d 397 {
398 unprom->set_op (op, dt, caster);
399 min_precision = TYPE_PRECISION (op_type);
400 }
e10293de 401 /* Stop if we've already seen a promotion and if this
402 conversion does more than change the sign. */
403 else if (TYPE_PRECISION (op_type)
404 != TYPE_PRECISION (unprom->type))
405 break;
406
407 /* The sequence now extends to OP. */
408 res = op;
409 }
410
411 /* See whether OP is defined by a cast. Record it as CASTER if
412 the cast is potentially vectorizable. */
413 if (!def_stmt)
414 break;
bfa5bad6 415 caster = def_stmt_info;
416
417 /* Ignore pattern statements, since we don't link uses for them. */
418 if (caster
419 && single_use_p
420 && !STMT_VINFO_RELATED_STMT (caster)
421 && !has_single_use (res))
422 *single_use_p = false;
423
e10293de 424 gassign *assign = dyn_cast <gassign *> (def_stmt);
425 if (!assign || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt)))
426 break;
427
428 /* Continue with the input to the cast. */
429 op = gimple_assign_rhs1 (def_stmt);
430 op_type = TREE_TYPE (op);
431 }
432 return res;
433}
434
435/* OP is an integer operand to an operation that returns TYPE, and we
436 want to treat the operation as a widening one. So far we can treat
437 it as widening from *COMMON_TYPE.
438
439 Return true if OP is suitable for such a widening operation,
440 either widening from *COMMON_TYPE or from some supertype of it.
441 Update *COMMON_TYPE to the supertype in the latter case.
442
443 SHIFT_P is true if OP is a shift amount. */
444
445static bool
446vect_joust_widened_integer (tree type, bool shift_p, tree op,
447 tree *common_type)
448{
449 /* Calculate the minimum precision required by OP, without changing
450 the sign of either operand. */
451 unsigned int precision;
452 if (shift_p)
453 {
454 if (!wi::leu_p (wi::to_widest (op), TYPE_PRECISION (type) / 2))
455 return false;
456 precision = TREE_INT_CST_LOW (op);
457 }
458 else
459 {
460 precision = wi::min_precision (wi::to_widest (op),
461 TYPE_SIGN (*common_type));
462 if (precision * 2 > TYPE_PRECISION (type))
463 return false;
464 }
465
466 /* If OP requires a wider type, switch to that type. The checks
467 above ensure that this is still narrower than the result. */
468 precision = vect_element_precision (precision);
469 if (TYPE_PRECISION (*common_type) < precision)
470 *common_type = build_nonstandard_integer_type
471 (precision, TYPE_UNSIGNED (*common_type));
472 return true;
473}
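
/* For instance (illustrative values): with TYPE a 32-bit integer and
   *COMMON_TYPE an 8-bit unsigned type, a constant shift amount OP of 10
   is accepted because 10 <= 32 / 2; the required precision 10 is then
   rounded up to 16, so *COMMON_TYPE is widened to a 16-bit unsigned
   type.  */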
474
475/* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
 476 is narrower than TYPE, storing the supertype in *COMMON_TYPE if so. */
477
478static bool
479vect_joust_widened_type (tree type, tree new_type, tree *common_type)
480{
481 if (types_compatible_p (*common_type, new_type))
482 return true;
483
484 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
485 if ((TYPE_PRECISION (new_type) < TYPE_PRECISION (*common_type))
486 && (TYPE_UNSIGNED (new_type) || !TYPE_UNSIGNED (*common_type)))
487 return true;
488
489 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
490 if (TYPE_PRECISION (*common_type) < TYPE_PRECISION (new_type)
491 && (TYPE_UNSIGNED (*common_type) || !TYPE_UNSIGNED (new_type)))
492 {
493 *common_type = new_type;
494 return true;
495 }
496
497 /* We have mismatched signs, with the signed type being
498 no wider than the unsigned type. In this case we need
499 a wider signed type. */
500 unsigned int precision = MAX (TYPE_PRECISION (*common_type),
501 TYPE_PRECISION (new_type));
502 precision *= 2;
503 if (precision * 2 > TYPE_PRECISION (type))
504 return false;
505
506 *common_type = build_nonstandard_integer_type (precision, false);
507 return true;
508}
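
/* For example (illustrative types): if *COMMON_TYPE is unsigned char and
   NEW_TYPE is signed char, neither type can hold all values of the other,
   so a 16-bit signed type is chosen instead; with a 32-bit TYPE that
   supertype is still no wider than half of TYPE, so the call succeeds.  */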
509
510/* Check whether STMT_INFO can be viewed as a tree of integer operations
511 in which each node either performs CODE or WIDENED_CODE, and where
512 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
513 specifies the maximum number of leaf operands. SHIFT_P says whether
514 CODE and WIDENED_CODE are some sort of shift.
515
516 If STMT_INFO is such a tree, return the number of leaf operands
517 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
518 to a type that (a) is narrower than the result of STMT_INFO and
519 (b) can hold all leaf operand values.
520
521 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
522 exists. */
523
524static unsigned int
525vect_widened_op_tree (stmt_vec_info stmt_info, tree_code code,
526 tree_code widened_code, bool shift_p,
527 unsigned int max_nops,
528 vect_unpromoted_value *unprom, tree *common_type)
529{
530 /* Check for an integer operation with the right code. */
9cfd4e76 531 vec_info *vinfo = stmt_info->vinfo;
e10293de 532 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
533 if (!assign)
534 return 0;
535
536 tree_code rhs_code = gimple_assign_rhs_code (assign);
537 if (rhs_code != code && rhs_code != widened_code)
538 return 0;
539
540 tree type = gimple_expr_type (assign);
541 if (!INTEGRAL_TYPE_P (type))
542 return 0;
543
544 /* Assume that both operands will be leaf operands. */
545 max_nops -= 2;
546
547 /* Check the operands. */
548 unsigned int next_op = 0;
549 for (unsigned int i = 0; i < 2; ++i)
550 {
551 vect_unpromoted_value *this_unprom = &unprom[next_op];
552 unsigned int nops = 1;
553 tree op = gimple_op (assign, i + 1);
554 if (i == 1 && TREE_CODE (op) == INTEGER_CST)
555 {
556 /* We already have a common type from earlier operands.
557 Update it to account for OP. */
558 this_unprom->set_op (op, vect_constant_def);
559 if (!vect_joust_widened_integer (type, shift_p, op, common_type))
560 return 0;
561 }
562 else
563 {
564 /* Only allow shifts by constants. */
565 if (shift_p && i == 1)
566 return 0;
567
568 if (!vect_look_through_possible_promotion (stmt_info->vinfo, op,
569 this_unprom))
570 return 0;
571
572 if (TYPE_PRECISION (this_unprom->type) == TYPE_PRECISION (type))
573 {
574 /* The operand isn't widened. If STMT_INFO has the code
575 for an unwidened operation, recursively check whether
576 this operand is a node of the tree. */
577 if (rhs_code != code
578 || max_nops == 0
579 || this_unprom->dt != vect_internal_def)
580 return 0;
581
582 /* Give back the leaf slot allocated above now that we're
583 not treating this as a leaf operand. */
584 max_nops += 1;
585
586 /* Recursively process the definition of the operand. */
587 stmt_vec_info def_stmt_info
9cfd4e76 588 = vinfo->lookup_def (this_unprom->op);
e10293de 589 nops = vect_widened_op_tree (def_stmt_info, code, widened_code,
590 shift_p, max_nops, this_unprom,
591 common_type);
592 if (nops == 0)
593 return 0;
594
595 max_nops -= nops;
596 }
597 else
598 {
599 /* Make sure that the operand is narrower than the result. */
600 if (TYPE_PRECISION (this_unprom->type) * 2
601 > TYPE_PRECISION (type))
602 return 0;
603
604 /* Update COMMON_TYPE for the new operand. */
605 if (i == 0)
606 *common_type = this_unprom->type;
607 else if (!vect_joust_widened_type (type, this_unprom->type,
608 common_type))
609 return 0;
610 }
611 }
612 next_op += nops;
613 }
614 return next_op;
615}
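
/* As used by the dot-product recognizer below (illustrative types): the
   statement

     prod = (int) x_t * (int) y_t;   // x_t, y_t of type short

   forms a single-node tree for CODE == MULT_EXPR with two leaf operands;
   UNPROM[0] and UNPROM[1] then describe x_t and y_t, the return value is
   2, and *COMMON_TYPE becomes short.  */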
616
75a70cf9 617/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
618 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
619
620static tree
42acab1c 621vect_recog_temp_ssa_var (tree type, gimple *stmt)
75a70cf9 622{
03d37e4e 623 return make_temp_ssa_name (type, stmt, "patt");
75a70cf9 624}
4a61a337 625
f0daa5a6 626/* STMT2_INFO describes a type conversion that could be split into STMT1
627 followed by a version of STMT2_INFO that takes NEW_RHS as its first
628 input. Try to do this using pattern statements, returning true on
629 success. */
630
631static bool
632vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs,
633 gimple *stmt1, tree vectype)
634{
635 if (is_pattern_stmt_p (stmt2_info))
636 {
637 /* STMT2_INFO is part of a pattern. Get the statement to which
638 the pattern is attached. */
aebdbd31 639 stmt_vec_info orig_stmt2_info = STMT_VINFO_RELATED_STMT (stmt2_info);
f0daa5a6 640 vect_init_pattern_stmt (stmt1, orig_stmt2_info, vectype);
641
642 if (dump_enabled_p ())
a4e972e3 643 dump_printf_loc (MSG_NOTE, vect_location,
644 "Splitting pattern statement: %G", stmt2_info->stmt);
f0daa5a6 645
646 /* Since STMT2_INFO is a pattern statement, we can change it
647 in-situ without worrying about changing the code for the
648 containing block. */
649 gimple_assign_set_rhs1 (stmt2_info->stmt, new_rhs);
650
651 if (dump_enabled_p ())
652 {
a4e972e3 653 dump_printf_loc (MSG_NOTE, vect_location, "into: %G", stmt1);
654 dump_printf_loc (MSG_NOTE, vect_location, "and: %G",
655 stmt2_info->stmt);
f0daa5a6 656 }
657
658 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info);
aebdbd31 659 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info) == stmt2_info)
f0daa5a6 660 /* STMT2_INFO is the actual pattern statement. Add STMT1
661 to the end of the definition sequence. */
662 gimple_seq_add_stmt_without_update (def_seq, stmt1);
663 else
664 {
665 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
666 before it. */
667 gimple_stmt_iterator gsi = gsi_for_stmt (stmt2_info->stmt, def_seq);
668 gsi_insert_before_without_update (&gsi, stmt1, GSI_SAME_STMT);
669 }
670 return true;
671 }
672 else
673 {
674 /* STMT2_INFO doesn't yet have a pattern. Try to create a
675 two-statement pattern now. */
676 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info));
677 tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt));
678 tree lhs_vectype = get_vectype_for_scalar_type (lhs_type);
679 if (!lhs_vectype)
680 return false;
681
682 if (dump_enabled_p ())
a4e972e3 683 dump_printf_loc (MSG_NOTE, vect_location,
684 "Splitting statement: %G", stmt2_info->stmt);
f0daa5a6 685
686 /* Add STMT1 as a singleton pattern definition sequence. */
687 gimple_seq *def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info);
688 vect_init_pattern_stmt (stmt1, stmt2_info, vectype);
689 gimple_seq_add_stmt_without_update (def_seq, stmt1);
690
691 /* Build the second of the two pattern statements. */
692 tree new_lhs = vect_recog_temp_ssa_var (lhs_type, NULL);
693 gassign *new_stmt2 = gimple_build_assign (new_lhs, NOP_EXPR, new_rhs);
694 vect_set_pattern_stmt (new_stmt2, stmt2_info, lhs_vectype);
695
696 if (dump_enabled_p ())
697 {
698 dump_printf_loc (MSG_NOTE, vect_location,
a4e972e3 699 "into pattern statements: %G", stmt1);
700 dump_printf_loc (MSG_NOTE, vect_location, "and: %G", new_stmt2);
f0daa5a6 701 }
702
703 return true;
704 }
705}
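
/* For example (hypothetical names): if STMT2_INFO is the conversion

     a = (int) b;   // b has type signed char

   and the caller wants to tap an intermediate signed short value, STMT1
   could be "tmp = (short) b" with NEW_RHS being tmp; on success the
   pattern then contains "tmp = (short) b" followed by "a = (int) tmp".  */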
706
e10293de 707/* Convert UNPROM to TYPE and return the result, adding new statements
708 to STMT_INFO's pattern definition statements if no better way is
709 available. VECTYPE is the vector form of TYPE. */
710
711static tree
712vect_convert_input (stmt_vec_info stmt_info, tree type,
713 vect_unpromoted_value *unprom, tree vectype)
714{
715 /* Check for a no-op conversion. */
716 if (types_compatible_p (type, TREE_TYPE (unprom->op)))
717 return unprom->op;
718
719 /* Allow the caller to create constant vect_unpromoted_values. */
720 if (TREE_CODE (unprom->op) == INTEGER_CST)
721 return wide_int_to_tree (type, wi::to_widest (unprom->op));
722
6a1665ab 723 tree input = unprom->op;
e10293de 724 if (unprom->caster)
725 {
726 tree lhs = gimple_get_lhs (unprom->caster->stmt);
6a1665ab 727 tree lhs_type = TREE_TYPE (lhs);
728
729 /* If the result of the existing cast is the right width, use it
730 instead of the source of the cast. */
731 if (TYPE_PRECISION (lhs_type) == TYPE_PRECISION (type))
732 input = lhs;
733 /* If the precision we want is between the source and result
734 precisions of the existing cast, try splitting the cast into
735 two and tapping into a mid-way point. */
736 else if (TYPE_PRECISION (lhs_type) > TYPE_PRECISION (type)
737 && TYPE_PRECISION (type) > TYPE_PRECISION (unprom->type))
738 {
739 /* In order to preserve the semantics of the original cast,
740 give the mid-way point the same signedness as the input value.
741
742 It would be possible to use a signed type here instead if
743 TYPE is signed and UNPROM->TYPE is unsigned, but that would
744 make the sign of the midtype sensitive to the order in
745 which we process the statements, since the signedness of
746 TYPE is the signedness required by just one of possibly
747 many users. Also, unsigned promotions are usually as cheap
748 as or cheaper than signed ones, so it's better to keep an
749 unsigned promotion. */
750 tree midtype = build_nonstandard_integer_type
751 (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type));
752 tree vec_midtype = get_vectype_for_scalar_type (midtype);
753 if (vec_midtype)
754 {
755 input = vect_recog_temp_ssa_var (midtype, NULL);
756 gassign *new_stmt = gimple_build_assign (input, NOP_EXPR,
757 unprom->op);
758 if (!vect_split_statement (unprom->caster, input, new_stmt,
759 vec_midtype))
760 append_pattern_def_seq (stmt_info, new_stmt, vec_midtype);
761 }
762 }
763
764 /* See if we can reuse an existing result. */
765 if (types_compatible_p (type, TREE_TYPE (input)))
766 return input;
e10293de 767 }
768
769 /* We need a new conversion statement. */
770 tree new_op = vect_recog_temp_ssa_var (type, NULL);
6a1665ab 771 gassign *new_stmt = gimple_build_assign (new_op, NOP_EXPR, input);
f0daa5a6 772
57abb697 773 /* If OP is an external value, see if we can insert the new statement
774 on an incoming edge. */
6a1665ab 775 if (input == unprom->op && unprom->dt == vect_external_def)
776 if (edge e = vect_get_external_def_edge (stmt_info->vinfo, input))
57abb697 777 {
778 basic_block new_bb = gsi_insert_on_edge_immediate (e, new_stmt);
779 gcc_assert (!new_bb);
780 return new_op;
781 }
782
e10293de 783 /* As a (common) last resort, add the statement to the pattern itself. */
784 append_pattern_def_seq (stmt_info, new_stmt, vectype);
785 return new_op;
786}
787
788/* Invoke vect_convert_input for N elements of UNPROM and store the
789 result in the corresponding elements of RESULT. */
790
791static void
792vect_convert_inputs (stmt_vec_info stmt_info, unsigned int n,
793 tree *result, tree type, vect_unpromoted_value *unprom,
794 tree vectype)
795{
796 for (unsigned int i = 0; i < n; ++i)
797 {
798 unsigned int j;
799 for (j = 0; j < i; ++j)
800 if (unprom[j].op == unprom[i].op)
801 break;
802 if (j < i)
803 result[i] = result[j];
804 else
805 result[i] = vect_convert_input (stmt_info, type, &unprom[i], vectype);
806 }
807}
808
809/* The caller has created a (possibly empty) sequence of pattern definition
810 statements followed by a single statement PATTERN_STMT. Cast the result
811 of this final statement to TYPE. If a new statement is needed, add
812 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
813 and return the new statement, otherwise return PATTERN_STMT as-is.
814 VECITYPE is the vector form of PATTERN_STMT's result type. */
815
816static gimple *
817vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt,
818 tree vecitype)
819{
820 tree lhs = gimple_get_lhs (pattern_stmt);
821 if (!types_compatible_p (type, TREE_TYPE (lhs)))
822 {
823 append_pattern_def_seq (stmt_info, pattern_stmt, vecitype);
824 tree cast_var = vect_recog_temp_ssa_var (type, NULL);
825 pattern_stmt = gimple_build_assign (cast_var, NOP_EXPR, lhs);
826 }
827 return pattern_stmt;
828}
829
82f60137 830/* Return true if STMT_VINFO describes a reduction for which reassociation
921e542b 831 is allowed. If STMT_VINFO is part of a group, assume that it's part of
832 a reduction chain and optimistically assume that all statements
833 except the last allow reassociation. */
82f60137 834
835static bool
836vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo)
837{
838 return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def
921e542b 839 ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION
a477acc5 840 : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL);
82f60137 841}
842
d04bd45a 843/* As above, but also require it to have code CODE and to be a reduction
844 in the outermost loop. When returning true, store the operands in
845 *OP0_OUT and *OP1_OUT. */
846
847static bool
848vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code,
849 tree *op0_out, tree *op1_out)
850{
851 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
852 if (!loop_info)
853 return false;
854
855 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
856 if (!assign || gimple_assign_rhs_code (assign) != code)
857 return false;
858
859 /* We don't allow changing the order of the computation in the inner-loop
860 when doing outer-loop vectorization. */
861 struct loop *loop = LOOP_VINFO_LOOP (loop_info);
a73182ff 862 if (loop && nested_in_vect_loop_p (loop, stmt_info))
d04bd45a 863 return false;
864
865 if (!vect_reassociating_reduction_p (stmt_info))
866 return false;
867
868 *op0_out = gimple_assign_rhs1 (assign);
869 *op1_out = gimple_assign_rhs2 (assign);
870 return true;
871}
872
4a61a337 873/* Function vect_recog_dot_prod_pattern
874
875 Try to find the following pattern:
876
877 type x_t, y_t;
878 TYPE1 prod;
879 TYPE2 sum = init;
880 loop:
881 sum_0 = phi <init, sum_1>
882 S1 x_t = ...
883 S2 y_t = ...
884 S3 x_T = (TYPE1) x_t;
885 S4 y_T = (TYPE1) y_t;
886 S5 prod = x_T * y_T;
887 [S6 prod = (TYPE2) prod; #optional]
888 S7 sum_1 = prod + sum_0;
889
48e1416a 890 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
 891 same size as 'TYPE1' or bigger. This is a special case of a reduction
4a61a337 892 computation.
48e1416a 893
4a61a337 894 Input:
895
7987f646 896 * STMT_VINFO: The stmt from which the pattern search begins. In the
0187b74e 897 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
898 will be detected.
4a61a337 899
900 Output:
901
4a61a337 902 * TYPE_OUT: The type of the output of this pattern.
903
904 * Return value: A new stmt that will be used to replace the sequence of
905 stmts that constitute the pattern. In this case it will be:
906 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
221e9a92 907
908 Note: The dot-prod idiom is a widening reduction pattern that is
909 vectorized without preserving all the intermediate results. It
910 produces only N/2 (widened) results (by summing up pairs of
911 intermediate results) rather than all N results. Therefore, we
912 cannot allow this pattern when we want to get all the results and in
913 the correct order (as is the case when this computation is in an
 914 inner-loop nested in an outer-loop that is being vectorized). */
4a61a337 915
42acab1c 916static gimple *
7987f646 917vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4a61a337 918{
4a61a337 919 tree oprnd0, oprnd1;
7987f646 920 gimple *last_stmt = stmt_vinfo->stmt;
c32b0020 921 vec_info *vinfo = stmt_vinfo->vinfo;
4a61a337 922 tree type, half_type;
42acab1c 923 gimple *pattern_stmt;
c86930b0 924 tree var;
4a61a337 925
48e1416a 926 /* Look for the following pattern
4a61a337 927 DX = (TYPE1) X;
928 DY = (TYPE1) Y;
48e1416a 929 DPROD = DX * DY;
4a61a337 930 DDPROD = (TYPE2) DPROD;
931 sum_1 = DDPROD + sum_0;
48e1416a 932 In which
4a61a337 933 - DX is double the size of X
934 - DY is double the size of Y
935 - DX, DY, DPROD all have the same type
 936 - sum is the same size as DPROD or bigger
937 - sum has been recognized as a reduction variable.
938
939 This is equivalent to:
940 DPROD = X w* Y; #widen mult
941 sum_1 = DPROD w+ sum_0; #widen summation
942 or
943 DPROD = X w* Y; #widen mult
944 sum_1 = DPROD + sum_0; #summation
945 */
946
947 /* Starting from LAST_STMT, follow the defs of its uses in search
948 of the above pattern. */
949
d04bd45a 950 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
951 &oprnd0, &oprnd1))
1c9d2c2a 952 return NULL;
953
d04bd45a 954 type = gimple_expr_type (last_stmt);
1c9d2c2a 955
e10293de 956 vect_unpromoted_value unprom_mult;
957 oprnd0 = vect_look_through_possible_promotion (vinfo, oprnd0, &unprom_mult);
4a61a337 958
0187b74e 959 /* So far so good. Since last_stmt was detected as a (summation) reduction,
4a61a337 960 we know that oprnd1 is the reduction variable (defined by a loop-header
961 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
962 Left to check that oprnd0 is defined by a (widen_)mult_expr */
e10293de 963 if (!oprnd0)
1991698f 964 return NULL;
4a61a337 965
c32b0020 966 stmt_vec_info mult_vinfo = vect_get_internal_def (vinfo, oprnd0);
967 if (!mult_vinfo)
4af22cd9 968 return NULL;
969
48e1416a 970 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
e3e3ee4b 971 inside the loop (in case we are analyzing an outer-loop). */
e10293de 972 vect_unpromoted_value unprom0[2];
973 if (!vect_widened_op_tree (mult_vinfo, MULT_EXPR, WIDEN_MULT_EXPR,
974 false, 2, unprom0, &half_type))
48e1416a 975 return NULL;
142c3207 976
e10293de 977 /* If there are two widening operations, make sure they agree on
978 the sign of the extension. */
979 if (TYPE_PRECISION (unprom_mult.type) != TYPE_PRECISION (type)
980 && TYPE_SIGN (unprom_mult.type) != TYPE_SIGN (half_type))
981 return NULL;
4a61a337 982
972163bf 983 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt);
984
e10293de 985 tree half_vectype;
1ba54d4e 986 if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type,
e10293de 987 type_out, &half_vectype))
1ba54d4e 988 return NULL;
48e1416a 989
e10293de 990 /* Get the inputs in the appropriate types. */
e10293de 991 tree mult_oprnd[2];
992 vect_convert_inputs (stmt_vinfo, 2, mult_oprnd, half_type,
993 unprom0, half_vectype);
994
75a70cf9 995 var = vect_recog_temp_ssa_var (type, NULL);
e9cf809e 996 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
e10293de 997 mult_oprnd[0], mult_oprnd[1], oprnd1);
48e1416a 998
75a70cf9 999 return pattern_stmt;
4a61a337 1000}
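
/* A source-level example of the dot-product pattern above (illustrative
   types):

     short a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += a[i] * b[i];   // a[i], b[i] promoted to int

   On targets that provide a vector dot-product optab this reduction is
   vectorized using DOT_PROD_EXPR instead of separate widening multiplies
   and additions.  */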
48e1416a 1001
0187b74e 1002
a2287001 1003/* Function vect_recog_sad_pattern
1004
1005 Try to find the following Sum of Absolute Difference (SAD) pattern:
1006
1007 type x_t, y_t;
1008 signed TYPE1 diff, abs_diff;
1009 TYPE2 sum = init;
1010 loop:
1011 sum_0 = phi <init, sum_1>
1012 S1 x_t = ...
1013 S2 y_t = ...
1014 S3 x_T = (TYPE1) x_t;
1015 S4 y_T = (TYPE1) y_t;
1016 S5 diff = x_T - y_T;
1017 S6 abs_diff = ABS_EXPR <diff>;
1018 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1019 S8 sum_1 = abs_diff + sum_0;
1020
1021 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
 1022 same size as 'TYPE1' or bigger. This is a special case of a reduction
1023 computation.
1024
1025 Input:
1026
7987f646 1027 * STMT_VINFO: The stmt from which the pattern search begins. In the
a2287001 1028 example, when this function is called with S8, the pattern
1029 {S3,S4,S5,S6,S7,S8} will be detected.
1030
1031 Output:
1032
a2287001 1033 * TYPE_OUT: The type of the output of this pattern.
1034
1035 * Return value: A new stmt that will be used to replace the sequence of
1036 stmts that constitute the pattern. In this case it will be:
1037 SAD_EXPR <x_t, y_t, sum_0>
1038 */
1039
42acab1c 1040static gimple *
7987f646 1041vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
a2287001 1042{
7987f646 1043 gimple *last_stmt = stmt_vinfo->stmt;
c32b0020 1044 vec_info *vinfo = stmt_vinfo->vinfo;
a2287001 1045 tree half_type;
a2287001 1046
a2287001 1047 /* Look for the following pattern
1048 DX = (TYPE1) X;
1049 DY = (TYPE1) Y;
1050 DDIFF = DX - DY;
1051 DAD = ABS_EXPR <DDIFF>;
 1052 [DAD = (TYPE2) DAD; #optional]
1053 sum_1 = DAD + sum_0;
1054 In which
1055 - DX is at least double the size of X
1056 - DY is at least double the size of Y
1057 - DX, DY, DDIFF, DAD all have the same type
 1058 - sum is the same size as DAD or bigger
1059 - sum has been recognized as a reduction variable.
1060
1061 This is equivalent to:
1062 DDIFF = X w- Y; #widen sub
1063 DAD = ABS_EXPR <DDIFF>;
1064 sum_1 = DAD w+ sum_0; #widen summation
1065 or
1066 DDIFF = X w- Y; #widen sub
1067 DAD = ABS_EXPR <DDIFF>;
1068 sum_1 = DAD + sum_0; #summation
1069 */
1070
1071 /* Starting from LAST_STMT, follow the defs of its uses in search
1072 of the above pattern. */
1073
d04bd45a 1074 tree plus_oprnd0, plus_oprnd1;
1075 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1076 &plus_oprnd0, &plus_oprnd1))
1c9d2c2a 1077 return NULL;
a2287001 1078
d04bd45a 1079 tree sum_type = gimple_expr_type (last_stmt);
1c9d2c2a 1080
e10293de 1081 /* Any non-truncating sequence of conversions is OK here, since
1082 with a successful match, the result of the ABS(U) is known to fit
1083 within the nonnegative range of the result type. (It cannot be the
1084 negative of the minimum signed value due to the range of the widening
1085 MINUS_EXPR.) */
1086 vect_unpromoted_value unprom_abs;
1087 plus_oprnd0 = vect_look_through_possible_promotion (vinfo, plus_oprnd0,
1088 &unprom_abs);
a2287001 1089
1090 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1091 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1092 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1093 Then check that plus_oprnd0 is defined by an abs_expr. */
1094
e10293de 1095 if (!plus_oprnd0)
a2287001 1096 return NULL;
1097
c32b0020 1098 stmt_vec_info abs_stmt_vinfo = vect_get_internal_def (vinfo, plus_oprnd0);
1099 if (!abs_stmt_vinfo)
a2287001 1100 return NULL;
1101
 1102 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1103 inside the loop (in case we are analyzing an outer-loop). */
c32b0020 1104 gassign *abs_stmt = dyn_cast <gassign *> (abs_stmt_vinfo->stmt);
1105 if (!abs_stmt
1106 || (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR
1107 && gimple_assign_rhs_code (abs_stmt) != ABSU_EXPR))
a2287001 1108 return NULL;
1109
1110 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
e10293de 1111 tree abs_type = TREE_TYPE (abs_oprnd);
a2287001 1112 if (TYPE_UNSIGNED (abs_type))
1113 return NULL;
1114
e10293de 1115 /* Peel off conversions from the ABS input. This can involve sign
1116 changes (e.g. from an unsigned subtraction to a signed ABS input)
1117 or signed promotion, but it can't include unsigned promotion.
1118 (Note that ABS of an unsigned promotion should have been folded
1119 away before now anyway.) */
1120 vect_unpromoted_value unprom_diff;
1121 abs_oprnd = vect_look_through_possible_promotion (vinfo, abs_oprnd,
1122 &unprom_diff);
1123 if (!abs_oprnd)
1124 return NULL;
1125 if (TYPE_PRECISION (unprom_diff.type) != TYPE_PRECISION (abs_type)
1126 && TYPE_UNSIGNED (unprom_diff.type))
a2287001 1127 return NULL;
1128
e10293de 1129 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
c32b0020 1130 stmt_vec_info diff_stmt_vinfo = vect_get_internal_def (vinfo, abs_oprnd);
1131 if (!diff_stmt_vinfo)
a2287001 1132 return NULL;
1133
 1134 /* FORNOW. Can continue analyzing the def-use chain when this stmt is in a phi
1135 inside the loop (in case we are analyzing an outer-loop). */
e10293de 1136 vect_unpromoted_value unprom[2];
1137 if (!vect_widened_op_tree (diff_stmt_vinfo, MINUS_EXPR, MINUS_EXPR,
1138 false, 2, unprom, &half_type))
a2287001 1139 return NULL;
1140
972163bf 1141 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt);
1142
e10293de 1143 tree half_vectype;
1144 if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type,
1145 type_out, &half_vectype))
1ba54d4e 1146 return NULL;
a2287001 1147
e10293de 1148 /* Get the inputs to the SAD_EXPR in the appropriate types. */
e10293de 1149 tree sad_oprnd[2];
1150 vect_convert_inputs (stmt_vinfo, 2, sad_oprnd, half_type,
1151 unprom, half_vectype);
1152
a2287001 1153 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
e10293de 1154 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd[0],
1155 sad_oprnd[1], plus_oprnd1);
a2287001 1156
a2287001 1157 return pattern_stmt;
1158}
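
/* A source-level example of the SAD pattern above (illustrative types):

     unsigned char a[N], b[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += abs (a[i] - b[i]);   // operands promoted to int

   The whole reduction can then be vectorized with SAD_EXPR on targets
   that provide a vector sum-of-absolute-differences optab.  */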
1159
e10293de 1160/* Recognize an operation that performs ORIG_CODE on widened inputs,
1161 so that it can be treated as though it had the form:
a2287001 1162
e10293de 1163 A_TYPE a;
1164 B_TYPE b;
1165 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1166 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1167 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1168 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1169 | RES_TYPE res = a_extend ORIG_CODE b_extend;
6083c152 1170
e10293de 1171 Try to replace the pattern with:
0187b74e 1172
e10293de 1173 A_TYPE a;
1174 B_TYPE b;
1175 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1176 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1177 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1178 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
0187b74e 1179
e10293de 1180 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
0187b74e 1181
e10293de 1182 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1183 name of the pattern being matched, for dump purposes. */
4a61a337 1184
42acab1c 1185static gimple *
7987f646 1186vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out,
e10293de 1187 tree_code orig_code, tree_code wide_code,
1188 bool shift_p, const char *name)
4a61a337 1189{
7987f646 1190 gimple *last_stmt = last_stmt_info->stmt;
c6c91d61 1191
e10293de 1192 vect_unpromoted_value unprom[2];
1193 tree half_type;
1194 if (!vect_widened_op_tree (last_stmt_info, orig_code, orig_code,
1195 shift_p, 2, unprom, &half_type))
c6c91d61 1196 return NULL;
1197
e10293de 1198 /* Pattern detected. */
1199 vect_pattern_detected (name, last_stmt);
c6c91d61 1200
e10293de 1201 tree type = gimple_expr_type (last_stmt);
c9156338 1202 tree itype = type;
e10293de 1203 if (TYPE_PRECISION (type) != TYPE_PRECISION (half_type) * 2
1204 || TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type))
1205 itype = build_nonstandard_integer_type (TYPE_PRECISION (half_type) * 2,
1206 TYPE_UNSIGNED (half_type));
c6c91d61 1207
1208 /* Check target support */
e10293de 1209 tree vectype = get_vectype_for_scalar_type (half_type);
1210 tree vecitype = get_vectype_for_scalar_type (itype);
1211 enum tree_code dummy_code;
1212 int dummy_int;
1213 auto_vec<tree> dummy_vec;
f031fa03 1214 if (!vectype
c9156338 1215 || !vecitype
a73182ff 1216 || !supportable_widening_operation (wide_code, last_stmt_info,
c9156338 1217 vecitype, vectype,
087dde2d 1218 &dummy_code, &dummy_code,
1219 &dummy_int, &dummy_vec))
c6c91d61 1220 return NULL;
1221
c9156338 1222 *type_out = get_vectype_for_scalar_type (type);
1ba54d4e 1223 if (!*type_out)
1224 return NULL;
c6c91d61 1225
e10293de 1226 tree oprnd[2];
1227 vect_convert_inputs (last_stmt_info, 2, oprnd, half_type, unprom, vectype);
c9156338 1228
e10293de 1229 tree var = vect_recog_temp_ssa_var (itype, NULL);
1230 gimple *pattern_stmt = gimple_build_assign (var, wide_code,
1231 oprnd[0], oprnd[1]);
c9156338 1232
e10293de 1233 return vect_convert_output (last_stmt_info, type, pattern_stmt, vecitype);
4a61a337 1234}
1235
e10293de 1236/* Try to detect multiplication on widened inputs, converting MULT_EXPR
1237 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1238
1239static gimple *
7987f646 1240vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out)
e10293de 1241{
7987f646 1242 return vect_recog_widen_op_pattern (last_stmt_info, type_out, MULT_EXPR,
e10293de 1243 WIDEN_MULT_EXPR, false,
1244 "vect_recog_widen_mult_pattern");
1245}
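
/* A source-level example of the widening multiplication handled here
   (illustrative types):

     short a, b;
     int prod = (int) a * (int) b;

   which can be rewritten as WIDEN_MULT_EXPR <a, b> when the target
   supports a widening multiply on the corresponding vector modes.  */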
4a61a337 1246
c37bea13 1247/* Function vect_recog_pow_pattern
1248
1249 Try to find the following pattern:
1250
1251 x = POW (y, N);
1252
1253 with POW being one of pow, powf, powi, powif and N being
1254 either 2 or 0.5.
1255
1256 Input:
1257
7987f646 1258 * STMT_VINFO: The stmt from which the pattern search begins.
c37bea13 1259
1260 Output:
1261
c37bea13 1262 * TYPE_OUT: The type of the output of this pattern.
1263
1264 * Return value: A new stmt that will be used to replace the sequence of
1265 stmts that constitute the pattern. In this case it will be:
75a70cf9 1266 x = x * x
c37bea13 1267 or
75a70cf9 1268 x = sqrt (x)
c37bea13 1269*/
1270
42acab1c 1271static gimple *
7987f646 1272vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
c37bea13 1273{
7987f646 1274 gimple *last_stmt = stmt_vinfo->stmt;
a35ca444 1275 tree base, exp;
42acab1c 1276 gimple *stmt;
75a70cf9 1277 tree var;
c37bea13 1278
0187b74e 1279 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
c37bea13 1280 return NULL;
1281
62dbd209 1282 switch (gimple_call_combined_fn (last_stmt))
c37bea13 1283 {
62dbd209 1284 CASE_CFN_POW:
1285 CASE_CFN_POWI:
c37bea13 1286 break;
1287
75a70cf9 1288 default:
1289 return NULL;
c37bea13 1290 }
1291
a35ca444 1292 base = gimple_call_arg (last_stmt, 0);
1293 exp = gimple_call_arg (last_stmt, 1);
1294 if (TREE_CODE (exp) != REAL_CST
1295 && TREE_CODE (exp) != INTEGER_CST)
1296 {
1297 if (flag_unsafe_math_optimizations
1298 && TREE_CODE (base) == REAL_CST
1299 && !gimple_call_internal_p (last_stmt))
1300 {
1301 combined_fn log_cfn;
1302 built_in_function exp_bfn;
1303 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt)))
1304 {
1305 case BUILT_IN_POW:
1306 log_cfn = CFN_BUILT_IN_LOG;
1307 exp_bfn = BUILT_IN_EXP;
1308 break;
1309 case BUILT_IN_POWF:
1310 log_cfn = CFN_BUILT_IN_LOGF;
1311 exp_bfn = BUILT_IN_EXPF;
1312 break;
1313 case BUILT_IN_POWL:
1314 log_cfn = CFN_BUILT_IN_LOGL;
1315 exp_bfn = BUILT_IN_EXPL;
1316 break;
1317 default:
1318 return NULL;
1319 }
1320 tree logc = fold_const_call (log_cfn, TREE_TYPE (base), base);
1321 tree exp_decl = builtin_decl_implicit (exp_bfn);
1322 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
1323 does that, but if C is a power of 2, we want to use
1324 exp2 (log2 (C) * x) in the non-vectorized version, but for
1325 vectorization we don't have vectorized exp2. */
1326 if (logc
1327 && TREE_CODE (logc) == REAL_CST
1328 && exp_decl
1329 && lookup_attribute ("omp declare simd",
1330 DECL_ATTRIBUTES (exp_decl)))
1331 {
1332 cgraph_node *node = cgraph_node::get_create (exp_decl);
1333 if (node->simd_clones == NULL)
1334 {
c4d48f95 1335 if (targetm.simd_clone.compute_vecsize_and_simdlen == NULL
1336 || node->definition)
a35ca444 1337 return NULL;
1338 expand_simd_clones (node);
1339 if (node->simd_clones == NULL)
1340 return NULL;
1341 }
1ba54d4e 1342 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1343 if (!*type_out)
1344 return NULL;
a35ca444 1345 tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1346 gimple *g = gimple_build_assign (def, MULT_EXPR, exp, logc);
05499cc7 1347 append_pattern_def_seq (stmt_vinfo, g);
a35ca444 1348 tree res = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1349 g = gimple_build_call (exp_decl, 1, def);
1350 gimple_call_set_lhs (g, res);
1351 return g;
1352 }
1353 }
1354
1355 return NULL;
1356 }
1357
c37bea13 1358 /* We now have a pow or powi builtin function call with a constant
1359 exponent. */
1360
c37bea13 1361 /* Catch squaring. */
e913b5cd 1362 if ((tree_fits_shwi_p (exp)
1363 && tree_to_shwi (exp) == 2)
c37bea13 1364 || (TREE_CODE (exp) == REAL_CST
20cb53c9 1365 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
4957645d 1366 {
1ba54d4e 1367 if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR,
1368 TREE_TYPE (base), type_out))
1369 return NULL;
75a70cf9 1370
1371 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
e9cf809e 1372 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
75a70cf9 1373 return stmt;
4957645d 1374 }
c37bea13 1375
1376 /* Catch square root. */
1377 if (TREE_CODE (exp) == REAL_CST
20cb53c9 1378 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
c37bea13 1379 {
1ba54d4e 1380 *type_out = get_vectype_for_scalar_type (TREE_TYPE (base));
1381 if (*type_out
1382 && direct_internal_fn_supported_p (IFN_SQRT, *type_out,
acdfe9e0 1383 OPTIMIZE_FOR_SPEED))
4957645d 1384 {
901db6c4 1385 gcall *stmt = gimple_build_call_internal (IFN_SQRT, 1, base);
1386 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
1387 gimple_call_set_lhs (stmt, var);
989f02dc 1388 gimple_call_set_nothrow (stmt, true);
901db6c4 1389 return stmt;
4957645d 1390 }
c37bea13 1391 }
1392
75a70cf9 1393 return NULL;
c37bea13 1394}
1395
1396
4a61a337 1397/* Function vect_recog_widen_sum_pattern
1398
1399 Try to find the following pattern:
1400
48e1416a 1401 type x_t;
4a61a337 1402 TYPE x_T, sum = init;
1403 loop:
1404 sum_0 = phi <init, sum_1>
1405 S1 x_t = *p;
1406 S2 x_T = (TYPE) x_t;
1407 S3 sum_1 = x_T + sum_0;
1408
48e1416a 1409 where type 'TYPE' is at least double the size of type 'type', i.e. we're
4a61a337 1410 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
9ca2c29a 1411 a special case of a reduction computation.
4a61a337 1412
1413 Input:
1414
7987f646 1415 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
4a61a337 1416 when this function is called with S3, the pattern {S2,S3} will be detected.
48e1416a 1417
4a61a337 1418 Output:
48e1416a 1419
4a61a337 1420 * TYPE_OUT: The type of the output of this pattern.
1421
1422 * Return value: A new stmt that will be used to replace the sequence of
1423 stmts that constitute the pattern. In this case it will be:
1424 WIDEN_SUM <x_t, sum_0>
221e9a92 1425
48e1416a 1426 Note: The widening-sum idiom is a widening reduction pattern that is
221e9a92 1427 vectorized without preserving all the intermediate results. It
48e1416a 1428 produces only N/2 (widened) results (by summing up pairs of
1429 intermediate results) rather than all N results. Therefore, we
1430 cannot allow this pattern when we want to get all the results and in
1431 the correct order (as is the case when this computation is in an
221e9a92 1432 inner-loop nested in an outer-loop that is being vectorized). */
4a61a337 1433
42acab1c 1434static gimple *
7987f646 1435vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
4a61a337 1436{
7987f646 1437 gimple *last_stmt = stmt_vinfo->stmt;
4a61a337 1438 tree oprnd0, oprnd1;
e10293de 1439 vec_info *vinfo = stmt_vinfo->vinfo;
1440 tree type;
42acab1c 1441 gimple *pattern_stmt;
75a70cf9 1442 tree var;
4a61a337 1443
4a61a337 1444 /* Look for the following pattern
1445 DX = (TYPE) X;
1446 sum_1 = DX + sum_0;
1447 In which DX is at least double the size of X, and sum_1 has been
1448 recognized as a reduction variable.
1449 */
1450
1451 /* Starting from LAST_STMT, follow the defs of its uses in search
1452 of the above pattern. */
1453
d04bd45a 1454 if (!vect_reassociating_reduction_p (stmt_vinfo, PLUS_EXPR,
1455 &oprnd0, &oprnd1))
22f4144a 1456 return NULL;
1457
d04bd45a 1458 type = gimple_expr_type (last_stmt);
4a61a337 1459
0187b74e 1460 /* So far so good. Since last_stmt was detected as a (summation) reduction,
4a61a337 1461 we know that oprnd1 is the reduction variable (defined by a loop-header
1462 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1463 Left to check that oprnd0 is defined by a cast from type 'type' to type
1464 'TYPE'. */
1465
e10293de 1466 vect_unpromoted_value unprom0;
1467 if (!vect_look_through_possible_promotion (vinfo, oprnd0, &unprom0)
1468 || TYPE_PRECISION (unprom0.type) * 2 > TYPE_PRECISION (type))
1ba54d4e 1469 return NULL;
4a61a337 1470
972163bf 1471 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt);
1472
e10293de 1473 if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type,
1ba54d4e 1474 type_out))
1475 return NULL;
4a61a337 1476
75a70cf9 1477 var = vect_recog_temp_ssa_var (type, NULL);
e10293de 1478 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, unprom0.op, oprnd1);
75a70cf9 1479
75a70cf9 1480 return pattern_stmt;
4a61a337 1481}
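
/* A source-level example of the widen-sum pattern above (illustrative
   types):

     short a[N];
     int sum = 0;
     for (int i = 0; i < N; ++i)
       sum += a[i];   // a[i] promoted to int

   which can be vectorized using WIDEN_SUM_EXPR <a[i], sum> when the
   target provides a widening-sum optab.  */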
1482
18bbd2f1 1483/* Recognize cases in which an operation is performed in one type WTYPE
1484 but could be done more efficiently in a narrower type NTYPE. For example,
1485 if we have:
4a61a337 1486
18bbd2f1 1487 ATYPE a; // narrower than NTYPE
1488 BTYPE b; // narrower than NTYPE
1489 WTYPE aw = (WTYPE) a;
1490 WTYPE bw = (WTYPE) b;
1491 WTYPE res = aw + bw; // only uses of aw and bw
45eea33f 1492
18bbd2f1 1493 then it would be more efficient to do:
45eea33f 1494
18bbd2f1 1495 NTYPE an = (NTYPE) a;
1496 NTYPE bn = (NTYPE) b;
1497 NTYPE resn = an + bn;
1498 WTYPE res = (WTYPE) resn;
45eea33f 1499
18bbd2f1 1500 Other situations include things like:
4c0c783a 1501
18bbd2f1 1502 ATYPE a; // NTYPE or narrower
1503 WTYPE aw = (WTYPE) a;
1504 WTYPE res = aw + b;
45eea33f 1505
18bbd2f1 1506 when only "(NTYPE) res" is significant. In that case it's more efficient
1507 to truncate "b" and do the operation on NTYPE instead:
45eea33f 1508
18bbd2f1 1509 NTYPE an = (NTYPE) a;
1510 NTYPE bn = (NTYPE) b; // truncation
1511 NTYPE resn = an + bn;
1512 WTYPE res = (WTYPE) resn;
45eea33f 1513
18bbd2f1 1514 All users of "res" should then use "resn" instead, making the final
1515 statement dead (not marked as relevant). The final statement is still
1516 needed to maintain the type correctness of the IR.
45eea33f 1517
18bbd2f1 1518 vect_determine_precisions has already determined the minimum
 1519 precision of the operation and the minimum precision required
1520 by users of the result. */
45eea33f 1521
18bbd2f1 1522static gimple *
7987f646 1523vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out)
18bbd2f1 1524{
7987f646 1525 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
18bbd2f1 1526 if (!last_stmt)
1527 return NULL;
897c6e08 1528
18bbd2f1 1529 /* See whether we have found that this operation can be done on a
1530 narrower type without changing its semantics. */
18bbd2f1 1531 unsigned int new_precision = last_stmt_info->operation_precision;
1532 if (!new_precision)
1533 return NULL;
45eea33f 1534
18bbd2f1 1535 vec_info *vinfo = last_stmt_info->vinfo;
1536 tree lhs = gimple_assign_lhs (last_stmt);
1537 tree type = TREE_TYPE (lhs);
1538 tree_code code = gimple_assign_rhs_code (last_stmt);
1539
1540 /* Keep the first operand of a COND_EXPR as-is: only the other two
1541 operands are interesting. */
1542 unsigned int first_op = (code == COND_EXPR ? 2 : 1);
1543
1544 /* Check the operands. */
1545 unsigned int nops = gimple_num_ops (last_stmt) - first_op;
1546 auto_vec <vect_unpromoted_value, 3> unprom (nops);
1547 unprom.quick_grow (nops);
1548 unsigned int min_precision = 0;
1549 bool single_use_p = false;
1550 for (unsigned int i = 0; i < nops; ++i)
45eea33f 1551 {
18bbd2f1 1552 tree op = gimple_op (last_stmt, first_op + i);
1553 if (TREE_CODE (op) == INTEGER_CST)
1554 unprom[i].set_op (op, vect_constant_def);
1555 else if (TREE_CODE (op) == SSA_NAME)
1556 {
1557 bool op_single_use_p = true;
1558 if (!vect_look_through_possible_promotion (vinfo, op, &unprom[i],
1559 &op_single_use_p))
1560 return NULL;
1561 /* If:
45eea33f 1562
18bbd2f1 1563 (1) N bits of the result are needed;
1564 (2) all inputs are widened from M<N bits; and
1565 (3) one operand OP is a single-use SSA name
45eea33f 1566
18bbd2f1 1567 we can shift the M->N widening from OP to the output
1568 without changing the number or type of extensions involved.
1569 This then reduces the number of copies of STMT_INFO.
45eea33f 1570
18bbd2f1 1571 If instead of (3) more than one operand is a single-use SSA name,
1572 shifting the extension to the output is even more of a win.
45eea33f 1573
18bbd2f1 1574 If instead:
45eea33f 1575
18bbd2f1 1576 (1) N bits of the result are needed;
1577 (2) one operand OP2 is widened from M2<N bits;
1578 (3) another operand OP1 is widened from M1<M2 bits; and
1579 (4) both OP1 and OP2 are single-use
45eea33f 1580
18bbd2f1 1581 the choice is between:
45eea33f 1582
18bbd2f1 1583 (a) truncating OP2 to M1, doing the operation on M1,
1584 and then widening the result to N
45eea33f 1585
18bbd2f1 1586 (b) widening OP1 to M2, doing the operation on M2, and then
1587 widening the result to N
45eea33f 1588
18bbd2f1 1589 Both shift the M2->N widening of the inputs to the output.
1590 (a) additionally shifts the M1->M2 widening to the output;
1591 it requires fewer copies of STMT_INFO but requires an extra
1592 M2->M1 truncation.
45eea33f 1593
18bbd2f1 1594 Which is better will depend on the complexity and cost of
1595 STMT_INFO, which is hard to predict at this stage. However,
1596 a clear tie-breaker in favor of (b) is the fact that the
1597 truncation in (a) increases the length of the operation chain.
45eea33f 1598
18bbd2f1 1599 If instead of (4) only one of OP1 or OP2 is single-use,
1600 (b) is still a win over doing the operation in N bits:
1601 it still shifts the M2->N widening on the single-use operand
1602 to the output and reduces the number of STMT_INFO copies.
45eea33f 1603
18bbd2f1 1604 If neither operand is single-use then operating on fewer than
1605 N bits might lead to more extensions overall. Whether it does
1606 or not depends on global information about the vectorization
1607 region, and whether that's a good trade-off would again
1608 depend on the complexity and cost of the statements involved,
1609 as well as things like register pressure that are not normally
1610 modelled at this stage. We therefore ignore these cases
1611 and just optimize the clear single-use wins above.
45eea33f 1612
18bbd2f1 1613 Thus we take the maximum precision of the unpromoted operands
1614 and record whether any operand is single-use. */
1615 if (unprom[i].dt == vect_internal_def)
1616 {
1617 min_precision = MAX (min_precision,
1618 TYPE_PRECISION (unprom[i].type));
1619 single_use_p |= op_single_use_p;
1620 }
1621 }
45eea33f 1622 }
1623
18bbd2f1 1624 /* Although the operation could be done in operation_precision, we have
1625 to balance that against introducing extra truncations or extensions.
1626 Calculate the minimum precision that can be handled efficiently.
1627
1628 The loop above determined that the operation could be handled
1629 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
1630 extension from the inputs to the output without introducing more
1631 instructions, and would reduce the number of instructions required
1632 for STMT_INFO itself.
1633
1634 vect_determine_precisions has also determined that the result only
1635 needs min_output_precision bits. Truncating by a factor of N times
1636 requires a tree of N - 1 instructions, so if TYPE is N times wider
1637 than min_output_precision, doing the operation in TYPE and truncating
1638 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
1639 In contrast:
1640
1641 - truncating the input to a unary operation and doing the operation
1642 in the new type requires at most N - 1 + 1 = N instructions per
1643 output vector
1644
1645 - doing the same for a binary operation requires at most
1646 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
1647
1648 Both unary and binary operations require fewer instructions than
1649 this if the operands were extended from a suitable truncated form.
1650 Thus there is usually nothing to lose by doing operations in
1651 min_output_precision bits, but there can be something to gain. */
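  /* As a hypothetical worked example with N == 4 (TYPE four times wider
     than min_output_precision): operating in TYPE needs 4 copies of the
     operation plus a tree of 3 truncations, i.e. 7 instructions per
     output vector; truncating both inputs of a binary operation first
     needs 2 * 3 + 1 = 7, and a unary operation only 3 + 1 = 4.  */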
1652 if (!single_use_p)
1653 min_precision = last_stmt_info->min_output_precision;
45eea33f 1654 else
18bbd2f1 1655 min_precision = MIN (min_precision, last_stmt_info->min_output_precision);
1656
1657 /* Apply the minimum efficient precision we just calculated. */
1658 if (new_precision < min_precision)
1659 new_precision = min_precision;
1660 if (new_precision >= TYPE_PRECISION (type))
1661 return NULL;
1662
1663 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt);
1664
1665 *type_out = get_vectype_for_scalar_type (type);
1666 if (!*type_out)
1667 return NULL;
1668
1669 /* We've found a viable pattern. Get the new type of the operation. */
1670 bool unsigned_p = (last_stmt_info->operation_sign == UNSIGNED);
1671 tree new_type = build_nonstandard_integer_type (new_precision, unsigned_p);
1672
6a1665ab 1673 /* If we're truncating an operation, we need to make sure that we
1674 don't introduce new undefined overflow. The codes tested here are
1675 a subset of those accepted by vect_truncatable_operation_p. */
1676 tree op_type = new_type;
1677 if (TYPE_OVERFLOW_UNDEFINED (new_type)
1678 && (code == PLUS_EXPR || code == MINUS_EXPR || code == MULT_EXPR))
1679 op_type = build_nonstandard_integer_type (new_precision, true);
1680
18bbd2f1 1681 /* We specifically don't check here whether the target supports the
1682 new operation, since it might be something that a later pattern
1683 wants to rewrite anyway. If targets have a minimum element size
1684 for some optabs, we should pattern-match smaller ops to larger ops
1685 where beneficial. */
1686 tree new_vectype = get_vectype_for_scalar_type (new_type);
6a1665ab 1687 tree op_vectype = get_vectype_for_scalar_type (op_type);
1688 if (!new_vectype || !op_vectype)
18bbd2f1 1689 return NULL;
45eea33f 1690
18bbd2f1 1691 if (dump_enabled_p ())
a4e972e3 1692 dump_printf_loc (MSG_NOTE, vect_location, "demoting %T to %T\n",
1693 type, new_type);
45eea33f 1694
6a1665ab 1695 /* Calculate the rhs operands for an operation on OP_TYPE. */
18bbd2f1 1696 tree ops[3] = {};
1697 for (unsigned int i = 1; i < first_op; ++i)
1698 ops[i - 1] = gimple_op (last_stmt, i);
1699 vect_convert_inputs (last_stmt_info, nops, &ops[first_op - 1],
6a1665ab 1700 op_type, &unprom[0], op_vectype);
18bbd2f1 1701
6a1665ab 1702 /* Use the operation to produce a result of type OP_TYPE. */
1703 tree new_var = vect_recog_temp_ssa_var (op_type, NULL);
18bbd2f1 1704 gimple *pattern_stmt = gimple_build_assign (new_var, code,
1705 ops[0], ops[1], ops[2]);
1706 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
1707
1708 if (dump_enabled_p ())
a4e972e3 1709 dump_printf_loc (MSG_NOTE, vect_location,
1710 "created pattern stmt: %G", pattern_stmt);
45eea33f 1711
6a1665ab 1712 /* Convert back to the original signedness, if OP_TYPE is different
1713 from NEW_TYPE. */
1714 if (op_type != new_type)
1715 pattern_stmt = vect_convert_output (last_stmt_info, new_type,
1716 pattern_stmt, op_vectype);
1717
1718 /* Promote the result to the original type. */
18bbd2f1 1719 pattern_stmt = vect_convert_output (last_stmt_info, type,
1720 pattern_stmt, new_vectype);
45eea33f 1721
18bbd2f1 1722 return pattern_stmt;
45eea33f 1723}
1724
3f887927 1725/* Recognize the patterns:
1726
1727 ATYPE a; // narrower than TYPE
1728 BTYPE b; // narrower than TYPE
1729 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
1730 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
1731
1732 where only the bottom half of avg is used. Try to transform them into:
1733
1734 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
1735 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
1736
1737 followed by:
1738
1739 TYPE avg = (TYPE) avg';
1740
1741 where NTYPE is no wider than half of TYPE. Since only the bottom half
1742 of avg is used, all or part of the cast of avg' should become redundant. */
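/* A purely illustrative example (hypothetical types and values):

     unsigned char a, b, res;
     res = (a + b + 1) >> 1;   // a and b promoted to int by the language

   only the low 8 bits of the shift result are used, so this can become

     unsigned char avg = .AVG_CEIL (a, b);
     res = avg;

   which matches form (2) above with NTYPE == unsigned char.  */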
1743
1744static gimple *
7987f646 1745vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out)
3f887927 1746{
1747 /* Check for a shift right by one bit. */
7987f646 1748 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
1749 vec_info *vinfo = last_stmt_info->vinfo;
3f887927 1750 if (!last_stmt
1751 || gimple_assign_rhs_code (last_stmt) != RSHIFT_EXPR
1752 || !integer_onep (gimple_assign_rhs2 (last_stmt)))
1753 return NULL;
1754
3f887927 1755 /* Check that the shift result is wider than the users of the
1756 result need (i.e. that narrowing would be a natural choice). */
1757 tree lhs = gimple_assign_lhs (last_stmt);
1758 tree type = TREE_TYPE (lhs);
1759 unsigned int target_precision
1760 = vect_element_precision (last_stmt_info->min_output_precision);
1761 if (!INTEGRAL_TYPE_P (type) || target_precision >= TYPE_PRECISION (type))
1762 return NULL;
1763
6a1665ab 1764 /* Look through any change in sign on the shift input. */
3f887927 1765 tree rshift_rhs = gimple_assign_rhs1 (last_stmt);
6a1665ab 1766 vect_unpromoted_value unprom_plus;
1767 rshift_rhs = vect_look_through_possible_promotion (vinfo, rshift_rhs,
1768 &unprom_plus);
1769 if (!rshift_rhs
1770 || TYPE_PRECISION (TREE_TYPE (rshift_rhs)) != TYPE_PRECISION (type))
1771 return NULL;
1772
1773 /* Get the definition of the shift input. */
3f887927 1774 stmt_vec_info plus_stmt_info = vect_get_internal_def (vinfo, rshift_rhs);
1775 if (!plus_stmt_info)
1776 return NULL;
1777
1778 /* Check whether the shift input can be seen as a tree of additions on
1779 2 or 3 widened inputs.
1780
1781 Note that the pattern should be a win even if the result of one or
1782 more additions is reused elsewhere: if the pattern matches, we'd be
1783 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
1784 internal_fn ifn = IFN_AVG_FLOOR;
1785 vect_unpromoted_value unprom[3];
1786 tree new_type;
1787 unsigned int nops = vect_widened_op_tree (plus_stmt_info, PLUS_EXPR,
1788 PLUS_EXPR, false, 3,
1789 unprom, &new_type);
1790 if (nops == 0)
1791 return NULL;
1792 if (nops == 3)
1793 {
1794 /* Check that one operand is 1. */
1795 unsigned int i;
1796 for (i = 0; i < 3; ++i)
1797 if (integer_onep (unprom[i].op))
1798 break;
1799 if (i == 3)
1800 return NULL;
1801 /* Throw away the 1 operand and keep the other two. */
1802 if (i < 2)
1803 unprom[i] = unprom[2];
1804 ifn = IFN_AVG_CEIL;
1805 }
1806
1807 vect_pattern_detected ("vect_recog_average_pattern", last_stmt);
1808
1809 /* We know that:
1810
1811 (a) the operation can be viewed as:
1812
1813 TYPE widened0 = (TYPE) UNPROM[0];
1814 TYPE widened1 = (TYPE) UNPROM[1];
1815 TYPE tmp1 = widened0 + widened1 {+ 1};
1816 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
1817
1818 (b) the first two statements are equivalent to:
1819
1820 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
1821 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
1822
1823 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
1824 where sensible;
1825
1826 (d) all the operations can be performed correctly at twice the width of
1827 NEW_TYPE, due to the nature of the average operation; and
1828
1829 (e) users of the result of the right shift need only TARGET_PRECISION
1830 bits, where TARGET_PRECISION is no more than half of TYPE's
1831 precision.
1832
1833 Under these circumstances, the only situation in which NEW_TYPE
1834 could be narrower than TARGET_PRECISION is if widened0, widened1
1835 and an addition result are all used more than once. Thus we can
1836 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
1837 as "free", whereas widening the result of the average instruction
1838 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
1839 therefore better not to go narrower than TARGET_PRECISION. */
1840 if (TYPE_PRECISION (new_type) < target_precision)
1841 new_type = build_nonstandard_integer_type (target_precision,
1842 TYPE_UNSIGNED (new_type));
1843
1844 /* Check for target support. */
1845 tree new_vectype = get_vectype_for_scalar_type (new_type);
1846 if (!new_vectype
1847 || !direct_internal_fn_supported_p (ifn, new_vectype,
1848 OPTIMIZE_FOR_SPEED))
1849 return NULL;
1850
1851 /* The IR requires a valid vector type for the cast result, even though
1852 it's likely to be discarded. */
1853 *type_out = get_vectype_for_scalar_type (type);
1854 if (!*type_out)
1855 return NULL;
1856
1857 /* Generate the IFN_AVG* call. */
1858 tree new_var = vect_recog_temp_ssa_var (new_type, NULL);
1859 tree new_ops[2];
1860 vect_convert_inputs (last_stmt_info, 2, new_ops, new_type,
1861 unprom, new_vectype);
1862 gcall *average_stmt = gimple_build_call_internal (ifn, 2, new_ops[0],
1863 new_ops[1]);
1864 gimple_call_set_lhs (average_stmt, new_var);
1865 gimple_set_location (average_stmt, gimple_location (last_stmt));
1866
1867 if (dump_enabled_p ())
a4e972e3 1868 dump_printf_loc (MSG_NOTE, vect_location,
1869 "created pattern stmt: %G", average_stmt);
3f887927 1870
3f887927 1871 return vect_convert_output (last_stmt_info, type, average_stmt, new_vectype);
1872}
1873
18bbd2f1 1874/* Recognize cases in which the input to a cast is wider than its
1875 output, and the input is fed by a widening operation. Fold this
1876 by removing the unnecessary intermediate widening. E.g.:
45eea33f 1877
18bbd2f1 1878 unsigned char a;
1879 unsigned int b = (unsigned int) a;
1880 unsigned short c = (unsigned short) b;
45eea33f 1881
18bbd2f1 1882 -->
45eea33f 1883
18bbd2f1 1884 unsigned short c = (unsigned short) a;
45eea33f 1885
18bbd2f1 1886 Although this is rare in input IR, it is an expected side-effect
1887 of the over-widening pattern above.
45eea33f 1888
18bbd2f1 1889 This is beneficial also for integer-to-float conversions, if the
1890 widened integer has more bits than the float, and if the unwidened
1891 input doesn't. */
45eea33f 1892
18bbd2f1 1893static gimple *
7987f646 1894vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out)
18bbd2f1 1895{
1896 /* Check for a cast, including an integer-to-float conversion. */
7987f646 1897 gassign *last_stmt = dyn_cast <gassign *> (last_stmt_info->stmt);
18bbd2f1 1898 if (!last_stmt)
1899 return NULL;
1900 tree_code code = gimple_assign_rhs_code (last_stmt);
1901 if (!CONVERT_EXPR_CODE_P (code) && code != FLOAT_EXPR)
1902 return NULL;
45eea33f 1903
18bbd2f1 1904 /* Make sure that the rhs is a scalar with a natural bitsize. */
1905 tree lhs = gimple_assign_lhs (last_stmt);
1906 if (!lhs)
1907 return NULL;
1908 tree lhs_type = TREE_TYPE (lhs);
1909 scalar_mode lhs_mode;
1910 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type)
1911 || !is_a <scalar_mode> (TYPE_MODE (lhs_type), &lhs_mode))
1912 return NULL;
45eea33f 1913
18bbd2f1 1914 /* Check for a narrowing operation (from a vector point of view). */
1915 tree rhs = gimple_assign_rhs1 (last_stmt);
1916 tree rhs_type = TREE_TYPE (rhs);
1917 if (!INTEGRAL_TYPE_P (rhs_type)
1918 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type)
1919 || TYPE_PRECISION (rhs_type) <= GET_MODE_BITSIZE (lhs_mode))
1920 return NULL;
6175d24a 1921
18bbd2f1 1922 /* Try to find an unpromoted input. */
18bbd2f1 1923 vec_info *vinfo = last_stmt_info->vinfo;
1924 vect_unpromoted_value unprom;
1925 if (!vect_look_through_possible_promotion (vinfo, rhs, &unprom)
1926 || TYPE_PRECISION (unprom.type) >= TYPE_PRECISION (rhs_type))
1927 return NULL;
1ba54d4e 1928
18bbd2f1 1929 /* If the bits above RHS_TYPE matter, make sure that they're the
1930 same when extending from UNPROM as they are when extending from RHS. */
1931 if (!INTEGRAL_TYPE_P (lhs_type)
1932 && TYPE_SIGN (rhs_type) != TYPE_SIGN (unprom.type))
1933 return NULL;
45eea33f 1934
18bbd2f1 1935 /* We can get the same result by casting UNPROM directly, to avoid
1936 the unnecessary widening and narrowing. */
1937 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt);
45eea33f 1938
18bbd2f1 1939 *type_out = get_vectype_for_scalar_type (lhs_type);
1940 if (!*type_out)
45eea33f 1941 return NULL;
1942
18bbd2f1 1943 tree new_var = vect_recog_temp_ssa_var (lhs_type, NULL);
1944 gimple *pattern_stmt = gimple_build_assign (new_var, code, unprom.op);
1945 gimple_set_location (pattern_stmt, gimple_location (last_stmt));
45eea33f 1946
1947 return pattern_stmt;
1948}
1949
e10293de 1950/* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
1951 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
6083c152 1952
42acab1c 1953static gimple *
7987f646 1954vect_recog_widen_shift_pattern (stmt_vec_info last_stmt_info, tree *type_out)
6083c152 1955{
7987f646 1956 return vect_recog_widen_op_pattern (last_stmt_info, type_out, LSHIFT_EXPR,
e10293de 1957 WIDEN_LSHIFT_EXPR, true,
d43e44ac 1958 "vect_recog_widen_shift_pattern");
cb4792d2 1959}
1960
 1961/* Detect a rotate pattern that wouldn't otherwise be vectorized:
1962
1963 type a_t, b_t, c_t;
1964
1965 S0 a_t = b_t r<< c_t;
1966
1967 Input/Output:
1968
7987f646 1969 * STMT_VINFO: The stmt from which the pattern search begins,
cb4792d2 1970 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1971 with a sequence:
1972
1973 S1 d_t = -c_t;
1974 S2 e_t = d_t & (B - 1);
1975 S3 f_t = b_t << c_t;
1976 S4 g_t = b_t >> e_t;
1977 S0 a_t = f_t | g_t;
1978
1979 where B is element bitsize of type.
1980
1981 Output:
1982
cb4792d2 1983 * TYPE_OUT: The type of the output of this pattern.
1984
1985 * Return value: A new stmt that will be used to replace the rotate
1986 S0 stmt. */
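/* As an illustration for a hypothetical 32-bit unsigned type, a left
   rotate

     a_t = b_t r<< c_t;

   is open-coded as

     d_t = -c_t;
     e_t = d_t & 31;
     f_t = b_t << c_t;
     g_t = b_t >> e_t;
     a_t = f_t | g_t;

   i.e. the sequence S1, S2, S3, S4, S0 above with B == 32.  */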
1987
42acab1c 1988static gimple *
7987f646 1989vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
cb4792d2 1990{
7987f646 1991 gimple *last_stmt = stmt_vinfo->stmt;
cb4792d2 1992 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
42acab1c 1993 gimple *pattern_stmt, *def_stmt;
cb4792d2 1994 enum tree_code rhs_code;
e2c5c678 1995 vec_info *vinfo = stmt_vinfo->vinfo;
cb4792d2 1996 enum vect_def_type dt;
1997 optab optab1, optab2;
0a015fc7 1998 edge ext_def = NULL;
cb4792d2 1999
2000 if (!is_gimple_assign (last_stmt))
2001 return NULL;
2002
2003 rhs_code = gimple_assign_rhs_code (last_stmt);
2004 switch (rhs_code)
2005 {
2006 case LROTATE_EXPR:
2007 case RROTATE_EXPR:
2008 break;
2009 default:
2010 return NULL;
2011 }
2012
cb4792d2 2013 lhs = gimple_assign_lhs (last_stmt);
2014 oprnd0 = gimple_assign_rhs1 (last_stmt);
2015 type = TREE_TYPE (oprnd0);
2016 oprnd1 = gimple_assign_rhs2 (last_stmt);
2017 if (TREE_CODE (oprnd0) != SSA_NAME
2018 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
2019 || !INTEGRAL_TYPE_P (type)
2020 || !TYPE_UNSIGNED (type))
2021 return NULL;
2022
bfa5bad6 2023 stmt_vec_info def_stmt_info;
2024 if (!vect_is_simple_use (oprnd1, vinfo, &dt, &def_stmt_info, &def_stmt))
cb4792d2 2025 return NULL;
2026
2027 if (dt != vect_internal_def
2028 && dt != vect_constant_def
2029 && dt != vect_external_def)
2030 return NULL;
2031
2032 vectype = get_vectype_for_scalar_type (type);
2033 if (vectype == NULL_TREE)
2034 return NULL;
2035
2036 /* If vector/vector or vector/scalar rotate is supported by the target,
2037 don't do anything here. */
2038 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
2039 if (optab1
2040 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2041 return NULL;
2042
e2c5c678 2043 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
cb4792d2 2044 {
2045 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
2046 if (optab2
2047 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
2048 return NULL;
2049 }
2050
2051 /* If vector/vector or vector/scalar shifts aren't supported by the target,
2052 don't do anything here either. */
2053 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2054 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
2055 if (!optab1
2056 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2057 || !optab2
2058 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2059 {
e2c5c678 2060 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
cb4792d2 2061 return NULL;
2062 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
2063 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
2064 if (!optab1
2065 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
2066 || !optab2
2067 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2068 return NULL;
2069 }
2070
cb4792d2 2071 *type_out = vectype;
cb4792d2 2072
0a015fc7 2073 if (dt == vect_external_def
57abb697 2074 && TREE_CODE (oprnd1) == SSA_NAME)
2075 ext_def = vect_get_external_def_edge (vinfo, oprnd1);
0a015fc7 2076
cb4792d2 2077 def = NULL_TREE;
03b7a719 2078 scalar_int_mode mode = SCALAR_INT_TYPE_MODE (type);
cb4792d2 2079 if (TREE_CODE (oprnd1) == INTEGER_CST
03b7a719 2080 || TYPE_MODE (TREE_TYPE (oprnd1)) == mode)
cb4792d2 2081 def = oprnd1;
2082 else if (def_stmt && gimple_assign_cast_p (def_stmt))
2083 {
2084 tree rhs1 = gimple_assign_rhs1 (def_stmt);
03b7a719 2085 if (TYPE_MODE (TREE_TYPE (rhs1)) == mode
cb4792d2 2086 && TYPE_PRECISION (TREE_TYPE (rhs1))
2087 == TYPE_PRECISION (type))
2088 def = rhs1;
2089 }
2090
cb4792d2 2091 if (def == NULL_TREE)
2092 {
2093 def = vect_recog_temp_ssa_var (type, NULL);
e9cf809e 2094 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
0a015fc7 2095 if (ext_def)
2096 {
2097 basic_block new_bb
2098 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2099 gcc_assert (!new_bb);
2100 }
2101 else
2102 append_pattern_def_seq (stmt_vinfo, def_stmt);
cb4792d2 2103 }
2104 stype = TREE_TYPE (def);
03b7a719 2105 scalar_int_mode smode = SCALAR_INT_TYPE_MODE (stype);
cb4792d2 2106
2107 if (TREE_CODE (def) == INTEGER_CST)
2108 {
e913b5cd 2109 if (!tree_fits_uhwi_p (def)
03b7a719 2110 || tree_to_uhwi (def) >= GET_MODE_PRECISION (mode)
cb4792d2 2111 || integer_zerop (def))
2112 return NULL;
2113 def2 = build_int_cst (stype,
03b7a719 2114 GET_MODE_PRECISION (mode) - tree_to_uhwi (def));
cb4792d2 2115 }
2116 else
2117 {
2118 tree vecstype = get_vectype_for_scalar_type (stype);
cb4792d2 2119
2120 if (vecstype == NULL_TREE)
2121 return NULL;
2122 def2 = vect_recog_temp_ssa_var (stype, NULL);
e9cf809e 2123 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
0a015fc7 2124 if (ext_def)
2125 {
2126 basic_block new_bb
2127 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2128 gcc_assert (!new_bb);
2129 }
2130 else
76f769a4 2131 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
cb4792d2 2132
2133 def2 = vect_recog_temp_ssa_var (stype, NULL);
03b7a719 2134 tree mask = build_int_cst (stype, GET_MODE_PRECISION (smode) - 1);
e9cf809e 2135 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
2136 gimple_assign_lhs (def_stmt), mask);
0a015fc7 2137 if (ext_def)
2138 {
2139 basic_block new_bb
2140 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2141 gcc_assert (!new_bb);
2142 }
2143 else
76f769a4 2144 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
cb4792d2 2145 }
2146
2147 var1 = vect_recog_temp_ssa_var (type, NULL);
e9cf809e 2148 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2149 ? LSHIFT_EXPR : RSHIFT_EXPR,
2150 oprnd0, def);
cb4792d2 2151 append_pattern_def_seq (stmt_vinfo, def_stmt);
2152
2153 var2 = vect_recog_temp_ssa_var (type, NULL);
e9cf809e 2154 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2155 ? RSHIFT_EXPR : LSHIFT_EXPR,
2156 oprnd0, def2);
cb4792d2 2157 append_pattern_def_seq (stmt_vinfo, def_stmt);
2158
2159 /* Pattern detected. */
972163bf 2160 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt);
cb4792d2 2161
2162 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2163 var = vect_recog_temp_ssa_var (type, NULL);
e9cf809e 2164 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
cb4792d2 2165
6083c152 2166 return pattern_stmt;
2167}
45eea33f 2168
40dfedc8 2169/* Detect a vector by vector shift pattern that wouldn't be otherwise
2170 vectorized:
2171
2172 type a_t;
2173 TYPE b_T, res_T;
2174
2175 S1 a_t = ;
2176 S2 b_T = ;
2177 S3 res_T = b_T op a_t;
2178
2179 where type 'TYPE' is a type with different size than 'type',
2180 and op is <<, >> or rotate.
2181
2182 Also detect cases:
2183
2184 type a_t;
2185 TYPE b_T, c_T, res_T;
2186
2187 S0 c_T = ;
2188 S1 a_t = (type) c_T;
2189 S2 b_T = ;
2190 S3 res_T = b_T op a_t;
2191
2192 Input/Output:
2193
7987f646 2194 * STMT_VINFO: The stmt from which the pattern search begins,
40dfedc8 2195 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2196 with a shift/rotate which has same type on both operands, in the
2197 second case just b_T op c_T, in the first case with added cast
18937389 2198 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
40dfedc8 2199
2200 Output:
2201
40dfedc8 2202 * TYPE_OUT: The type of the output of this pattern.
2203
2204 * Return value: A new stmt that will be used to replace the shift/rotate
2205 S3 stmt. */
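/* For example (an illustrative sketch only):

     unsigned char amount;
     unsigned int x, res;
     res = x << amount;

   is rewritten so that both shift operands have the same vector element
   size, by adding "amount_w = (unsigned int) amount" to the pattern def
   sequence and shifting x by amount_w instead.  */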
2206
42acab1c 2207static gimple *
7987f646 2208vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo,
2209 tree *type_out)
40dfedc8 2210{
7987f646 2211 gimple *last_stmt = stmt_vinfo->stmt;
40dfedc8 2212 tree oprnd0, oprnd1, lhs, var;
c32b0020 2213 gimple *pattern_stmt;
40dfedc8 2214 enum tree_code rhs_code;
e2c5c678 2215 vec_info *vinfo = stmt_vinfo->vinfo;
40dfedc8 2216
2217 if (!is_gimple_assign (last_stmt))
2218 return NULL;
2219
2220 rhs_code = gimple_assign_rhs_code (last_stmt);
2221 switch (rhs_code)
2222 {
2223 case LSHIFT_EXPR:
2224 case RSHIFT_EXPR:
2225 case LROTATE_EXPR:
2226 case RROTATE_EXPR:
2227 break;
2228 default:
2229 return NULL;
2230 }
2231
40dfedc8 2232 lhs = gimple_assign_lhs (last_stmt);
2233 oprnd0 = gimple_assign_rhs1 (last_stmt);
2234 oprnd1 = gimple_assign_rhs2 (last_stmt);
2235 if (TREE_CODE (oprnd0) != SSA_NAME
2236 || TREE_CODE (oprnd1) != SSA_NAME
2237 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
654ba22c 2238 || !type_has_mode_precision_p (TREE_TYPE (oprnd1))
40dfedc8 2239 || TYPE_PRECISION (TREE_TYPE (lhs))
2240 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2241 return NULL;
2242
c32b0020 2243 stmt_vec_info def_vinfo = vect_get_internal_def (vinfo, oprnd1);
2244 if (!def_vinfo)
40dfedc8 2245 return NULL;
2246
1ba54d4e 2247 *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2248 if (*type_out == NULL_TREE)
40dfedc8 2249 return NULL;
2250
5cc2ea45 2251 tree def = NULL_TREE;
c32b0020 2252 gassign *def_stmt = dyn_cast <gassign *> (def_vinfo->stmt);
142c3207 2253 if (def_stmt && gimple_assign_cast_p (def_stmt))
40dfedc8 2254 {
2255 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2256 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2257 && TYPE_PRECISION (TREE_TYPE (rhs1))
2258 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
7e83fd1f 2259 {
2260 if (TYPE_PRECISION (TREE_TYPE (oprnd1))
2261 >= TYPE_PRECISION (TREE_TYPE (rhs1)))
2262 def = rhs1;
2263 else
2264 {
2265 tree mask
2266 = build_low_bits_mask (TREE_TYPE (rhs1),
2267 TYPE_PRECISION (TREE_TYPE (oprnd1)));
2268 def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
2269 def_stmt = gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask);
76f769a4 2270 tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2271 append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype);
7e83fd1f 2272 }
2273 }
40dfedc8 2274 }
2275
2276 if (def == NULL_TREE)
2277 {
2278 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
e9cf809e 2279 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
05499cc7 2280 append_pattern_def_seq (stmt_vinfo, def_stmt);
40dfedc8 2281 }
2282
2283 /* Pattern detected. */
972163bf 2284 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt);
40dfedc8 2285
2286 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2287 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
e9cf809e 2288 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
40dfedc8 2289
40dfedc8 2290 return pattern_stmt;
2291}
2292
c5a9fc7b 2293/* Return true iff the target has a vector optab implementing the operation
2294 CODE on type VECTYPE. */
8f282ed2 2295
c5a9fc7b 2296static bool
2297target_has_vecop_for_code (tree_code code, tree vectype)
2298{
2299 optab voptab = optab_for_tree_code (code, vectype, optab_vector);
2300 return voptab
2301 && optab_handler (voptab, TYPE_MODE (vectype)) != CODE_FOR_nothing;
2302}
8f282ed2 2303
c5a9fc7b 2304/* Verify that the target has optabs of VECTYPE to perform all the steps
2305 needed by the multiplication-by-immediate synthesis algorithm described by
2306 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
2307 present. Return true iff the target supports all the steps. */
2308
2309static bool
2310target_supports_mult_synth_alg (struct algorithm *alg, mult_variant var,
2311 tree vectype, bool synth_shift_p)
2312{
2313 if (alg->op[0] != alg_zero && alg->op[0] != alg_m)
2314 return false;
2315
2316 bool supports_vminus = target_has_vecop_for_code (MINUS_EXPR, vectype);
2317 bool supports_vplus = target_has_vecop_for_code (PLUS_EXPR, vectype);
2318
2319 if (var == negate_variant
2320 && !target_has_vecop_for_code (NEGATE_EXPR, vectype))
2321 return false;
2322
2323 /* If we must synthesize shifts with additions make sure that vector
2324 addition is available. */
2325 if ((var == add_variant || synth_shift_p) && !supports_vplus)
2326 return false;
2327
2328 for (int i = 1; i < alg->ops; i++)
2329 {
2330 switch (alg->op[i])
2331 {
2332 case alg_shift:
2333 break;
2334 case alg_add_t_m2:
2335 case alg_add_t2_m:
2336 case alg_add_factor:
2337 if (!supports_vplus)
2338 return false;
2339 break;
2340 case alg_sub_t_m2:
2341 case alg_sub_t2_m:
2342 case alg_sub_factor:
2343 if (!supports_vminus)
2344 return false;
2345 break;
2346 case alg_unknown:
2347 case alg_m:
2348 case alg_zero:
2349 case alg_impossible:
2350 return false;
2351 default:
2352 gcc_unreachable ();
2353 }
2354 }
2355
2356 return true;
2357}
2358
2359/* Synthesize a left shift of OP by AMNT bits using a series of additions and
2360 putting the final result in DEST. Append all statements but the last into
2361 VINFO. Return the last statement. */
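/* For example, a shift by 3 becomes three self-additions, conceptually

     t1 = op + op;  t2 = t1 + t1;  dest = t2 + t2;

   i.e. op * 2 * 2 * 2.  */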
2362
2363static gimple *
2364synth_lshift_by_additions (tree dest, tree op, HOST_WIDE_INT amnt,
2365 stmt_vec_info vinfo)
2366{
2367 HOST_WIDE_INT i;
2368 tree itype = TREE_TYPE (op);
2369 tree prev_res = op;
2370 gcc_assert (amnt >= 0);
2371 for (i = 0; i < amnt; i++)
2372 {
2373 tree tmp_var = (i < amnt - 1) ? vect_recog_temp_ssa_var (itype, NULL)
2374 : dest;
2375 gimple *stmt
2376 = gimple_build_assign (tmp_var, PLUS_EXPR, prev_res, prev_res);
2377 prev_res = tmp_var;
2378 if (i < amnt - 1)
2379 append_pattern_def_seq (vinfo, stmt);
2380 else
2381 return stmt;
2382 }
2383 gcc_unreachable ();
2384 return NULL;
2385}
2386
2387/* Helper for vect_synth_mult_by_constant. Apply a binary operation
2388 CODE to operands OP1 and OP2, creating a new temporary SSA var in
2389 the process if necessary. Append the resulting assignment statements
2390 to the sequence in STMT_VINFO. Return the SSA variable that holds the
2391 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
2392 left shifts using additions. */
2393
2394static tree
2395apply_binop_and_append_stmt (tree_code code, tree op1, tree op2,
2396 stmt_vec_info stmt_vinfo, bool synth_shift_p)
2397{
2398 if (integer_zerop (op2)
2399 && (code == LSHIFT_EXPR
2400 || code == PLUS_EXPR))
2401 {
2402 gcc_assert (TREE_CODE (op1) == SSA_NAME);
2403 return op1;
2404 }
2405
2406 gimple *stmt;
2407 tree itype = TREE_TYPE (op1);
2408 tree tmp_var = vect_recog_temp_ssa_var (itype, NULL);
2409
2410 if (code == LSHIFT_EXPR
2411 && synth_shift_p)
2412 {
2413 stmt = synth_lshift_by_additions (tmp_var, op1, TREE_INT_CST_LOW (op2),
2414 stmt_vinfo);
2415 append_pattern_def_seq (stmt_vinfo, stmt);
2416 return tmp_var;
2417 }
2418
2419 stmt = gimple_build_assign (tmp_var, code, op1, op2);
2420 append_pattern_def_seq (stmt_vinfo, stmt);
2421 return tmp_var;
2422}
2423
2424/* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
2425 and simple arithmetic operations to be vectorized. Record the statements
2426 produced in STMT_VINFO and return the last statement in the sequence or
2427 NULL if it's not possible to synthesize such a multiplication.
2428 This function mirrors the behavior of expand_mult_const in expmed.c but
2429 works on tree-ssa form. */
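/* For instance (illustrative only), a multiplication by 9 can be
   synthesized as "t = op << 3; res = t + op", and a multiplication by 15
   as "t = op << 4; res = t - op"; the exact sequence is chosen by
   choose_mult_variant.  */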
2430
2431static gimple *
2432vect_synth_mult_by_constant (tree op, tree val,
2433 stmt_vec_info stmt_vinfo)
2434{
2435 tree itype = TREE_TYPE (op);
2436 machine_mode mode = TYPE_MODE (itype);
2437 struct algorithm alg;
2438 mult_variant variant;
2439 if (!tree_fits_shwi_p (val))
2440 return NULL;
2441
2442 /* Multiplication synthesis by shifts, adds and subs can introduce
2443 signed overflow where the original operation didn't. Perform the
2444 operations on an unsigned type and cast back to avoid this.
2445 In the future we may want to relax this for synthesis algorithms
2446 that we can prove do not cause unexpected overflow. */
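  /* For example (hypothetical value): with 32-bit int, "x * 7" does not
     overflow for x == INT_MAX / 8 + 1, but the synthesized
     "(x << 3) - x" would overflow in the intermediate shift.  */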
2447 bool cast_to_unsigned_p = !TYPE_OVERFLOW_WRAPS (itype);
2448
2449 tree multtype = cast_to_unsigned_p ? unsigned_type_for (itype) : itype;
8f282ed2 2450
c5a9fc7b 2451 /* Targets that don't support vector shifts but support vector additions
2452 can synthesize shifts that way. */
2453 bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype);
2454
2455 HOST_WIDE_INT hwval = tree_to_shwi (val);
2456 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
2457 The vectorizer's benefit analysis will decide whether it's beneficial
2458 to do this. */
2459 bool possible = choose_mult_variant (mode, hwval, &alg,
2460 &variant, MAX_COST);
2461 if (!possible)
2462 return NULL;
2463
2464 tree vectype = get_vectype_for_scalar_type (multtype);
2465
2466 if (!vectype
2467 || !target_supports_mult_synth_alg (&alg, variant,
2468 vectype, synth_shift_p))
2469 return NULL;
2470
2471 tree accumulator;
2472
2473 /* Clear out the sequence of statements so we can populate it below. */
c5a9fc7b 2474 gimple *stmt = NULL;
2475
2476 if (cast_to_unsigned_p)
2477 {
2478 tree tmp_op = vect_recog_temp_ssa_var (multtype, NULL);
2479 stmt = gimple_build_assign (tmp_op, CONVERT_EXPR, op);
2480 append_pattern_def_seq (stmt_vinfo, stmt);
2481 op = tmp_op;
2482 }
2483
2484 if (alg.op[0] == alg_zero)
2485 accumulator = build_int_cst (multtype, 0);
2486 else
2487 accumulator = op;
2488
2489 bool needs_fixup = (variant == negate_variant)
2490 || (variant == add_variant);
2491
2492 for (int i = 1; i < alg.ops; i++)
2493 {
2494 tree shft_log = build_int_cst (multtype, alg.log[i]);
2495 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2496 tree tmp_var = NULL_TREE;
2497
2498 switch (alg.op[i])
2499 {
2500 case alg_shift:
2501 if (synth_shift_p)
2502 stmt
2503 = synth_lshift_by_additions (accum_tmp, accumulator, alg.log[i],
2504 stmt_vinfo);
2505 else
2506 stmt = gimple_build_assign (accum_tmp, LSHIFT_EXPR, accumulator,
2507 shft_log);
2508 break;
2509 case alg_add_t_m2:
2510 tmp_var
2511 = apply_binop_and_append_stmt (LSHIFT_EXPR, op, shft_log,
2512 stmt_vinfo, synth_shift_p);
2513 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2514 tmp_var);
2515 break;
2516 case alg_sub_t_m2:
2517 tmp_var = apply_binop_and_append_stmt (LSHIFT_EXPR, op,
2518 shft_log, stmt_vinfo,
2519 synth_shift_p);
2520 /* In some algorithms the first step involves zeroing the
2521 accumulator. If subtracting from such an accumulator
2522 just emit the negation directly. */
2523 if (integer_zerop (accumulator))
2524 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, tmp_var);
2525 else
2526 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, accumulator,
2527 tmp_var);
2528 break;
2529 case alg_add_t2_m:
2530 tmp_var
2531 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2532 stmt_vinfo, synth_shift_p);
2533 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, tmp_var, op);
2534 break;
2535 case alg_sub_t2_m:
2536 tmp_var
2537 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2538 stmt_vinfo, synth_shift_p);
2539 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var, op);
2540 break;
2541 case alg_add_factor:
2542 tmp_var
2543 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2544 stmt_vinfo, synth_shift_p);
2545 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator,
2546 tmp_var);
2547 break;
2548 case alg_sub_factor:
2549 tmp_var
2550 = apply_binop_and_append_stmt (LSHIFT_EXPR, accumulator, shft_log,
2551 stmt_vinfo, synth_shift_p);
2552 stmt = gimple_build_assign (accum_tmp, MINUS_EXPR, tmp_var,
2553 accumulator);
2554 break;
2555 default:
2556 gcc_unreachable ();
2557 }
2558 /* We don't want to append the last stmt in the sequence to stmt_vinfo
2559 but rather return it directly. */
2560
2561 if ((i < alg.ops - 1) || needs_fixup || cast_to_unsigned_p)
2562 append_pattern_def_seq (stmt_vinfo, stmt);
2563 accumulator = accum_tmp;
2564 }
2565 if (variant == negate_variant)
2566 {
2567 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2568 stmt = gimple_build_assign (accum_tmp, NEGATE_EXPR, accumulator);
2569 accumulator = accum_tmp;
2570 if (cast_to_unsigned_p)
2571 append_pattern_def_seq (stmt_vinfo, stmt);
2572 }
2573 else if (variant == add_variant)
2574 {
2575 tree accum_tmp = vect_recog_temp_ssa_var (multtype, NULL);
2576 stmt = gimple_build_assign (accum_tmp, PLUS_EXPR, accumulator, op);
2577 accumulator = accum_tmp;
2578 if (cast_to_unsigned_p)
2579 append_pattern_def_seq (stmt_vinfo, stmt);
2580 }
2581 /* Move back to a signed if needed. */
2582 if (cast_to_unsigned_p)
2583 {
2584 tree accum_tmp = vect_recog_temp_ssa_var (itype, NULL);
2585 stmt = gimple_build_assign (accum_tmp, CONVERT_EXPR, accumulator);
2586 }
2587
2588 return stmt;
2589}
2590
 2591/* Detect a multiplication by a constant and convert it into a sequence
 2592   of shifts, additions, subtractions and negations.  We reuse the
c5a9fc7b 2593   choose_mult_variant algorithms from expmed.c.
8f282ed2 2594
2595 Input/Output:
2596
7987f646 2597 STMT_VINFO: The stmt from which the pattern search begins,
c5a9fc7b 2598 i.e. the mult stmt.
8f282ed2 2599
2600 Output:
2601
8f282ed2 2602 * TYPE_OUT: The type of the output of this pattern.
2603
c5a9fc7b 2604 * Return value: A new stmt that will be used to replace
2605 the multiplication. */
8f282ed2 2606
42acab1c 2607static gimple *
7987f646 2608vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
8f282ed2 2609{
7987f646 2610 gimple *last_stmt = stmt_vinfo->stmt;
8f282ed2 2611 tree oprnd0, oprnd1, vectype, itype;
c5a9fc7b 2612 gimple *pattern_stmt;
8f282ed2 2613
2614 if (!is_gimple_assign (last_stmt))
2615 return NULL;
2616
2617 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2618 return NULL;
2619
2620 oprnd0 = gimple_assign_rhs1 (last_stmt);
2621 oprnd1 = gimple_assign_rhs2 (last_stmt);
2622 itype = TREE_TYPE (oprnd0);
2623
2624 if (TREE_CODE (oprnd0) != SSA_NAME
2625 || TREE_CODE (oprnd1) != INTEGER_CST
2626 || !INTEGRAL_TYPE_P (itype)
654ba22c 2627 || !type_has_mode_precision_p (itype))
8f282ed2 2628 return NULL;
2629
2630 vectype = get_vectype_for_scalar_type (itype);
2631 if (vectype == NULL_TREE)
2632 return NULL;
2633
2634 /* If the target can handle vectorized multiplication natively,
2635 don't attempt to optimize this. */
c5a9fc7b 2636 optab mul_optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2637 if (mul_optab != unknown_optab)
8f282ed2 2638 {
2639 machine_mode vec_mode = TYPE_MODE (vectype);
c5a9fc7b 2640 int icode = (int) optab_handler (mul_optab, vec_mode);
8f282ed2 2641 if (icode != CODE_FOR_nothing)
c5a9fc7b 2642 return NULL;
8f282ed2 2643 }
2644
c5a9fc7b 2645 pattern_stmt = vect_synth_mult_by_constant (oprnd0, oprnd1, stmt_vinfo);
2646 if (!pattern_stmt)
8f282ed2 2647 return NULL;
2648
2649 /* Pattern detected. */
972163bf 2650 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt);
8f282ed2 2651
8f282ed2 2652 *type_out = vectype;
2653
2654 return pattern_stmt;
2655}
2656
127cb1cd 2657/* Detect a division by a constant that wouldn't be
18937389 2658 otherwise vectorized:
2659
2660 type a_t, b_t;
2661
2662 S1 a_t = b_t / N;
2663
127cb1cd 2664 where type 'type' is an integral type and N is a constant.
18937389 2665
127cb1cd 2666 Similarly handle modulo by a constant:
18937389 2667
2668 S4 a_t = b_t % N;
2669
2670 Input/Output:
2671
7987f646 2672 * STMT_VINFO: The stmt from which the pattern search begins,
127cb1cd 2673	i.e. the division stmt.  If N is a power of two constant and the
 2674	type is signed, S1 is replaced by:
18937389 2675 S3 y_t = b_t < 0 ? N - 1 : 0;
2676 S2 x_t = b_t + y_t;
2677 S1' a_t = x_t >> log2 (N);
2678
127cb1cd 2679	If N is a power of two constant and the type is signed, S4 is
 2680	replaced by (where *_T temporaries have unsigned type):
18937389 2681 S9 y_T = b_t < 0 ? -1U : 0U;
2682 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2683 S7 z_t = (type) z_T;
2684 S6 w_t = b_t + z_t;
2685 S5 x_t = w_t & (N - 1);
2686 S4' a_t = x_t - z_t;
2687
2688 Output:
2689
18937389 2690 * TYPE_OUT: The type of the output of this pattern.
2691
2692 * Return value: A new stmt that will be used to replace the division
2693 S1 or modulo S4 stmt. */
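/* A small illustration (hypothetical value N == 4, signed type): the
   replacement sequence for "a_t = b_t / 4" computes

     y_t = b_t < 0 ? 3 : 0;
     x_t = b_t + y_t;
     a_t = x_t >> 2;

   which rounds towards zero as required, unlike a plain arithmetic
   shift, which would round towards negative infinity.  */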
2694
42acab1c 2695static gimple *
7987f646 2696vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
18937389 2697{
7987f646 2698 gimple *last_stmt = stmt_vinfo->stmt;
3af51fe9 2699 tree oprnd0, oprnd1, vectype, itype, cond;
42acab1c 2700 gimple *pattern_stmt, *def_stmt;
18937389 2701 enum tree_code rhs_code;
18937389 2702 optab optab;
ebf4f764 2703 tree q;
127cb1cd 2704 int dummy_int, prec;
18937389 2705
2706 if (!is_gimple_assign (last_stmt))
2707 return NULL;
2708
2709 rhs_code = gimple_assign_rhs_code (last_stmt);
2710 switch (rhs_code)
2711 {
2712 case TRUNC_DIV_EXPR:
37f29597 2713 case EXACT_DIV_EXPR:
18937389 2714 case TRUNC_MOD_EXPR:
2715 break;
2716 default:
2717 return NULL;
2718 }
2719
18937389 2720 oprnd0 = gimple_assign_rhs1 (last_stmt);
2721 oprnd1 = gimple_assign_rhs2 (last_stmt);
2722 itype = TREE_TYPE (oprnd0);
2723 if (TREE_CODE (oprnd0) != SSA_NAME
2724 || TREE_CODE (oprnd1) != INTEGER_CST
2725 || TREE_CODE (itype) != INTEGER_TYPE
654ba22c 2726 || !type_has_mode_precision_p (itype))
18937389 2727 return NULL;
2728
03b7a719 2729 scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype);
18937389 2730 vectype = get_vectype_for_scalar_type (itype);
2731 if (vectype == NULL_TREE)
2732 return NULL;
2733
1c91e6c2 2734 if (optimize_bb_for_size_p (gimple_bb (last_stmt)))
18937389 2735 {
1c91e6c2 2736 /* If the target can handle vectorized division or modulo natively,
2737 don't attempt to optimize this, since native division is likely
2738 to give smaller code. */
2739 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2740 if (optab != unknown_optab)
2741 {
2742 machine_mode vec_mode = TYPE_MODE (vectype);
2743 int icode = (int) optab_handler (optab, vec_mode);
2744 if (icode != CODE_FOR_nothing)
2745 return NULL;
2746 }
18937389 2747 }
2748
127cb1cd 2749 prec = TYPE_PRECISION (itype);
2750 if (integer_pow2p (oprnd1))
18937389 2751 {
127cb1cd 2752 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2753 return NULL;
18937389 2754
127cb1cd 2755 /* Pattern detected. */
972163bf 2756 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
127cb1cd 2757
2758 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2759 build_int_cst (itype, 0));
37f29597 2760 if (rhs_code == TRUNC_DIV_EXPR
2761 || rhs_code == EXACT_DIV_EXPR)
127cb1cd 2762 {
2763 tree var = vect_recog_temp_ssa_var (itype, NULL);
2764 tree shift;
2765 def_stmt
e9cf809e 2766 = gimple_build_assign (var, COND_EXPR, cond,
2767 fold_build2 (MINUS_EXPR, itype, oprnd1,
2768 build_int_cst (itype, 1)),
2769 build_int_cst (itype, 0));
05499cc7 2770 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 2771 var = vect_recog_temp_ssa_var (itype, NULL);
2772 def_stmt
e9cf809e 2773 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2774 gimple_assign_lhs (def_stmt));
127cb1cd 2775 append_pattern_def_seq (stmt_vinfo, def_stmt);
2776
2777 shift = build_int_cst (itype, tree_log2 (oprnd1));
2778 pattern_stmt
e9cf809e 2779 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2780 RSHIFT_EXPR, var, shift);
127cb1cd 2781 }
2782 else
2783 {
2784 tree signmask;
127cb1cd 2785 if (compare_tree_int (oprnd1, 2) == 0)
2786 {
2787 signmask = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 2788 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2789 build_int_cst (itype, 1),
2790 build_int_cst (itype, 0));
127cb1cd 2791 append_pattern_def_seq (stmt_vinfo, def_stmt);
2792 }
2793 else
2794 {
2795 tree utype
2796 = build_nonstandard_integer_type (prec, 1);
2797 tree vecutype = get_vectype_for_scalar_type (utype);
2798 tree shift
03b7a719 2799 = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode)
127cb1cd 2800 - tree_log2 (oprnd1));
2801 tree var = vect_recog_temp_ssa_var (utype, NULL);
2802
e9cf809e 2803 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2804 build_int_cst (utype, -1),
2805 build_int_cst (utype, 0));
76f769a4 2806 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
127cb1cd 2807 var = vect_recog_temp_ssa_var (utype, NULL);
e9cf809e 2808 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2809 gimple_assign_lhs (def_stmt),
2810 shift);
76f769a4 2811 append_pattern_def_seq (stmt_vinfo, def_stmt, vecutype);
127cb1cd 2812 signmask = vect_recog_temp_ssa_var (itype, NULL);
2813 def_stmt
e9cf809e 2814 = gimple_build_assign (signmask, NOP_EXPR, var);
127cb1cd 2815 append_pattern_def_seq (stmt_vinfo, def_stmt);
2816 }
2817 def_stmt
e9cf809e 2818 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2819 PLUS_EXPR, oprnd0, signmask);
127cb1cd 2820 append_pattern_def_seq (stmt_vinfo, def_stmt);
2821 def_stmt
e9cf809e 2822 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2823 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2824 fold_build2 (MINUS_EXPR, itype, oprnd1,
2825 build_int_cst (itype, 1)));
127cb1cd 2826 append_pattern_def_seq (stmt_vinfo, def_stmt);
2827
2828 pattern_stmt
e9cf809e 2829 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2830 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2831 signmask);
127cb1cd 2832 }
2833
127cb1cd 2834 *type_out = vectype;
2835 return pattern_stmt;
18937389 2836 }
127cb1cd 2837
20448fd9 2838 if (prec > HOST_BITS_PER_WIDE_INT
2839 || integer_zerop (oprnd1))
127cb1cd 2840 return NULL;
2841
ebf4f764 2842 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2843 return NULL;
127cb1cd 2844
127cb1cd 2845 if (TYPE_UNSIGNED (itype))
18937389 2846 {
127cb1cd 2847 unsigned HOST_WIDE_INT mh, ml;
2848 int pre_shift, post_shift;
f9ae6f95 2849 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
03b7a719 2850 & GET_MODE_MASK (itype_mode));
3af51fe9 2851 tree t1, t2, t3, t4;
127cb1cd 2852
edc19fd0 2853 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
127cb1cd 2854 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2855 return NULL;
2856
2857 /* Find a suitable multiplier and right shift count
2858 instead of multiplying with D. */
2859 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2860
 2861      /* If the suggested multiplier is more than PREC bits, we can do better
2862 for even divisors, using an initial right shift. */
2863 if (mh != 0 && (d & 1) == 0)
18937389 2864 {
ac29ece2 2865 pre_shift = ctz_or_zero (d);
127cb1cd 2866 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2867 &ml, &post_shift, &dummy_int);
2868 gcc_assert (!mh);
2869 }
2870 else
2871 pre_shift = 0;
2872
2873 if (mh != 0)
2874 {
2875 if (post_shift - 1 >= prec)
2876 return NULL;
2877
3af51fe9 2878 /* t1 = oprnd0 h* ml;
2879 t2 = oprnd0 - t1;
2880 t3 = t2 >> 1;
2881 t4 = t1 + t3;
2882 q = t4 >> (post_shift - 1); */
2883 t1 = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 2884 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2885 build_int_cst (itype, ml));
127cb1cd 2886 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 2887
3af51fe9 2888 t2 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 2889 def_stmt
e9cf809e 2890 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
6d741312 2891 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 2892
2893 t3 = vect_recog_temp_ssa_var (itype, NULL);
2894 def_stmt
e9cf809e 2895 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
127cb1cd 2896 append_pattern_def_seq (stmt_vinfo, def_stmt);
2897
3af51fe9 2898 t4 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 2899 def_stmt
e9cf809e 2900 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
127cb1cd 2901
2902 if (post_shift != 1)
2903 {
2904 append_pattern_def_seq (stmt_vinfo, def_stmt);
2905
3af51fe9 2906 q = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 2907 pattern_stmt
e9cf809e 2908 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2909 build_int_cst (itype, post_shift - 1));
127cb1cd 2910 }
2911 else
2912 {
3af51fe9 2913 q = t4;
127cb1cd 2914 pattern_stmt = def_stmt;
2915 }
18937389 2916 }
2917 else
2918 {
127cb1cd 2919 if (pre_shift >= prec || post_shift >= prec)
2920 return NULL;
2921
2922 /* t1 = oprnd0 >> pre_shift;
3af51fe9 2923 t2 = t1 h* ml;
2924 q = t2 >> post_shift; */
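	  /* For example (a well-known case, given only as an illustration):
	     32-bit unsigned division by 3 uses ml = 0xAAAAAAAB with
	     pre_shift == 0 and post_shift == 1, i.e.
	     q = (oprnd0 h* 0xAAAAAAAB) >> 1.  */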
127cb1cd 2925 if (pre_shift)
2926 {
2927 t1 = vect_recog_temp_ssa_var (itype, NULL);
2928 def_stmt
e9cf809e 2929 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2930 build_int_cst (NULL, pre_shift));
127cb1cd 2931 append_pattern_def_seq (stmt_vinfo, def_stmt);
2932 }
2933 else
2934 t1 = oprnd0;
18937389 2935
3af51fe9 2936 t2 = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 2937 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2938 build_int_cst (itype, ml));
127cb1cd 2939
3af51fe9 2940 if (post_shift)
2941 {
2942 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 2943
3af51fe9 2944 q = vect_recog_temp_ssa_var (itype, NULL);
2945 def_stmt
e9cf809e 2946 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2947 build_int_cst (itype, post_shift));
3af51fe9 2948 }
2949 else
2950 q = t2;
2951
2952 pattern_stmt = def_stmt;
127cb1cd 2953 }
2954 }
2955 else
2956 {
2957 unsigned HOST_WIDE_INT ml;
60420e1c 2958 int post_shift;
f9ae6f95 2959 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
127cb1cd 2960 unsigned HOST_WIDE_INT abs_d;
2961 bool add = false;
3af51fe9 2962 tree t1, t2, t3, t4;
127cb1cd 2963
2964 /* Give up for -1. */
2965 if (d == -1)
2966 return NULL;
2967
127cb1cd 2968 /* Since d might be INT_MIN, we have to cast to
2969 unsigned HOST_WIDE_INT before negating to avoid
2970 undefined signed overflow. */
2971 abs_d = (d >= 0
2972 ? (unsigned HOST_WIDE_INT) d
2973 : - (unsigned HOST_WIDE_INT) d);
2974
2975 /* n rem d = n rem -d */
2976 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2977 {
2978 d = abs_d;
2979 oprnd1 = build_int_cst (itype, abs_d);
2980 }
2981 else if (HOST_BITS_PER_WIDE_INT >= prec
edc19fd0 2982 && abs_d == HOST_WIDE_INT_1U << (prec - 1))
127cb1cd 2983 /* This case is not handled correctly below. */
2984 return NULL;
2985
60420e1c 2986 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
edc19fd0 2987 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
127cb1cd 2988 {
2989 add = true;
7097b942 2990 ml |= HOST_WIDE_INT_M1U << (prec - 1);
127cb1cd 2991 }
2992 if (post_shift >= prec)
2993 return NULL;
2994
de6af029 2995 /* t1 = oprnd0 h* ml; */
3af51fe9 2996 t1 = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 2997 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2998 build_int_cst (itype, ml));
127cb1cd 2999
3000 if (add)
3001 {
3af51fe9 3002 /* t2 = t1 + oprnd0; */
de6af029 3003 append_pattern_def_seq (stmt_vinfo, def_stmt);
3af51fe9 3004 t2 = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 3005 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
127cb1cd 3006 }
3007 else
3af51fe9 3008 t2 = t1;
127cb1cd 3009
3af51fe9 3010 if (post_shift)
127cb1cd 3011 {
3af51fe9 3012 /* t3 = t2 >> post_shift; */
de6af029 3013 append_pattern_def_seq (stmt_vinfo, def_stmt);
3af51fe9 3014 t3 = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 3015 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
3016 build_int_cst (itype, post_shift));
18937389 3017 }
127cb1cd 3018 else
3af51fe9 3019 t3 = t2;
127cb1cd 3020
9c1be15e 3021 wide_int oprnd0_min, oprnd0_max;
de6af029 3022 int msb = 1;
3023 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
3024 {
9c1be15e 3025 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
de6af029 3026 msb = 0;
9c1be15e 3027 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
de6af029 3028 msb = -1;
3029 }
127cb1cd 3030
de6af029 3031 if (msb == 0 && d >= 0)
3032 {
3033 /* q = t3; */
3034 q = t3;
3035 pattern_stmt = def_stmt;
3036 }
3037 else
3038 {
3039 /* t4 = oprnd0 >> (prec - 1);
3040 or if we know from VRP that oprnd0 >= 0
3041 t4 = 0;
3042 or if we know from VRP that oprnd0 < 0
3043 t4 = -1; */
3044 append_pattern_def_seq (stmt_vinfo, def_stmt);
3045 t4 = vect_recog_temp_ssa_var (itype, NULL);
3046 if (msb != 1)
e9cf809e 3047 def_stmt = gimple_build_assign (t4, INTEGER_CST,
3048 build_int_cst (itype, msb));
de6af029 3049 else
e9cf809e 3050 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
3051 build_int_cst (itype, prec - 1));
de6af029 3052 append_pattern_def_seq (stmt_vinfo, def_stmt);
3053
3054 /* q = t3 - t4; or q = t4 - t3; */
3055 q = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 3056 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
3057 d < 0 ? t3 : t4);
de6af029 3058 }
127cb1cd 3059 }
3060
3061 if (rhs_code == TRUNC_MOD_EXPR)
3062 {
3063 tree r, t1;
3064
3065 /* We divided. Now finish by:
3066 t1 = q * oprnd1;
3067 r = oprnd0 - t1; */
3068 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3069
3070 t1 = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 3071 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
6d741312 3072 append_pattern_def_seq (stmt_vinfo, def_stmt);
18937389 3073
127cb1cd 3074 r = vect_recog_temp_ssa_var (itype, NULL);
e9cf809e 3075 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
18937389 3076 }
3077
127cb1cd 3078 /* Pattern detected. */
972163bf 3079 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt);
18937389 3080
18937389 3081 *type_out = vectype;
3082 return pattern_stmt;
3083}
3084
84557284 3085/* Function vect_recog_mixed_size_cond_pattern
3086
3087 Try to find the following pattern:
3088
3089 type x_t, y_t;
3090 TYPE a_T, b_T, c_T;
3091 loop:
3092 S1 a_T = x_t CMP y_t ? b_T : c_T;
3093
3094 where type 'TYPE' is an integral type which has different size
087903db 3095 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
84557284 3096 than 'type', the constants need to fit into an integer type
087903db 3097 with the same width as 'type') or results of conversion from 'type'.
84557284 3098
3099 Input:
3100
7987f646 3101 * STMT_VINFO: The stmt from which the pattern search begins.
84557284 3102
3103 Output:
3104
84557284 3105 * TYPE_OUT: The type of the output of this pattern.
3106
3107 * Return value: A new stmt that will be used to replace the pattern.
3108 Additionally a def_stmt is added.
3109
3110 a_it = x_t CMP y_t ? b_it : c_it;
3111 a_T = (TYPE) a_it; */
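/* For instance (an illustrative sketch): with 16-bit x_t and y_t and a
   64-bit result type,

     a_T = x_t < y_t ? 17 : 42;

   becomes

     a_it = x_t < y_t ? 17 : 42;   // 16-bit temporary
     a_T = (TYPE) a_it;

   so the COND_EXPR can use the same vector element size as the
   comparison.  */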
3112
42acab1c 3113static gimple *
7987f646 3114vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
84557284 3115{
7987f646 3116 gimple *last_stmt = stmt_vinfo->stmt;
84557284 3117 tree cond_expr, then_clause, else_clause;
087903db 3118 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
42acab1c 3119 gimple *pattern_stmt, *def_stmt;
087903db 3120 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
42acab1c 3121 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
087903db 3122 bool promotion;
3123 tree comp_scalar_type;
84557284 3124
3125 if (!is_gimple_assign (last_stmt)
3126 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
3127 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
3128 return NULL;
3129
3130 cond_expr = gimple_assign_rhs1 (last_stmt);
3131 then_clause = gimple_assign_rhs2 (last_stmt);
3132 else_clause = gimple_assign_rhs3 (last_stmt);
3133
f39dd90c 3134 if (!COMPARISON_CLASS_P (cond_expr))
3135 return NULL;
3136
087903db 3137 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
3138 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
f39dd90c 3139 if (comp_vectype == NULL_TREE)
84557284 3140 return NULL;
3141
3142 type = gimple_expr_type (last_stmt);
087903db 3143 if (types_compatible_p (type, comp_scalar_type)
3144 || ((TREE_CODE (then_clause) != INTEGER_CST
3145 || TREE_CODE (else_clause) != INTEGER_CST)
3146 && !INTEGRAL_TYPE_P (comp_scalar_type))
3147 || !INTEGRAL_TYPE_P (type))
3148 return NULL;
3149
3150 if ((TREE_CODE (then_clause) != INTEGER_CST
a73182ff 3151 && !type_conversion_p (then_clause, stmt_vinfo, false, &orig_type0,
3152 &def_stmt0, &promotion))
087903db 3153 || (TREE_CODE (else_clause) != INTEGER_CST
a73182ff 3154 && !type_conversion_p (else_clause, stmt_vinfo, false, &orig_type1,
3155 &def_stmt1, &promotion)))
087903db 3156 return NULL;
3157
3158 if (orig_type0 && orig_type1
3159 && !types_compatible_p (orig_type0, orig_type1))
3160 return NULL;
3161
3162 if (orig_type0)
3163 {
3164 if (!types_compatible_p (orig_type0, comp_scalar_type))
3165 return NULL;
3166 then_clause = gimple_assign_rhs1 (def_stmt0);
3167 itype = orig_type0;
3168 }
3169
3170 if (orig_type1)
3171 {
3172 if (!types_compatible_p (orig_type1, comp_scalar_type))
3173 return NULL;
3174 else_clause = gimple_assign_rhs1 (def_stmt1);
3175 itype = orig_type1;
3176 }
3177
84557284 3178
0436eae5 3179 HOST_WIDE_INT cmp_mode_size
3180 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
3181
03b7a719 3182 scalar_int_mode type_mode = SCALAR_INT_TYPE_MODE (type);
3183 if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size)
84557284 3184 return NULL;
3185
3186 vectype = get_vectype_for_scalar_type (type);
3187 if (vectype == NULL_TREE)
3188 return NULL;
3189
6a2e2a85 3190 if (expand_vec_cond_expr_p (vectype, comp_vectype, TREE_CODE (cond_expr)))
84557284 3191 return NULL;
3192
087903db 3193 if (itype == NULL_TREE)
0436eae5 3194 itype = build_nonstandard_integer_type (cmp_mode_size,
087903db 3195 TYPE_UNSIGNED (type));
3196
84557284 3197 if (itype == NULL_TREE
3d2b0034 3198 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size)
84557284 3199 return NULL;
3200
3201 vecitype = get_vectype_for_scalar_type (itype);
3202 if (vecitype == NULL_TREE)
3203 return NULL;
3204
6a2e2a85 3205 if (!expand_vec_cond_expr_p (vecitype, comp_vectype, TREE_CODE (cond_expr)))
84557284 3206 return NULL;
3207
03b7a719 3208 if (GET_MODE_BITSIZE (type_mode) > cmp_mode_size)
84557284 3209 {
087903db 3210 if ((TREE_CODE (then_clause) == INTEGER_CST
3211 && !int_fits_type_p (then_clause, itype))
3212 || (TREE_CODE (else_clause) == INTEGER_CST
3213 && !int_fits_type_p (else_clause, itype)))
84557284 3214 return NULL;
3215 }
3216
e9cf809e 3217 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3218 COND_EXPR, unshare_expr (cond_expr),
3219 fold_convert (itype, then_clause),
3220 fold_convert (itype, else_clause));
3221 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3222 NOP_EXPR, gimple_assign_lhs (def_stmt));
84557284 3223
76f769a4 3224 append_pattern_def_seq (stmt_vinfo, def_stmt, vecitype);
84557284 3225 *type_out = vectype;
3226
972163bf 3227 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt);
4c0c783a 3228
84557284 3229 return pattern_stmt;
3230}
3231
3232
50f85e2e 3233/* Helper function of vect_recog_bool_pattern. Called recursively, return
dab48979 3234 true if bool VAR can and should be optimized that way. Assume it shouldn't
3235 in case it's a result of a comparison which can be directly vectorized into
6172a9fd 3236 a vector comparison. Fills in STMTS with all stmts visited during the
3237 walk. */
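/* Illustrative sketch (added for exposition; variable names are
   hypothetical):

     _Bool b1 = x < y;
     _Bool b2 = p != q;
     _Bool b3 = b1 & b2;
     int r = (int) b3;

   A call to check_bool_pattern on b3 recursively visits the definitions
   of b3, b1 and b2 and records them in STMTS, provided each comparison
   has a usable vector type and is not better handled directly as a
   vector comparison producing a mask.  */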
50f85e2e 3238
3239static bool
6172a9fd 3240check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
50f85e2e 3241{
5cc2ea45 3242 tree rhs1;
50f85e2e 3243 enum tree_code rhs_code;
3244
c32b0020 3245 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3246 if (!def_stmt_info)
50f85e2e 3247 return false;
3248
c32b0020 3249 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3250 if (!def_stmt)
50f85e2e 3251 return false;
3252
6172a9fd 3253 if (stmts.contains (def_stmt))
3254 return true;
50f85e2e 3255
3256 rhs1 = gimple_assign_rhs1 (def_stmt);
3257 rhs_code = gimple_assign_rhs_code (def_stmt);
3258 switch (rhs_code)
3259 {
3260 case SSA_NAME:
6172a9fd 3261 if (! check_bool_pattern (rhs1, vinfo, stmts))
3262 return false;
3263 break;
50f85e2e 3264
3265 CASE_CONVERT:
69fcaae3 3266 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
50f85e2e 3267 return false;
6172a9fd 3268 if (! check_bool_pattern (rhs1, vinfo, stmts))
3269 return false;
3270 break;
50f85e2e 3271
3272 case BIT_NOT_EXPR:
6172a9fd 3273 if (! check_bool_pattern (rhs1, vinfo, stmts))
3274 return false;
3275 break;
50f85e2e 3276
3277 case BIT_AND_EXPR:
3278 case BIT_IOR_EXPR:
3279 case BIT_XOR_EXPR:
6172a9fd 3280 if (! check_bool_pattern (rhs1, vinfo, stmts)
3281 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo, stmts))
50f85e2e 3282 return false;
6172a9fd 3283 break;
50f85e2e 3284
3285 default:
3286 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3287 {
6172a9fd 3288 tree vecitype, comp_vectype;
50f85e2e 3289
3a542b98 3290 /* If the comparison can throw, then is_gimple_condexpr will be
3291 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
aac19106 3292 if (stmt_could_throw_p (cfun, def_stmt))
3a542b98 3293 return false;
3294
50f85e2e 3295 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3296 if (comp_vectype == NULL_TREE)
3297 return false;
3298
6172a9fd 3299 tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
dab48979 3300 if (mask_type
6a2e2a85 3301 && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
dab48979 3302 return false;
3303
50f85e2e 3304 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
3305 {
3d2b0034 3306 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
50f85e2e 3307 tree itype
d6152abc 3308 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
50f85e2e 3309 vecitype = get_vectype_for_scalar_type (itype);
3310 if (vecitype == NULL_TREE)
3311 return false;
3312 }
3313 else
3314 vecitype = comp_vectype;
6a2e2a85 3315 if (! expand_vec_cond_expr_p (vecitype, comp_vectype, rhs_code))
6172a9fd 3316 return false;
50f85e2e 3317 }
6172a9fd 3318 else
3319 return false;
3320 break;
50f85e2e 3321 }
6172a9fd 3322
3323 bool res = stmts.add (def_stmt);
3324 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
3325 gcc_assert (!res);
3326
3327 return true;
50f85e2e 3328}
3329
3330
3331/* Helper function of adjust_bool_pattern.  Add a cast of VAR (defined
6172a9fd 3332 by a previous stmt, SSA_NAME_DEF_STMT of VAR) to TYPE, appending the cast
 3333 stmt to STMT_INFO's pattern sequence, and return its result. */
50f85e2e 3334
3335static tree
6172a9fd 3336adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info)
50f85e2e 3337{
6172a9fd 3338 gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
3339 NOP_EXPR, var);
76f769a4 3340 append_pattern_def_seq (stmt_info, cast_stmt,
3341 get_vectype_for_scalar_type (type));
50f85e2e 3342 return gimple_assign_lhs (cast_stmt);
3343}
3344
6172a9fd 3345/* Helper function of vect_recog_bool_pattern. Do the actual transformations.
3346 VAR is an SSA_NAME that should be transformed from bool to a wider integer
3347 type, OUT_TYPE is the desired final integer type of the whole pattern.
3348 STMT_INFO is the info of the pattern root and is where pattern stmts should
3349 be associated with. DEFS is a map of pattern defs. */
50f85e2e 3350
6172a9fd 3351static void
3352adjust_bool_pattern (tree var, tree out_type,
3353 stmt_vec_info stmt_info, hash_map <tree, tree> &defs)
50f85e2e 3354{
42acab1c 3355 gimple *stmt = SSA_NAME_DEF_STMT (var);
50f85e2e 3356 enum tree_code rhs_code, def_rhs_code;
3357 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
3358 location_t loc;
42acab1c 3359 gimple *pattern_stmt, *def_stmt;
6172a9fd 3360 tree trueval = NULL_TREE;
50f85e2e 3361
3362 rhs1 = gimple_assign_rhs1 (stmt);
3363 rhs2 = gimple_assign_rhs2 (stmt);
3364 rhs_code = gimple_assign_rhs_code (stmt);
3365 loc = gimple_location (stmt);
3366 switch (rhs_code)
3367 {
3368 case SSA_NAME:
3369 CASE_CONVERT:
6172a9fd 3370 irhs1 = *defs.get (rhs1);
50f85e2e 3371 itype = TREE_TYPE (irhs1);
3372 pattern_stmt
e9cf809e 3373 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3374 SSA_NAME, irhs1);
50f85e2e 3375 break;
3376
3377 case BIT_NOT_EXPR:
6172a9fd 3378 irhs1 = *defs.get (rhs1);
50f85e2e 3379 itype = TREE_TYPE (irhs1);
3380 pattern_stmt
e9cf809e 3381 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3382 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
50f85e2e 3383 break;
3384
3385 case BIT_AND_EXPR:
3386 /* Try to optimize x = y & (a < b ? 1 : 0); into
3387 x = (a < b ? y : 0);
3388
3389 E.g. for:
3390 bool a_b, b_b, c_b;
3391 TYPE d_T;
3392
3393 S1 a_b = x1 CMP1 y1;
3394 S2 b_b = x2 CMP2 y2;
3395 S3 c_b = a_b & b_b;
3396 S4 d_T = (TYPE) c_b;
3397
3398 we would normally emit:
3399
3400 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3401 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3402 S3' c_T = a_T & b_T;
3403 S4' d_T = c_T;
3404
3405 but we can save one stmt by using the
3406 result of one of the COND_EXPRs in the other COND_EXPR and leave
3407 BIT_AND_EXPR stmt out:
3408
3409 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3410 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3411 S4' d_T = c_T;
3412
3413 At least when VEC_COND_EXPR is implemented using masks,
3414 cond ? 1 : 0 is as expensive as cond ? var : 0; in both cases it
3415 computes the comparison mask and ANDs it, in one case with an
3416 all-ones vector, in the other case with a vector register.
3417 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3418 often more expensive. */
3419 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3420 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3421 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3422 {
6172a9fd 3423 irhs1 = *defs.get (rhs1);
50f85e2e 3424 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
50f85e2e 3425 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3d2b0034 3426 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
50f85e2e 3427 {
6172a9fd 3428 rhs_code = def_rhs_code;
3429 rhs1 = def_rhs1;
3430 rhs2 = gimple_assign_rhs2 (def_stmt);
3431 trueval = irhs1;
3432 goto do_compare;
50f85e2e 3433 }
3434 else
6172a9fd 3435 irhs2 = *defs.get (rhs2);
50f85e2e 3436 goto and_ior_xor;
3437 }
3438 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3439 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3440 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3441 {
6172a9fd 3442 irhs2 = *defs.get (rhs2);
50f85e2e 3443 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
50f85e2e 3444 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3d2b0034 3445 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1))))
50f85e2e 3446 {
6172a9fd 3447 rhs_code = def_rhs_code;
3448 rhs1 = def_rhs1;
3449 rhs2 = gimple_assign_rhs2 (def_stmt);
3450 trueval = irhs2;
3451 goto do_compare;
50f85e2e 3452 }
3453 else
6172a9fd 3454 irhs1 = *defs.get (rhs1);
50f85e2e 3455 goto and_ior_xor;
3456 }
3457 /* FALLTHRU */
3458 case BIT_IOR_EXPR:
3459 case BIT_XOR_EXPR:
6172a9fd 3460 irhs1 = *defs.get (rhs1);
3461 irhs2 = *defs.get (rhs2);
50f85e2e 3462 and_ior_xor:
3463 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3464 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3465 {
3466 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3467 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3468 int out_prec = TYPE_PRECISION (out_type);
3469 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
6172a9fd 3470 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), irhs2,
3471 stmt_info);
50f85e2e 3472 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
6172a9fd 3473 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), irhs1,
3474 stmt_info);
50f85e2e 3475 else
3476 {
6172a9fd 3477 irhs1 = adjust_bool_pattern_cast (out_type, irhs1, stmt_info);
3478 irhs2 = adjust_bool_pattern_cast (out_type, irhs2, stmt_info);
50f85e2e 3479 }
3480 }
3481 itype = TREE_TYPE (irhs1);
3482 pattern_stmt
e9cf809e 3483 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3484 rhs_code, irhs1, irhs2);
50f85e2e 3485 break;
3486
3487 default:
6172a9fd 3488 do_compare:
50f85e2e 3489 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3490 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
78fb8a4f 3491 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
eafbcd13 3492 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1)),
3493 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
50f85e2e 3494 {
3d2b0034 3495 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
50f85e2e 3496 itype
d6152abc 3497 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
50f85e2e 3498 }
3499 else
3500 itype = TREE_TYPE (rhs1);
3501 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3502 if (trueval == NULL_TREE)
3503 trueval = build_int_cst (itype, 1);
3504 else
3505 gcc_checking_assert (useless_type_conversion_p (itype,
3506 TREE_TYPE (trueval)));
3507 pattern_stmt
e9cf809e 3508 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3509 COND_EXPR, cond_expr, trueval,
3510 build_int_cst (itype, 0));
50f85e2e 3511 break;
3512 }
3513
50f85e2e 3514 gimple_set_location (pattern_stmt, loc);
76f769a4 3515 append_pattern_def_seq (stmt_info, pattern_stmt,
3516 get_vectype_for_scalar_type (itype));
6172a9fd 3517 defs.put (var, gimple_assign_lhs (pattern_stmt));
3518}
3519
3520/* Comparison function for qsorting a vector of gimple stmts by UID. */
3521
3522static int
3523sort_after_uid (const void *p1, const void *p2)
3524{
3525 const gimple *stmt1 = *(const gimple * const *)p1;
3526 const gimple *stmt2 = *(const gimple * const *)p2;
3527 return gimple_uid (stmt1) - gimple_uid (stmt2);
50f85e2e 3528}
3529
6172a9fd 3530/* Create pattern stmts for all stmts participating in the bool pattern
ecc42a77 3531 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
6172a9fd 3532 OUT_TYPE. Return the def of the pattern root. */
3533
3534static tree
3535adjust_bool_stmts (hash_set <gimple *> &bool_stmt_set,
ecc42a77 3536 tree out_type, stmt_vec_info stmt_info)
6172a9fd 3537{
3538 /* Gather original stmts in the bool pattern in their order of appearance
3539 in the IL. */
3540 auto_vec<gimple *> bool_stmts (bool_stmt_set.elements ());
3541 for (hash_set <gimple *>::iterator i = bool_stmt_set.begin ();
3542 i != bool_stmt_set.end (); ++i)
3543 bool_stmts.quick_push (*i);
3544 bool_stmts.qsort (sort_after_uid);
3545
3546 /* Now process them in that order, producing pattern stmts. */
3547 hash_map <tree, tree> defs;
3548 for (unsigned i = 0; i < bool_stmts.length (); ++i)
3549 adjust_bool_pattern (gimple_assign_lhs (bool_stmts[i]),
1c2fef9a 3550 out_type, stmt_info, defs);
6172a9fd 3551
3552 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
3553 gimple *pattern_stmt
1c2fef9a 3554 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
6172a9fd 3555 return gimple_assign_lhs (pattern_stmt);
3556}
50f85e2e 3557
adca2d16 3558/* Helper for search_type_for_mask. */
dab48979 3559
3560static tree
adca2d16 3561search_type_for_mask_1 (tree var, vec_info *vinfo,
3562 hash_map<gimple *, tree> &cache)
dab48979 3563{
dab48979 3564 tree rhs1;
3565 enum tree_code rhs_code;
959c4b00 3566 tree res = NULL_TREE, res2;
dab48979 3567
69fcaae3 3568 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
dab48979 3569 return NULL_TREE;
3570
c32b0020 3571 stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
3572 if (!def_stmt_info)
dab48979 3573 return NULL_TREE;
3574
c32b0020 3575 gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
3576 if (!def_stmt)
dab48979 3577 return NULL_TREE;
3578
adca2d16 3579 tree *c = cache.get (def_stmt);
3580 if (c)
3581 return *c;
3582
dab48979 3583 rhs_code = gimple_assign_rhs_code (def_stmt);
3584 rhs1 = gimple_assign_rhs1 (def_stmt);
3585
3586 switch (rhs_code)
3587 {
3588 case SSA_NAME:
3589 case BIT_NOT_EXPR:
3590 CASE_CONVERT:
adca2d16 3591 res = search_type_for_mask_1 (rhs1, vinfo, cache);
dab48979 3592 break;
3593
3594 case BIT_AND_EXPR:
3595 case BIT_IOR_EXPR:
3596 case BIT_XOR_EXPR:
adca2d16 3597 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3598 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
3599 cache);
959c4b00 3600 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3601 res = res2;
dab48979 3602 break;
3603
3604 default:
3605 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3606 {
959c4b00 3607 tree comp_vectype, mask_type;
3608
69fcaae3 3609 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
268fbd56 3610 {
adca2d16 3611 res = search_type_for_mask_1 (rhs1, vinfo, cache);
3612 res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
3613 vinfo, cache);
268fbd56 3614 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3615 res = res2;
3616 break;
3617 }
3618
959c4b00 3619 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3620 if (comp_vectype == NULL_TREE)
adca2d16 3621 {
3622 res = NULL_TREE;
3623 break;
3624 }
959c4b00 3625
3626 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3627 if (!mask_type
6a2e2a85 3628 || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
adca2d16 3629 {
3630 res = NULL_TREE;
3631 break;
3632 }
959c4b00 3633
dab48979 3634 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3635 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3636 {
3d2b0034 3637 scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
dab48979 3638 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3639 }
3640 else
3641 res = TREE_TYPE (rhs1);
3642 }
3643 }
3644
adca2d16 3645 cache.put (def_stmt, res);
dab48979 3646 return res;
3647}
3648
adca2d16 3649/* Return the proper type for converting bool VAR into
3650 an integer value, or NULL_TREE if no such type exists.
3651 The type is chosen so that the converted value has the
3652 same number of elements as VAR's vector type. */
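/* For example (an added note): if VAR is ultimately defined by a
   comparison of two 32-bit 'int' values, and the target supports the
   corresponding vector comparison, the returned type is a 32-bit
   unsigned integer type, so that a vector of that type has the same
   number of elements as the vectorized comparison.  */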
3653
3654static tree
3655search_type_for_mask (tree var, vec_info *vinfo)
3656{
3657 hash_map<gimple *, tree> cache;
3658 return search_type_for_mask_1 (var, vinfo, cache);
3659}
dab48979 3660
50f85e2e 3661/* Function vect_recog_bool_pattern
3662
3663 Try to find pattern like following:
3664
3665 bool a_b, b_b, c_b, d_b, e_b;
3666 TYPE f_T;
3667 loop:
3668 S1 a_b = x1 CMP1 y1;
3669 S2 b_b = x2 CMP2 y2;
3670 S3 c_b = a_b & b_b;
3671 S4 d_b = x3 CMP3 y3;
3672 S5 e_b = c_b | d_b;
3673 S6 f_T = (TYPE) e_b;
3674
60711c82 3675 where type 'TYPE' is an integral type. Or a similar pattern
3676 ending in
3677
3678 S6 f_Y = e_b ? r_Y : s_Y;
3679
3680 as results from if-conversion of a complex condition.
50f85e2e 3681
3682 Input:
3683
7987f646 3684 * STMT_VINFO: The stmt at the end from which the pattern
3685 search begins, i.e. cast of a bool to
3686 an integer type.
50f85e2e 3687
3688 Output:
3689
50f85e2e 3690 * TYPE_OUT: The type of the output of this pattern.
3691
3692 * Return value: A new stmt that will be used to replace the pattern.
3693
3694 Assuming the size of TYPE is the same as the size of all comparisons
3695 (otherwise some casts would be added where needed), for the above
3696 sequence we create the following related pattern stmts:
3697 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3698 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3699 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3700 S5' e_T = c_T | d_T;
3701 S6' f_T = e_T;
3702
3703 Instead of the above S3' we could emit:
3704 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3705 S3' c_T = a_T | b_T;
3706 but the above is more efficient. */
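/* An illustrative source-level loop that can lead to the first form
   above (added for exposition; names are hypothetical):

     for (int i = 0; i < N; i++)
       f[i] = (x1[i] < y1[i] && x2[i] < y2[i]) || x3[i] < y3[i];

   where f has integral type: after if-conversion the && and || become
   & and | on boolean temporaries, and the widening of the boolean
   result to 'TYPE' provides the final S6 cast that roots the pattern.  */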
3707
42acab1c 3708static gimple *
7987f646 3709vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
50f85e2e 3710{
7987f646 3711 gimple *last_stmt = stmt_vinfo->stmt;
50f85e2e 3712 enum tree_code rhs_code;
3713 tree var, lhs, rhs, vectype;
e2c5c678 3714 vec_info *vinfo = stmt_vinfo->vinfo;
42acab1c 3715 gimple *pattern_stmt;
50f85e2e 3716
3717 if (!is_gimple_assign (last_stmt))
3718 return NULL;
3719
3720 var = gimple_assign_rhs1 (last_stmt);
3721 lhs = gimple_assign_lhs (last_stmt);
3722
69fcaae3 3723 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
50f85e2e 3724 return NULL;
3725
6172a9fd 3726 hash_set<gimple *> bool_stmts;
3727
50f85e2e 3728 rhs_code = gimple_assign_rhs_code (last_stmt);
3729 if (CONVERT_EXPR_CODE_P (rhs_code))
3730 {
4dd9bacf 3731 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs))
3b515af5 3732 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
50f85e2e 3733 return NULL;
3734 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3735 if (vectype == NULL_TREE)
3736 return NULL;
3737
6172a9fd 3738 if (check_bool_pattern (var, vinfo, bool_stmts))
dab48979 3739 {
a73182ff 3740 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (lhs), stmt_vinfo);
dab48979 3741 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3742 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3743 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3744 else
3745 pattern_stmt
3746 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3747 }
50f85e2e 3748 else
dab48979 3749 {
3750 tree type = search_type_for_mask (var, vinfo);
b602a8ec 3751 tree cst0, cst1, tmp;
dab48979 3752
3753 if (!type)
3754 return NULL;
3755
3756 /* We may directly use the cond with a narrowed type to avoid
3757 multiple cond exprs with following result packing and
3758 instead perform a single cond with a packed mask. In case
3759 of widening it is better to create the cond first and then
3760 extract the results. */
3761 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3762 type = TREE_TYPE (lhs);
3763
3764 cst0 = build_int_cst (type, 0);
3765 cst1 = build_int_cst (type, 1);
3766 tmp = vect_recog_temp_ssa_var (type, NULL);
b602a8ec 3767 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
dab48979 3768
3769 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3770 {
3771 tree new_vectype = get_vectype_for_scalar_type (type);
76f769a4 3772 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
dab48979 3773
3774 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3775 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3776 }
3777 }
3778
50f85e2e 3779 *type_out = vectype;
972163bf 3780 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
4c0c783a 3781
60711c82 3782 return pattern_stmt;
3783 }
3784 else if (rhs_code == COND_EXPR
3785 && TREE_CODE (var) == SSA_NAME)
3786 {
3787 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3788 if (vectype == NULL_TREE)
3789 return NULL;
3790
3791 /* Build a scalar type for the boolean result that when
3792 vectorized matches the vector type of the result in
3793 size and number of elements. */
3794 unsigned prec
f08ee65f 3795 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype)),
3796 TYPE_VECTOR_SUBPARTS (vectype));
3797
60711c82 3798 tree type
3799 = build_nonstandard_integer_type (prec,
3800 TYPE_UNSIGNED (TREE_TYPE (var)));
3801 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3802 return NULL;
3803
6172a9fd 3804 if (!check_bool_pattern (var, vinfo, bool_stmts))
b602a8ec 3805 return NULL;
3806
a73182ff 3807 rhs = adjust_bool_stmts (bool_stmts, type, stmt_vinfo);
60711c82 3808
60711c82 3809 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3810 pattern_stmt
b602a8ec 3811 = gimple_build_assign (lhs, COND_EXPR,
3812 build2 (NE_EXPR, boolean_type_node,
3813 rhs, build_int_cst (type, 0)),
e9cf809e 3814 gimple_assign_rhs2 (last_stmt),
3815 gimple_assign_rhs3 (last_stmt));
60711c82 3816 *type_out = vectype;
972163bf 3817 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
60711c82 3818
50f85e2e 3819 return pattern_stmt;
3820 }
d6152abc 3821 else if (rhs_code == SSA_NAME
3822 && STMT_VINFO_DATA_REF (stmt_vinfo))
3823 {
3824 stmt_vec_info pattern_stmt_info;
3825 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3826 gcc_assert (vectype != NULL_TREE);
16c8a484 3827 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3828 return NULL;
d6152abc 3829
6172a9fd 3830 if (check_bool_pattern (var, vinfo, bool_stmts))
a73182ff 3831 rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
dab48979 3832 else
3833 {
3834 tree type = search_type_for_mask (var, vinfo);
b602a8ec 3835 tree cst0, cst1, new_vectype;
dab48979 3836
3837 if (!type)
3838 return NULL;
3839
3840 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3841 type = TREE_TYPE (vectype);
3842
3843 cst0 = build_int_cst (type, 0);
3844 cst1 = build_int_cst (type, 1);
3845 new_vectype = get_vectype_for_scalar_type (type);
3846
3847 rhs = vect_recog_temp_ssa_var (type, NULL);
b602a8ec 3848 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
76f769a4 3849 append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype);
dab48979 3850 }
3851
d6152abc 3852 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3853 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3854 {
3855 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
42acab1c 3856 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
dab48979 3857 append_pattern_def_seq (stmt_vinfo, cast_stmt);
d6152abc 3858 rhs = rhs2;
3859 }
e9cf809e 3860 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
04b2391d 3861 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
5f02ee72 3862 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
d6152abc 3863 *type_out = vectype;
972163bf 3864 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt);
3865
d6152abc 3866 return pattern_stmt;
3867 }
50f85e2e 3868 else
3869 return NULL;
3870}
3871
3872
959c4b00 3873/* A helper for vect_recog_mask_conversion_pattern. Build
3874 conversion of MASK to a type suitable for masking VECTYPE.
3875 The built statement gets the required vectype and is appended to
3876 STMT_VINFO's pattern sequence.
3877
3878 Return converted mask. */
3879
3880static tree
76f769a4 3881build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo)
959c4b00 3882{
3883 gimple *stmt;
3884 tree masktype, tmp;
959c4b00 3885
3886 masktype = build_same_sized_truth_vector_type (vectype);
3887 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3888 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
76f769a4 3889 append_pattern_def_seq (stmt_vinfo, stmt, masktype);
959c4b00 3890
3891 return tmp;
3892}
3893
3894
3895/* Function vect_recog_mask_conversion_pattern
3896
3897 Try to find statements which require boolean type
3898 conversion. Additional conversion statements are
3899 added to handle such cases. For example:
3900
3901 bool m_1, m_2, m_3;
3902 int i_4, i_5;
3903 double d_6, d_7;
3904 char c_1, c_2, c_3;
3905
3906 S1 m_1 = i_4 > i_5;
3907 S2 m_2 = d_6 < d_7;
3908 S3 m_3 = m_1 & m_2;
3909 S4 c_1 = m_3 ? c_2 : c_3;
3910
3911 Will be transformed into:
3912
3913 S1 m_1 = i_4 > i_5;
3914 S2 m_2 = d_6 < d_7;
3915 S3'' m_2' = (_Bool[bitsize=32])m_2
3916 S3' m_3' = m_1 & m_2';
3917 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3918 S4' c_1' = m_3'' ? c_2 : c_3; */
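/* In source terms the example above could come from a loop such as
   (an illustrative assumption):

     for (int i = 0; i < n; i++)
       c_1[i] = (i_4[i] > i_5[i] && d_6[i] < d_7[i]) ? c_2[i] : c_3[i];

   with 'int' i_4/i_5 and 'double' d_6/d_7: the integer comparison
   produces a mask with 32-bit elements, the double comparison a mask
   with 64-bit elements, and the char-typed COND_EXPR needs yet another
   mask width, so conversion stmts are inserted before the '&' and
   before the COND_EXPR.  */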
3919
3920static gimple *
7987f646 3921vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
959c4b00 3922{
7987f646 3923 gimple *last_stmt = stmt_vinfo->stmt;
959c4b00 3924 enum tree_code rhs_code;
8bde2609 3925 tree lhs = NULL_TREE, rhs1, rhs2, tmp, rhs1_type, rhs2_type;
3926 tree vectype1, vectype2;
959c4b00 3927 stmt_vec_info pattern_stmt_info;
3928 vec_info *vinfo = stmt_vinfo->vinfo;
959c4b00 3929
3930 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3931 if (is_gimple_call (last_stmt)
03821886 3932 && gimple_call_internal_p (last_stmt))
959c4b00 3933 {
989f02dc 3934 gcall *pattern_stmt;
959c4b00 3935
03821886 3936 internal_fn ifn = gimple_call_internal_fn (last_stmt);
3937 int mask_argno = internal_fn_mask_index (ifn);
3938 if (mask_argno < 0)
3939 return NULL;
3940
3941 bool store_p = internal_store_fn_p (ifn);
3942 if (store_p)
959c4b00 3943 {
03821886 3944 int rhs_index = internal_fn_stored_value_index (ifn);
3945 tree rhs = gimple_call_arg (last_stmt, rhs_index);
3946 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs));
959c4b00 3947 }
3948 else
3949 {
03821886 3950 lhs = gimple_call_lhs (last_stmt);
3951 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
959c4b00 3952 }
3953
03821886 3954 tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
3955 tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
3956 if (!mask_arg_type)
959c4b00 3957 return NULL;
03821886 3958 vectype2 = get_mask_type_for_scalar_type (mask_arg_type);
959c4b00 3959
3960 if (!vectype1 || !vectype2
f08ee65f 3961 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
3962 TYPE_VECTOR_SUBPARTS (vectype2)))
959c4b00 3963 return NULL;
3964
03821886 3965 tmp = build_mask_conversion (mask_arg, vectype1, stmt_vinfo);
3966
3967 auto_vec<tree, 8> args;
3968 unsigned int nargs = gimple_call_num_args (last_stmt);
3969 args.safe_grow (nargs);
3970 for (unsigned int i = 0; i < nargs; ++i)
3971 args[i] = ((int) i == mask_argno
3972 ? tmp
3973 : gimple_call_arg (last_stmt, i));
3974 pattern_stmt = gimple_build_call_internal_vec (ifn, args);
959c4b00 3975
03821886 3976 if (!store_p)
959c4b00 3977 {
3978 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
959c4b00 3979 gimple_call_set_lhs (pattern_stmt, lhs);
3980 }
989f02dc 3981 gimple_call_set_nothrow (pattern_stmt, true);
959c4b00 3982
04b2391d 3983 pattern_stmt_info = vinfo->add_stmt (pattern_stmt);
03821886 3984 if (STMT_VINFO_DATA_REF (stmt_vinfo))
5f02ee72 3985 vinfo->move_dr (pattern_stmt_info, stmt_vinfo);
959c4b00 3986
3987 *type_out = vectype1;
972163bf 3988 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
959c4b00 3989
3990 return pattern_stmt;
3991 }
3992
3993 if (!is_gimple_assign (last_stmt))
3994 return NULL;
3995
989f02dc 3996 gimple *pattern_stmt;
959c4b00 3997 lhs = gimple_assign_lhs (last_stmt);
3998 rhs1 = gimple_assign_rhs1 (last_stmt);
3999 rhs_code = gimple_assign_rhs_code (last_stmt);
4000
4001 /* Check for cond expression requiring mask conversion. */
4002 if (rhs_code == COND_EXPR)
4003 {
959c4b00 4004 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
4005
4006 if (TREE_CODE (rhs1) == SSA_NAME)
4007 {
4008 rhs1_type = search_type_for_mask (rhs1, vinfo);
4009 if (!rhs1_type)
4010 return NULL;
4011 }
9082c4a4 4012 else if (COMPARISON_CLASS_P (rhs1))
eedf2bfe 4013 {
4014 /* Check whether we're comparing scalar booleans and (if so)
4015 whether a better mask type exists than the mask associated
4016 with boolean-sized elements. This avoids unnecessary packs
4017 and unpacks if the booleans are set from comparisons of
4018 wider types. E.g. in:
4019
4020 int x1, x2, x3, x4, y1, y2;
4021 ...
4022 bool b1 = (x1 == x2);
4023 bool b2 = (x3 == x4);
4024 ... = b1 == b2 ? y1 : y2;
4025
4026 it is better for b1 and b2 to use the mask type associated
4027 with int elements rather than bool (byte) elements. */
4028 rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo);
4029 if (!rhs1_type)
4030 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
4031 }
9082c4a4 4032 else
4033 return NULL;
959c4b00 4034
4035 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
4036
eedf2bfe 4037 if (!vectype1 || !vectype2)
4038 return NULL;
4039
4040 /* Continue if a conversion is needed. Also continue if we have
4041 a comparison whose vector type would normally be different from
4042 VECTYPE2 when considered in isolation. In that case we'll
4043 replace the comparison with an SSA name (so that we can record
4044 its vector type) and behave as though the comparison was an SSA
4045 name from the outset. */
4046 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1),
4047 TYPE_VECTOR_SUBPARTS (vectype2))
4048 && (TREE_CODE (rhs1) == SSA_NAME
4049 || rhs1_type == TREE_TYPE (TREE_OPERAND (rhs1, 0))))
959c4b00 4050 return NULL;
4051
6d44bd9d 4052 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
4053 in place, we can handle it in vectorizable_condition. This avoids
4054 unnecessary promotion stmts and increased vectorization factor. */
4055 if (COMPARISON_CLASS_P (rhs1)
4056 && INTEGRAL_TYPE_P (rhs1_type)
f08ee65f 4057 && known_le (TYPE_VECTOR_SUBPARTS (vectype1),
4058 TYPE_VECTOR_SUBPARTS (vectype2)))
6d44bd9d 4059 {
6d44bd9d 4060 enum vect_def_type dt;
bf8b3614 4061 if (vect_is_simple_use (TREE_OPERAND (rhs1, 0), vinfo, &dt)
6d44bd9d 4062 && dt == vect_external_def
bf8b3614 4063 && vect_is_simple_use (TREE_OPERAND (rhs1, 1), vinfo, &dt)
6d44bd9d 4064 && (dt == vect_external_def
4065 || dt == vect_constant_def))
4066 {
4067 tree wide_scalar_type = build_nonstandard_integer_type
4068 (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))),
4069 TYPE_UNSIGNED (rhs1_type));
4070 tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type);
4071 if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1)))
4072 return NULL;
4073 }
4074 }
4075
959c4b00 4076 /* If rhs1 is a comparison we need to move it into a
4077 separate statement. */
4078 if (TREE_CODE (rhs1) != SSA_NAME)
4079 {
4080 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
4081 pattern_stmt = gimple_build_assign (tmp, rhs1);
4082 rhs1 = tmp;
76f769a4 4083 append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
959c4b00 4084 }
4085
eedf2bfe 4086 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
4087 TYPE_VECTOR_SUBPARTS (vectype2)))
76f769a4 4088 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
eedf2bfe 4089 else
4090 tmp = rhs1;
959c4b00 4091
4092 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4093 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
4094 gimple_assign_rhs2 (last_stmt),
4095 gimple_assign_rhs3 (last_stmt));
4096
4097 *type_out = vectype1;
972163bf 4098 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
959c4b00 4099
4100 return pattern_stmt;
4101 }
4102
4103 /* Now check for binary boolean operations requiring conversion for
4104 one of operands. */
69fcaae3 4105 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs)))
959c4b00 4106 return NULL;
4107
4108 if (rhs_code != BIT_IOR_EXPR
4109 && rhs_code != BIT_XOR_EXPR
82ccd33e 4110 && rhs_code != BIT_AND_EXPR
4111 && TREE_CODE_CLASS (rhs_code) != tcc_comparison)
959c4b00 4112 return NULL;
4113
4114 rhs2 = gimple_assign_rhs2 (last_stmt);
4115
4116 rhs1_type = search_type_for_mask (rhs1, vinfo);
4117 rhs2_type = search_type_for_mask (rhs2, vinfo);
4118
4119 if (!rhs1_type || !rhs2_type
4120 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
4121 return NULL;
4122
4123 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
4124 {
4125 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
4126 if (!vectype1)
4127 return NULL;
76f769a4 4128 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo);
959c4b00 4129 }
4130 else
4131 {
4132 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
4133 if (!vectype1)
4134 return NULL;
76f769a4 4135 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo);
959c4b00 4136 }
4137
4138 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
4139 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
4140
4141 *type_out = vectype1;
972163bf 4142 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt);
959c4b00 4143
4144 return pattern_stmt;
4145}
4146
ecc42a77 4147/* STMT_INFO is a load or store. If the load or store is conditional, return
1619606c 4148 the boolean condition under which it occurs, otherwise return null. */
4149
4150static tree
ecc42a77 4151vect_get_load_store_mask (stmt_vec_info stmt_info)
1619606c 4152{
ecc42a77 4153 if (gassign *def_assign = dyn_cast <gassign *> (stmt_info->stmt))
1619606c 4154 {
4155 gcc_assert (gimple_assign_single_p (def_assign));
4156 return NULL_TREE;
4157 }
4158
ecc42a77 4159 if (gcall *def_call = dyn_cast <gcall *> (stmt_info->stmt))
1619606c 4160 {
4161 internal_fn ifn = gimple_call_internal_fn (def_call);
4162 int mask_index = internal_fn_mask_index (ifn);
4163 return gimple_call_arg (def_call, mask_index);
4164 }
4165
4166 gcc_unreachable ();
4167}
4168
4169/* Return the scalar offset type that an internal gather/scatter function
4170 should use. GS_INFO describes the gather/scatter operation. */
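/* For example (an added note): with 64-bit elements indexed by a 32-bit
   unsigned offset, a 64-bit unsigned integer type is returned, so that
   the offsets can be widened to the element width before being passed
   to the gather/scatter internal function.  */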
4171
4172static tree
4173vect_get_gather_scatter_offset_type (gather_scatter_info *gs_info)
4174{
4175 tree offset_type = TREE_TYPE (gs_info->offset);
4176 unsigned int element_bits = tree_to_uhwi (TYPE_SIZE (gs_info->element_type));
4177
4178 /* Enforced by vect_check_gather_scatter. */
4179 unsigned int offset_bits = TYPE_PRECISION (offset_type);
4180 gcc_assert (element_bits >= offset_bits);
4181
4182 /* If the offset is narrower than the elements, extend it according
4183 to its sign. */
4184 if (element_bits > offset_bits)
4185 return build_nonstandard_integer_type (element_bits,
4186 TYPE_UNSIGNED (offset_type));
4187
4188 return offset_type;
4189}
4190
4191/* Return MASK if MASK is suitable for masking an operation on vectors
4192 of type VECTYPE, otherwise convert it into such a form and return
4193 the result. Associate any conversion statements with STMT_INFO's
4194 pattern. */
4195
4196static tree
4197vect_convert_mask_for_vectype (tree mask, tree vectype,
4198 stmt_vec_info stmt_info, vec_info *vinfo)
4199{
4200 tree mask_type = search_type_for_mask (mask, vinfo);
4201 if (mask_type)
4202 {
4203 tree mask_vectype = get_mask_type_for_scalar_type (mask_type);
4204 if (mask_vectype
4205 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype),
4206 TYPE_VECTOR_SUBPARTS (mask_vectype)))
76f769a4 4207 mask = build_mask_conversion (mask, vectype, stmt_info);
1619606c 4208 }
4209 return mask;
4210}
4211
4212/* Return the equivalent of:
4213
4214 fold_convert (TYPE, VALUE)
4215
4216 with the expectation that the operation will be vectorized.
4217 If new statements are needed, add them as pattern statements
4218 to STMT_INFO. */
4219
4220static tree
76f769a4 4221vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info)
1619606c 4222{
4223 if (useless_type_conversion_p (type, TREE_TYPE (value)))
4224 return value;
4225
4226 tree new_value = vect_recog_temp_ssa_var (type, NULL);
4227 gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value);
76f769a4 4228 append_pattern_def_seq (stmt_info, conversion,
4229 get_vectype_for_scalar_type (type));
1619606c 4230 return new_value;
4231}
4232
7987f646 4233/* Try to convert STMT_INFO into a call to a gather load or scatter store
1619606c 4234 internal function. Return the final statement on success and set
1ba54d4e 4235 *TYPE_OUT to the vector type being loaded or stored.
1619606c 4236
4237 This function only handles gathers and scatters that were recognized
4238 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
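/* Illustrative example (added for exposition; names are hypothetical):
   a conditional gather load such as

     for (int i = 0; i < n; i++)
       if (pred[i])
         sum += data[idx[i]];

   that was recognized as a gather is rewritten here into a call to the
   target's gather-load internal function (gs_info.ifn), passing the
   invariant base, the per-element offset widened to the element width,
   the scale and, for the conditional case, the converted mask.  */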
4239
4240static gimple *
7987f646 4241vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out)
1619606c 4242{
4243 /* Currently we only support this for loop vectorization. */
1619606c 4244 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_info->vinfo);
4245 if (!loop_vinfo)
4246 return NULL;
4247
4248 /* Make sure that we're looking at a gather load or scatter store. */
4249 data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
4250 if (!dr || !STMT_VINFO_GATHER_SCATTER_P (stmt_info))
4251 return NULL;
4252
1619606c 4253 /* Get the boolean that controls whether the load or store happens.
4254 This is null if the operation is unconditional. */
a73182ff 4255 tree mask = vect_get_load_store_mask (stmt_info);
1619606c 4256
4257 /* Make sure that the target supports an appropriate internal
4258 function for the gather/scatter operation. */
4259 gather_scatter_info gs_info;
a73182ff 4260 if (!vect_check_gather_scatter (stmt_info, loop_vinfo, &gs_info)
1619606c 4261 || gs_info.decl)
4262 return NULL;
4263
4264 /* Convert the mask to the right form. */
4265 tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type);
4266 if (mask)
7987f646 4267 mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info,
1619606c 4268 loop_vinfo);
4269
4270 /* Get the invariant base and non-invariant offset, converting the
4271 latter to the same width as the vector elements. */
4272 tree base = gs_info.base;
4273 tree offset_type = vect_get_gather_scatter_offset_type (&gs_info);
76f769a4 4274 tree offset = vect_add_conversion_to_pattern (offset_type, gs_info.offset,
4275 stmt_info);
1619606c 4276
4277 /* Build the new pattern statement. */
4278 tree scale = size_int (gs_info.scale);
4279 gcall *pattern_stmt;
4280 if (DR_IS_READ (dr))
4281 {
4282 if (mask != NULL)
4283 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 4, base,
4284 offset, scale, mask);
4285 else
4286 pattern_stmt = gimple_build_call_internal (gs_info.ifn, 3, base,
4287 offset, scale);
4288 tree load_lhs = vect_recog_temp_ssa_var (gs_info.element_type, NULL);
4289 gimple_call_set_lhs (pattern_stmt, load_lhs);
4290 }
4291 else
0bf8b382 4292 {
a73182ff 4293 tree rhs = vect_get_store_rhs (stmt_info);
0bf8b382 4294 if (mask != NULL)
4295 pattern_stmt = gimple_build_call_internal (IFN_MASK_SCATTER_STORE, 5,
4296 base, offset, scale, rhs,
4297 mask);
4298 else
4299 pattern_stmt = gimple_build_call_internal (IFN_SCATTER_STORE, 4,
4300 base, offset, scale, rhs);
4301 }
1619606c 4302 gimple_call_set_nothrow (pattern_stmt, true);
4303
4304 /* Copy across relevant vectorization info and associate DR with the
4305 new pattern statement instead of the original statement. */
04b2391d 4306 stmt_vec_info pattern_stmt_info = loop_vinfo->add_stmt (pattern_stmt);
5f02ee72 4307 loop_vinfo->move_dr (pattern_stmt_info, stmt_info);
1619606c 4308
4309 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
4310 *type_out = vectype;
a73182ff 4311 vect_pattern_detected ("gather/scatter pattern", stmt_info->stmt);
1619606c 4312
4313 return pattern_stmt;
4314}
4315
18bbd2f1 4316/* Return true if TYPE is a non-boolean integer type. These are the types
4317 that we want to consider for narrowing. */
4318
4319static bool
4320vect_narrowable_type_p (tree type)
4321{
4322 return INTEGRAL_TYPE_P (type) && !VECT_SCALAR_BOOLEAN_TYPE_P (type);
4323}
4324
4325/* Return true if the operation given by CODE can be truncated to N bits
4326 when only N bits of the output are needed. This is only true if the input
4327 bits above the low N bits have no effect on the low N bits of the result. */
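/* For instance (an added illustration): PLUS_EXPR qualifies because the
   low 8 bits of a wider addition depend only on the low 8 bits of its
   operands, i.e. ((a + b) & 0xff) == (((a & 0xff) + (b & 0xff)) & 0xff),
   whereas TRUNC_DIV_EXPR and RSHIFT_EXPR do not, since higher input bits
   can influence the low bits of their results.  */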
4328
4329static bool
4330vect_truncatable_operation_p (tree_code code)
4331{
4332 switch (code)
4333 {
4334 case PLUS_EXPR:
4335 case MINUS_EXPR:
4336 case MULT_EXPR:
4337 case BIT_AND_EXPR:
4338 case BIT_IOR_EXPR:
4339 case BIT_XOR_EXPR:
4340 case COND_EXPR:
4341 return true;
4342
4343 default:
4344 return false;
4345 }
4346}
4347
4348/* Record that STMT_INFO could be changed from operating on TYPE to
4349 operating on a type with the precision and sign given by PRECISION
4350 and SIGN respectively. PRECISION is an arbitrary bit precision;
4351 it might not be a whole number of bytes. */
4352
4353static void
4354vect_set_operation_type (stmt_vec_info stmt_info, tree type,
4355 unsigned int precision, signop sign)
4356{
4357 /* Round the precision up to a whole number of bytes. */
4358 precision = vect_element_precision (precision);
4359 if (precision < TYPE_PRECISION (type)
4360 && (!stmt_info->operation_precision
4361 || stmt_info->operation_precision > precision))
4362 {
4363 stmt_info->operation_precision = precision;
4364 stmt_info->operation_sign = sign;
4365 }
4366}
4367
4368/* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
4369 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
4370 is an arbitrary bit precision; it might not be a whole number of bytes. */
4371
4372static void
4373vect_set_min_input_precision (stmt_vec_info stmt_info, tree type,
4374 unsigned int min_input_precision)
4375{
4376 /* This operation in isolation only requires the inputs to have
4377 MIN_INPUT_PRECISION bits of precision. However, that doesn't mean
4378 that MIN_INPUT_PRECISION is a natural precision for the chain
4379 as a whole. E.g. consider something like:
4380
4381 unsigned short *x, *y;
4382 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4383
4384 The right shift can be done on unsigned chars, and only requires the
4385 result of "*x & 0xf0" to be done on unsigned chars. But taking that
4386 approach would mean turning a natural chain of single-vector unsigned
4387 short operations into one that truncates "*x" and then extends
4388 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
4389 operation and one vector for each unsigned char operation.
4390 This would be a significant pessimization.
4391
4392 Instead only propagate the maximum of this precision and the precision
4393 required by the users of the result. This means that we don't pessimize
4394 the case above but continue to optimize things like:
4395
4396 unsigned char *y;
4397 unsigned short *x;
4398 *y = ((*x & 0xf0) >> 4) | (*y << 4);
4399
4400 Here we would truncate two vectors of *x to a single vector of
4401 unsigned chars and use single-vector unsigned char operations for
4402 everything else, rather than doing two unsigned short copies of
4403 "(*x & 0xf0) >> 4" and then truncating the result. */
4404 min_input_precision = MAX (min_input_precision,
4405 stmt_info->min_output_precision);
4406
4407 if (min_input_precision < TYPE_PRECISION (type)
4408 && (!stmt_info->min_input_precision
4409 || stmt_info->min_input_precision > min_input_precision))
4410 stmt_info->min_input_precision = min_input_precision;
4411}
4412
4413/* Subroutine of vect_determine_min_output_precision. Return true if
4414 we can calculate a reduced number of output bits for STMT_INFO,
4415 whose result is LHS. */
4416
4417static bool
4418vect_determine_min_output_precision_1 (stmt_vec_info stmt_info, tree lhs)
4419{
4420 /* Take the maximum precision required by users of the result. */
03c0d666 4421 vec_info *vinfo = stmt_info->vinfo;
18bbd2f1 4422 unsigned int precision = 0;
4423 imm_use_iterator iter;
4424 use_operand_p use;
4425 FOR_EACH_IMM_USE_FAST (use, iter, lhs)
4426 {
4427 gimple *use_stmt = USE_STMT (use);
4428 if (is_gimple_debug (use_stmt))
4429 continue;
03c0d666 4430 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
4431 if (!use_stmt_info || !use_stmt_info->min_input_precision)
18bbd2f1 4432 return false;
e95d867a 4433 /* The input precision recorded for COND_EXPRs applies only to the
4434 "then" and "else" values. */
4435 gassign *assign = dyn_cast <gassign *> (stmt_info->stmt);
4436 if (assign
4437 && gimple_assign_rhs_code (assign) == COND_EXPR
4438 && use->use != gimple_assign_rhs2_ptr (assign)
4439 && use->use != gimple_assign_rhs3_ptr (assign))
4440 return false;
18bbd2f1 4441 precision = MAX (precision, use_stmt_info->min_input_precision);
4442 }
4443
4444 if (dump_enabled_p ())
a4e972e3 4445 dump_printf_loc (MSG_NOTE, vect_location,
4446 "only the low %d bits of %T are significant\n",
4447 precision, lhs);
18bbd2f1 4448 stmt_info->min_output_precision = precision;
4449 return true;
4450}
4451
4452/* Calculate min_output_precision for STMT_INFO. */
4453
4454static void
4455vect_determine_min_output_precision (stmt_vec_info stmt_info)
4456{
4457 /* We're only interested in statements with a narrowable result. */
4458 tree lhs = gimple_get_lhs (stmt_info->stmt);
4459 if (!lhs
4460 || TREE_CODE (lhs) != SSA_NAME
4461 || !vect_narrowable_type_p (TREE_TYPE (lhs)))
4462 return;
4463
4464 if (!vect_determine_min_output_precision_1 (stmt_info, lhs))
4465 stmt_info->min_output_precision = TYPE_PRECISION (TREE_TYPE (lhs));
4466}
4467
4468/* Use range information to decide whether STMT (described by STMT_INFO)
4469 could be done in a narrower type. This is effectively a forward
4470 propagation, since it uses context-independent information that applies
4471 to all users of an SSA name. */
4472
4473static void
4474vect_determine_precisions_from_range (stmt_vec_info stmt_info, gassign *stmt)
4475{
4476 tree lhs = gimple_assign_lhs (stmt);
4477 if (!lhs || TREE_CODE (lhs) != SSA_NAME)
4478 return;
4479
4480 tree type = TREE_TYPE (lhs);
4481 if (!vect_narrowable_type_p (type))
4482 return;
4483
4484 /* First see whether we have any useful range information for the result. */
4485 unsigned int precision = TYPE_PRECISION (type);
4486 signop sign = TYPE_SIGN (type);
4487 wide_int min_value, max_value;
4488 if (!vect_get_range_info (lhs, &min_value, &max_value))
4489 return;
4490
4491 tree_code code = gimple_assign_rhs_code (stmt);
4492 unsigned int nops = gimple_num_ops (stmt);
4493
4494 if (!vect_truncatable_operation_p (code))
4495 /* Check that all relevant input operands are compatible, and update
4496 [MIN_VALUE, MAX_VALUE] to include their ranges. */
4497 for (unsigned int i = 1; i < nops; ++i)
4498 {
4499 tree op = gimple_op (stmt, i);
4500 if (TREE_CODE (op) == INTEGER_CST)
4501 {
4502 /* Don't require the integer to have RHS_TYPE (which it might
4503 not for things like shift amounts, etc.), but do require it
4504 to fit the type. */
4505 if (!int_fits_type_p (op, type))
4506 return;
4507
4508 min_value = wi::min (min_value, wi::to_wide (op, precision), sign);
4509 max_value = wi::max (max_value, wi::to_wide (op, precision), sign);
4510 }
4511 else if (TREE_CODE (op) == SSA_NAME)
4512 {
4513 /* Ignore codes that don't take uniform arguments. */
4514 if (!types_compatible_p (TREE_TYPE (op), type))
4515 return;
4516
4517 wide_int op_min_value, op_max_value;
4518 if (!vect_get_range_info (op, &op_min_value, &op_max_value))
4519 return;
4520
4521 min_value = wi::min (min_value, op_min_value, sign);
4522 max_value = wi::max (max_value, op_max_value, sign);
4523 }
4524 else
4525 return;
4526 }
4527
4528 /* Try to switch signed types for unsigned types if we can.
4529 This is better for two reasons. First, unsigned ops tend
4530 to be cheaper than signed ops. Second, it means that we can
4531 handle things like:
4532
4533 signed char c;
4534 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
4535
4536 as:
4537
4538 signed char c;
4539 unsigned short res_1 = (unsigned short) c & 0xff00;
4540 int res = (int) res_1;
4541
4542 where the intermediate result res_1 has unsigned rather than
4543 signed type. */
4544 if (sign == SIGNED && !wi::neg_p (min_value))
4545 sign = UNSIGNED;
4546
4547 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
4548 unsigned int precision1 = wi::min_precision (min_value, sign);
4549 unsigned int precision2 = wi::min_precision (max_value, sign);
4550 unsigned int value_precision = MAX (precision1, precision2);
4551 if (value_precision >= precision)
4552 return;
4553
4554 if (dump_enabled_p ())
a4e972e3 4555 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4556 " without loss of precision: %G",
4557 sign == SIGNED ? "signed" : "unsigned",
4558 value_precision, stmt);
18bbd2f1 4559
4560 vect_set_operation_type (stmt_info, type, value_precision, sign);
4561 vect_set_min_input_precision (stmt_info, type, value_precision);
4562}
4563
4564/* Use information about the users of STMT's result to decide whether
4565 STMT (described by STMT_INFO) could be done in a narrower type.
4566 This is effectively a backward propagation. */
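/* A worked illustration of the shift handling below (added for
   exposition): if only the low 8 bits of X >> 3 are needed, the shift
   must be done with at least 8 + 3 = 11 bits and needs 11 input bits;
   if only the low 8 bits of X << 3 are needed, the shift can be done in
   8 bits and only the low 8 - 3 = 5 input bits of X matter.  */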
4567
4568static void
4569vect_determine_precisions_from_users (stmt_vec_info stmt_info, gassign *stmt)
4570{
4571 tree_code code = gimple_assign_rhs_code (stmt);
4572 unsigned int opno = (code == COND_EXPR ? 2 : 1);
4573 tree type = TREE_TYPE (gimple_op (stmt, opno));
4574 if (!vect_narrowable_type_p (type))
4575 return;
4576
4577 unsigned int precision = TYPE_PRECISION (type);
4578 unsigned int operation_precision, min_input_precision;
4579 switch (code)
4580 {
4581 CASE_CONVERT:
4582 /* Only the bits that contribute to the output matter. Don't change
4583 the precision of the operation itself. */
4584 operation_precision = precision;
4585 min_input_precision = stmt_info->min_output_precision;
4586 break;
4587
4588 case LSHIFT_EXPR:
4589 case RSHIFT_EXPR:
4590 {
4591 tree shift = gimple_assign_rhs2 (stmt);
4592 if (TREE_CODE (shift) != INTEGER_CST
4593 || !wi::ltu_p (wi::to_widest (shift), precision))
4594 return;
4595 unsigned int const_shift = TREE_INT_CST_LOW (shift);
4596 if (code == LSHIFT_EXPR)
4597 {
4598 /* We need CONST_SHIFT fewer bits of the input. */
4599 operation_precision = stmt_info->min_output_precision;
4600 min_input_precision = (MAX (operation_precision, const_shift)
4601 - const_shift);
4602 }
4603 else
4604 {
4605 /* We need CONST_SHIFT extra bits to do the operation. */
4606 operation_precision = (stmt_info->min_output_precision
4607 + const_shift);
4608 min_input_precision = operation_precision;
4609 }
4610 break;
4611 }
4612
4613 default:
4614 if (vect_truncatable_operation_p (code))
4615 {
4616 /* Input bit N has no effect on output bits N-1 and lower. */
4617 operation_precision = stmt_info->min_output_precision;
4618 min_input_precision = operation_precision;
4619 break;
4620 }
4621 return;
4622 }
4623
4624 if (operation_precision < precision)
4625 {
4626 if (dump_enabled_p ())
a4e972e3 4627 dump_printf_loc (MSG_NOTE, vect_location, "can narrow to %s:%d"
4628 " without affecting users: %G",
4629 TYPE_UNSIGNED (type) ? "unsigned" : "signed",
4630 operation_precision, stmt);
18bbd2f1 4631 vect_set_operation_type (stmt_info, type, operation_precision,
4632 TYPE_SIGN (type));
4633 }
4634 vect_set_min_input_precision (stmt_info, type, min_input_precision);
4635}
4636
4637/* Handle vect_determine_precisions for STMT_INFO, given that we
4638 have already done so for the users of its result. */
4639
4640void
4641vect_determine_stmt_precisions (stmt_vec_info stmt_info)
4642{
4643 vect_determine_min_output_precision (stmt_info);
4644 if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
4645 {
4646 vect_determine_precisions_from_range (stmt_info, stmt);
4647 vect_determine_precisions_from_users (stmt_info, stmt);
4648 }
4649}
4650
4651/* Walk backwards through the vectorizable region to determine the
4652 values of these fields:
4653
4654 - min_output_precision
4655 - min_input_precision
4656 - operation_precision
4657 - operation_sign. */
4658
4659void
4660vect_determine_precisions (vec_info *vinfo)
4661{
4662 DUMP_VECT_SCOPE ("vect_determine_precisions");
4663
4664 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4665 {
4666 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
4667 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
4668 unsigned int nbbs = loop->num_nodes;
4669
4670 for (unsigned int i = 0; i < nbbs; i++)
4671 {
4672 basic_block bb = bbs[nbbs - i - 1];
4673 for (gimple_stmt_iterator si = gsi_last_bb (bb);
4674 !gsi_end_p (si); gsi_prev (&si))
03c0d666 4675 vect_determine_stmt_precisions
4676 (vinfo->lookup_stmt (gsi_stmt (si)));
18bbd2f1 4677 }
4678 }
4679 else
4680 {
4681 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4682 gimple_stmt_iterator si = bb_vinfo->region_end;
4683 gimple *stmt;
4684 do
4685 {
4686 if (!gsi_stmt (si))
4687 si = gsi_last_bb (bb_vinfo->bb);
4688 else
4689 gsi_prev (&si);
4690 stmt = gsi_stmt (si);
03c0d666 4691 stmt_vec_info stmt_info = vinfo->lookup_stmt (stmt);
18bbd2f1 4692 if (stmt_info && STMT_VINFO_VECTORIZABLE (stmt_info))
4693 vect_determine_stmt_precisions (stmt_info);
4694 }
4695 while (stmt != gsi_stmt (bb_vinfo->region_begin));
4696 }
4697}
4698
7987f646 4699typedef gimple *(*vect_recog_func_ptr) (stmt_vec_info, tree *);
1ba54d4e 4700
4701struct vect_recog_func
4702{
4703 vect_recog_func_ptr fn;
4704 const char *name;
4705};
4706
4707/* Note that ordering matters - the first pattern matching on a stmt is
4708 taken, which means usually the more complex one needs to precede the
4709 less complex ones (widen_sum only after dot_prod or sad for example). */
4710static vect_recog_func vect_vect_recog_func_ptrs[] = {
18bbd2f1 4711 { vect_recog_over_widening_pattern, "over_widening" },
3f887927 4712 /* Must come after over_widening, which narrows the shift as much as
4713 possible beforehand. */
4714 { vect_recog_average_pattern, "average" },
18bbd2f1 4715 { vect_recog_cast_forwprop_pattern, "cast_forwprop" },
1ba54d4e 4716 { vect_recog_widen_mult_pattern, "widen_mult" },
4717 { vect_recog_dot_prod_pattern, "dot_prod" },
4718 { vect_recog_sad_pattern, "sad" },
4719 { vect_recog_widen_sum_pattern, "widen_sum" },
4720 { vect_recog_pow_pattern, "pow" },
4721 { vect_recog_widen_shift_pattern, "widen_shift" },
1ba54d4e 4722 { vect_recog_rotate_pattern, "rotate" },
4723 { vect_recog_vector_vector_shift_pattern, "vector_vector_shift" },
4724 { vect_recog_divmod_pattern, "divmod" },
4725 { vect_recog_mult_pattern, "mult" },
4726 { vect_recog_mixed_size_cond_pattern, "mixed_size_cond" },
4727 { vect_recog_bool_pattern, "bool" },
4728 /* This must come before mask conversion, and includes the parts
4729 of mask conversion that are needed for gather and scatter
4730 internal functions. */
4731 { vect_recog_gather_scatter_pattern, "gather_scatter" },
4732 { vect_recog_mask_conversion_pattern, "mask_conversion" }
4733};
4734
4735const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs);
4736
45eea33f 4737/* Mark statements that are involved in a pattern. */
4738
4739static inline void
726ee479 4740vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt,
45eea33f 4741 tree pattern_vectype)
4742{
d5b5c2c5 4743 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
45eea33f 4744
726ee479 4745 gimple *orig_pattern_stmt = NULL;
4746 if (is_pattern_stmt_p (orig_stmt_info))
d6152abc 4747 {
d5b5c2c5 4748 /* We're replacing a statement in an existing pattern definition
4749 sequence. */
726ee479 4750 orig_pattern_stmt = orig_stmt_info->stmt;
d5b5c2c5 4751 if (dump_enabled_p ())
a4e972e3 4752 dump_printf_loc (MSG_NOTE, vect_location,
4753 "replacing earlier pattern %G", orig_pattern_stmt);
d5b5c2c5 4754
4755 /* To keep the book-keeping simple, just swap the lhs of the
4756 old and new statements, so that the old one has a valid but
4757 unused lhs. */
726ee479 4758 tree old_lhs = gimple_get_lhs (orig_pattern_stmt);
4759 gimple_set_lhs (orig_pattern_stmt, gimple_get_lhs (pattern_stmt));
d5b5c2c5 4760 gimple_set_lhs (pattern_stmt, old_lhs);
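      /* For example (a sketch): if the earlier pattern statement computed
           patt_a = ...;
         and the replacement computes
           patt_b = ...;
         then after the swap the replacement defines patt_a, so existing
         uses of patt_a are now fed by the new statement, while the old
         statement is left defining the unused patt_b.  */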
4761
4762 if (dump_enabled_p ())
a4e972e3 4763 dump_printf_loc (MSG_NOTE, vect_location, "with %G", pattern_stmt);
d5b5c2c5 4764
4765 /* Switch to the statement that ORIG replaces. */
aebdbd31 4766 orig_stmt_info = STMT_VINFO_RELATED_STMT (orig_stmt_info);
d5b5c2c5 4767
4768 /* We shouldn't be replacing the main pattern statement. */
726ee479 4769 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info)->stmt
4770 != orig_pattern_stmt);
d6152abc 4771 }
45eea33f 4772
d5b5c2c5 4773 if (def_seq)
da611310 4774 for (gimple_stmt_iterator si = gsi_start (def_seq);
4775 !gsi_end_p (si); gsi_next (&si))
1650c1ec 4776 {
4777 stmt_vec_info pattern_stmt_info
4778 = vect_init_pattern_stmt (gsi_stmt (si),
4779 orig_stmt_info, pattern_vectype);
4780 /* Stmts in the def sequence are not vectorizable cycle or
4781 induction defs, instead they should all be vect_internal_def
4782 feeding the main pattern stmt which retains this def type. */
4783 STMT_VINFO_DEF_TYPE (pattern_stmt_info) = vect_internal_def;
4784 }
d5b5c2c5 4785
726ee479 4786 if (orig_pattern_stmt)
d5b5c2c5 4787 {
4788 vect_init_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
4789
4790 /* Insert all the new pattern statements before the original one. */
4791 gimple_seq *orig_def_seq = &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
726ee479 4792 gimple_stmt_iterator gsi = gsi_for_stmt (orig_pattern_stmt,
4793 orig_def_seq);
d5b5c2c5 4794 gsi_insert_seq_before_without_update (&gsi, def_seq, GSI_SAME_STMT);
4795 gsi_insert_before_without_update (&gsi, pattern_stmt, GSI_SAME_STMT);
2fcb0ed9 4796
4797 /* Remove the pattern statement that this new pattern replaces. */
4798 gsi_remove (&gsi, false);
d5b5c2c5 4799 }
4800 else
4801 vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype);
45eea33f 4802}
4803
48e1416a 4804/* Function vect_pattern_recog_1
4a61a337 4805
4806 Input:
4807 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
4808 computation pattern.
726ee479 4809 STMT_INFO: A stmt from which the pattern search should start.
4a61a337 4810
1ba54d4e 4811 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
4812 a sequence of statements that has the same functionality and can be
726ee479 4813 used to replace STMT_INFO. It returns the last statement in the sequence
4814 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
1ba54d4e 4815 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
4816 statement, having first checked that the target supports the new operation
4817 in that type.
4a61a337 4818
48e1416a 4819 This function also does some bookkeeping, as explained in the documentation
4a61a337 4820 for vect_pattern_recog. */
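
/* A minimal sketch of a PATTERN_RECOG_FUNC; the name is hypothetical and
   it deliberately matches nothing (returning NULL means "no pattern
   found").  A real recognizer analyzes STMT_VINFO_STMT (stmt_vinfo),
   builds the replacement statement, checks that the target supports it
   and sets *TYPE_OUT before returning it:

     static gimple *
     vect_recog_example_pattern (stmt_vec_info stmt_vinfo, tree *type_out)
     {
       (void) stmt_vinfo;
       (void) type_out;
       return NULL;
     }  */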
4821
d5b5c2c5 4822static void
726ee479 4823vect_pattern_recog_1 (vect_recog_func *recog_func, stmt_vec_info stmt_info)
4a61a337 4824{
726ee479 4825 vec_info *vinfo = stmt_info->vinfo;
4826 gimple *pattern_stmt;
0eee81bc 4827 loop_vec_info loop_vinfo;
4a61a337 4828 tree pattern_vectype;
4a61a337 4829
d5b5c2c5 4830 /* If this statement has already been replaced with pattern statements,
4831 leave the original statement alone, since the first match wins.
4832 Instead try to match against the definition statements that feed
4833 the main pattern statement. */
d5b5c2c5 4834 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
4835 {
4836 gimple_stmt_iterator gsi;
4837 for (gsi = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
4838 !gsi_end_p (gsi); gsi_next (&gsi))
726ee479 4839 vect_pattern_recog_1 (recog_func, vinfo->lookup_stmt (gsi_stmt (gsi)));
d5b5c2c5 4840 return;
4841 }
4842
05499cc7 4843 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
7987f646 4844 pattern_stmt = recog_func->fn (stmt_info, &pattern_vectype);
75a70cf9 4845 if (!pattern_stmt)
e31847cd 4846 {
2fcb0ed9 4847 /* Clear any half-formed pattern definition sequence. */
4848 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
d5b5c2c5 4849 return;
e31847cd 4850 }
48e1416a 4851
0eee81bc 4852 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1ba54d4e 4853 gcc_assert (pattern_vectype);
0eee81bc 4854
4a61a337 4855 /* Found a vectorizable pattern. */
6d8fb6cf 4856 if (dump_enabled_p ())
a4e972e3 4857 dump_printf_loc (MSG_NOTE, vect_location,
4858 "%s pattern recognized: %G",
4859 recog_func->name, pattern_stmt);
48e1416a 4860
75a70cf9 4861 /* Mark the stmts that are involved in the pattern. */
726ee479 4862 vect_mark_pattern_stmts (stmt_info, pattern_stmt, pattern_vectype);
4a61a337 4863
eefa05c8 4864 /* Patterns cannot be vectorized using SLP, because they change the order of
4865 computation. */
4c0c783a 4866 if (loop_vinfo)
620610fa 4867 {
4868 unsigned ix, ix2;
f4649a92 4869 stmt_vec_info *elem_ptr;
620610fa 4870 VEC_ORDERED_REMOVE_IF (LOOP_VINFO_REDUCTIONS (loop_vinfo), ix, ix2,
f4649a92 4871 elem_ptr, *elem_ptr == stmt_info);
620610fa 4872 }
4a61a337 4873}
4874
4875
4876/* Function vect_pattern_recog
4877
4878 Input:
 4879 VINFO - the vec_info of the loop or basic-block region in which we want
 4880 to look for computation idioms.
4881
cfdcf183 4882 Output - for each computation idiom that is detected we create a new stmt
4883 that provides the same functionality and that can be vectorized. We
4a61a337 4884 also record some information in the stmt_vec_info of the relevant
4885 stmts, as explained below:
4886
4887 At the entry to this function we have the following stmts, with the
4888 following initial value in the STMT_VINFO fields:
4889
4890 stmt in_pattern_p related_stmt vec_stmt
4891 S1: a_i = .... - - -
4892 S2: a_2 = ..use(a_i).. - - -
4893 S3: a_1 = ..use(a_2).. - - -
4894 S4: a_0 = ..use(a_1).. - - -
4895 S5: ... = ..use(a_0).. - - -
4896
4897 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
cfdcf183 4898 represented by a single stmt. We then:
4899 - create a new stmt S6 equivalent to the pattern (the stmt is not
4900 inserted into the code)
4a61a337 4901 - fill in the STMT_VINFO fields as follows:
4902
4903 in_pattern_p related_stmt vec_stmt
48e1416a 4904 S1: a_i = .... - - -
4a61a337 4905 S2: a_2 = ..use(a_i).. - - -
4906 S3: a_1 = ..use(a_2).. - - -
4a61a337 4907 S4: a_0 = ..use(a_1).. true S6 -
cfdcf183 4908 '---> S6: a_new = .... - S4 -
4a61a337 4909 S5: ... = ..use(a_0).. - - -
4910
4911 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
cfdcf183 4912 to each other through the RELATED_STMT field).
4a61a337 4913
4914 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
4915 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
4916 remain irrelevant unless used by stmts other than S4.
4917
4918 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
cfdcf183 4919 (because they are marked as irrelevant). It will vectorize S6, and record
8bf58742 4920 a pointer to the new vector stmt VS6 from S6 (as usual).
4921 S4 will be skipped, and S5 will be vectorized as usual:
4a61a337 4922
4923 in_pattern_p related_stmt vec_stmt
4924 S1: a_i = .... - - -
4925 S2: a_2 = ..use(a_i).. - - -
4926 S3: a_1 = ..use(a_2).. - - -
4927 > VS6: va_new = .... - - -
4a61a337 4928 S4: a_0 = ..use(a_1).. true S6 VS6
cfdcf183 4929 '---> S6: a_new = .... - S4 VS6
4a61a337 4930 > VS5: ... = ..vuse(va_new).. - - -
4931 S5: ... = ..use(a_0).. - - -
4932
cfdcf183 4933 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4a61a337 4934 elsewhere), and we'll end up with:
4935
48e1416a 4936 VS6: va_new = ....
8bf58742 4937 VS5: ... = ..vuse(va_new)..
4938
 4939 In the case of more than one pattern statement, e.g., widen-mult with an
4940 intermediate type:
4941
 4942 S1 a_t = ....;
4943 S2 a_T = (TYPE) a_t;
4944 '--> S3: a_it = (interm_type) a_t;
4945 S4 prod_T = a_T * CONST;
4946 '--> S5: prod_T' = a_it w* CONST;
4947
4948 there may be other users of a_T outside the pattern. In that case S2 will
4949 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
4950 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
4951 be recorded in S3. */
4a61a337 4952
4953void
e2c5c678 4954vect_pattern_recog (vec_info *vinfo)
4a61a337 4955{
4c0c783a 4956 struct loop *loop;
eca8fccf 4957 basic_block *bbs;
4c0c783a 4958 unsigned int nbbs;
75a70cf9 4959 gimple_stmt_iterator si;
4a61a337 4960 unsigned int i, j;
4a61a337 4961
18bbd2f1 4962 vect_determine_precisions (vinfo);
4963
88f6eb8f 4964 DUMP_VECT_SCOPE ("vect_pattern_recog");
4a61a337 4965
e2c5c678 4966 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
4c0c783a 4967 {
4968 loop = LOOP_VINFO_LOOP (loop_vinfo);
4969 bbs = LOOP_VINFO_BBS (loop_vinfo);
4970 nbbs = loop->num_nodes;
4c7587f5 4971
4972 /* Scan through the loop stmts, applying the pattern recognition
4973 functions starting at each stmt visited: */
4974 for (i = 0; i < nbbs; i++)
4975 {
4976 basic_block bb = bbs[i];
4977 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
726ee479 4978 {
4979 stmt_vec_info stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
4980 /* Scan over all generic vect_recog_xxx_pattern functions. */
4981 for (j = 0; j < NUM_PATTERNS; j++)
4982 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j],
4983 stmt_info);
4984 }
4c7587f5 4985 }
4c0c783a 4986 }
4987 else
4988 {
4c7587f5 4989 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4990 for (si = bb_vinfo->region_begin;
4991 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4992 {
e31847cd 4993 gimple *stmt = gsi_stmt (si);
03c0d666 4994 stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt);
d5b5c2c5 4995 if (stmt_info && !STMT_VINFO_VECTORIZABLE (stmt_info))
4c7587f5 4996 continue;
4c0c783a 4997
4c7587f5 4998 /* Scan over all generic vect_recog_xxx_pattern functions. */
4999 for (j = 0; j < NUM_PATTERNS; j++)
726ee479 5000 vect_pattern_recog_1 (&vect_vect_recog_func_ptrs[j], stmt_info);
4c7587f5 5001 }
4a61a337 5002 }
5003}