1 /* Analysis Utilities for Loop Vectorization.
2 Copyright (C) 2006-2024 Free Software Foundation, Inc.
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
23 #include "coretypes.h"
28 #include "gimple-iterator.h"
29 #include "gimple-fold.h"
32 #include "optabs-tree.h"
33 #include "insn-config.h"
34 #include "recog.h" /* FIXME: for insn_data */
35 #include "fold-const.h"
36 #include "stor-layout.h"
39 #include "gimple-iterator.h"
40 #include "gimple-fold.h"
41 #include "gimplify-me.h"
43 #include "tree-vectorizer.h"
46 #include "internal-fn.h"
47 #include "case-cfn-macros.h"
48 #include "fold-const-call.h"
51 #include "omp-simd-clone.h"
53 #include "tree-vector-builder.h"
54 #include "vec-perm-indices.h"
55 #include "gimple-range.h"
58 /* TODO: Note the vectorizer still builds COND_EXPRs with GENERIC compares
59 in the first operand. Disentangling this is future work, the
60 IL is properly transfered to VEC_COND_EXPRs with separate compares. */
63 /* Return true if we have a useful VR_RANGE range for VAR, storing it
64 in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */
67 vect_get_range_info (tree var
, wide_int
*min_value
, wide_int
*max_value
)
71 get_range_query (cfun
)->range_of_expr (vr
, var
);
72 if (vr
.undefined_p ())
73 vr
.set_varying (TREE_TYPE (var
));
74 value_range_kind vr_type
= get_legacy_range (vr
, vr_min
, vr_max
);
75 *min_value
= wi::to_wide (vr_min
);
76 *max_value
= wi::to_wide (vr_max
);
77 wide_int nonzero
= get_nonzero_bits (var
);
78 signop sgn
= TYPE_SIGN (TREE_TYPE (var
));
79 if (intersect_range_with_nonzero_bits (vr_type
, min_value
, max_value
,
80 nonzero
, sgn
) == VR_RANGE
)
82 if (dump_enabled_p ())
84 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
85 dump_printf (MSG_NOTE
, " has range [");
86 dump_hex (MSG_NOTE
, *min_value
);
87 dump_printf (MSG_NOTE
, ", ");
88 dump_hex (MSG_NOTE
, *max_value
);
89 dump_printf (MSG_NOTE
, "]\n");
95 if (dump_enabled_p ())
97 dump_generic_expr_loc (MSG_NOTE
, vect_location
, TDF_SLIM
, var
);
98 dump_printf (MSG_NOTE
, " has no range info\n");
104 /* Report that we've found an instance of pattern PATTERN in
108 vect_pattern_detected (const char *name
, gimple
*stmt
)
110 if (dump_enabled_p ())
111 dump_printf_loc (MSG_NOTE
, vect_location
, "%s: detected: %G", name
, stmt
);
114 /* Associate pattern statement PATTERN_STMT with ORIG_STMT_INFO and
115 return the pattern statement's stmt_vec_info. Set its vector type to
116 VECTYPE if it doesn't have one already. */
119 vect_init_pattern_stmt (vec_info
*vinfo
, gimple
*pattern_stmt
,
120 stmt_vec_info orig_stmt_info
, tree vectype
)
122 stmt_vec_info pattern_stmt_info
= vinfo
->lookup_stmt (pattern_stmt
);
123 if (pattern_stmt_info
== NULL
)
124 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
125 gimple_set_bb (pattern_stmt
, gimple_bb (orig_stmt_info
->stmt
));
127 pattern_stmt_info
->pattern_stmt_p
= true;
128 STMT_VINFO_RELATED_STMT (pattern_stmt_info
) = orig_stmt_info
;
129 STMT_VINFO_DEF_TYPE (pattern_stmt_info
)
130 = STMT_VINFO_DEF_TYPE (orig_stmt_info
);
131 STMT_VINFO_TYPE (pattern_stmt_info
) = STMT_VINFO_TYPE (orig_stmt_info
);
132 if (!STMT_VINFO_VECTYPE (pattern_stmt_info
))
135 || is_a
<gcond
*> (pattern_stmt
)
136 || (VECTOR_BOOLEAN_TYPE_P (vectype
)
137 == vect_use_mask_type_p (orig_stmt_info
)));
138 STMT_VINFO_VECTYPE (pattern_stmt_info
) = vectype
;
139 pattern_stmt_info
->mask_precision
= orig_stmt_info
->mask_precision
;
141 return pattern_stmt_info
;
144 /* Set the pattern statement of ORIG_STMT_INFO to PATTERN_STMT.
145 Also set the vector type of PATTERN_STMT to VECTYPE, if it doesn't
149 vect_set_pattern_stmt (vec_info
*vinfo
, gimple
*pattern_stmt
,
150 stmt_vec_info orig_stmt_info
, tree vectype
)
152 STMT_VINFO_IN_PATTERN_P (orig_stmt_info
) = true;
153 STMT_VINFO_RELATED_STMT (orig_stmt_info
)
154 = vect_init_pattern_stmt (vinfo
, pattern_stmt
, orig_stmt_info
, vectype
);
157 /* Add NEW_STMT to STMT_INFO's pattern definition statements. If VECTYPE
158 is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
159 be different from the vector type of the final pattern statement.
160 If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
161 from which it was derived. */
164 append_pattern_def_seq (vec_info
*vinfo
,
165 stmt_vec_info stmt_info
, gimple
*new_stmt
,
166 tree vectype
= NULL_TREE
,
167 tree scalar_type_for_mask
= NULL_TREE
)
169 gcc_assert (!scalar_type_for_mask
170 == (!vectype
|| !VECTOR_BOOLEAN_TYPE_P (vectype
)));
173 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (new_stmt
);
174 STMT_VINFO_VECTYPE (new_stmt_info
) = vectype
;
175 if (scalar_type_for_mask
)
176 new_stmt_info
->mask_precision
177 = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask
));
179 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
),
183 /* The caller wants to perform new operations on vect_external variable
184 VAR, so that the result of the operations would also be vect_external.
185 Return the edge on which the operations can be performed, if one exists.
186 Return null if the operations should instead be treated as part of
187 the pattern that needs them. */
190 vect_get_external_def_edge (vec_info
*vinfo
, tree var
)
193 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
195 e
= loop_preheader_edge (loop_vinfo
->loop
);
196 if (!SSA_NAME_IS_DEFAULT_DEF (var
))
198 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
200 || !dominated_by_p (CDI_DOMINATORS
, e
->dest
, bb
))
207 /* Return true if the target supports a vector version of CODE,
208 where CODE is known to map to a direct optab with the given SUBTYPE.
209 ITYPE specifies the type of (some of) the scalar inputs and OTYPE
210 specifies the type of the scalar result.
212 If CODE allows the inputs and outputs to have different type
213 (such as for WIDEN_SUM_EXPR), it is the input mode rather
214 than the output mode that determines the appropriate target pattern.
215 Operand 0 of the target pattern then specifies the mode that the output
218 When returning true, set *VECOTYPE_OUT to the vector version of OTYPE.
219 Also set *VECITYPE_OUT to the vector version of ITYPE if VECITYPE_OUT
223 vect_supportable_direct_optab_p (vec_info
*vinfo
, tree otype
, tree_code code
,
224 tree itype
, tree
*vecotype_out
,
225 tree
*vecitype_out
= NULL
,
226 enum optab_subtype subtype
= optab_default
)
228 tree vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
232 tree vecotype
= get_vectype_for_scalar_type (vinfo
, otype
);
236 optab optab
= optab_for_tree_code (code
, vecitype
, subtype
);
240 insn_code icode
= optab_handler (optab
, TYPE_MODE (vecitype
));
241 if (icode
== CODE_FOR_nothing
242 || insn_data
[icode
].operand
[0].mode
!= TYPE_MODE (vecotype
))
245 *vecotype_out
= vecotype
;
247 *vecitype_out
= vecitype
;
251 /* Round bit precision PRECISION up to a full element. */
254 vect_element_precision (unsigned int precision
)
256 precision
= 1 << ceil_log2 (precision
);
257 return MAX (precision
, BITS_PER_UNIT
);
260 /* If OP is defined by a statement that's being considered for vectorization,
261 return information about that statement, otherwise return NULL. */
264 vect_get_internal_def (vec_info
*vinfo
, tree op
)
266 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (op
);
268 && STMT_VINFO_DEF_TYPE (def_stmt_info
) == vect_internal_def
)
269 return def_stmt_info
;
273 /* Check whether NAME, an ssa-name used in STMT_VINFO,
274 is a result of a type promotion, such that:
275 DEF_STMT: NAME = NOP (name0)
276 If CHECK_SIGN is TRUE, check that either both types are signed or both are
280 type_conversion_p (vec_info
*vinfo
, tree name
, bool check_sign
,
281 tree
*orig_type
, gimple
**def_stmt
, bool *promotion
)
283 tree type
= TREE_TYPE (name
);
285 enum vect_def_type dt
;
287 stmt_vec_info def_stmt_info
;
288 if (!vect_is_simple_use (name
, vinfo
, &dt
, &def_stmt_info
, def_stmt
))
291 if (dt
!= vect_internal_def
292 && dt
!= vect_external_def
&& dt
!= vect_constant_def
)
298 if (!is_gimple_assign (*def_stmt
))
301 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt
)))
304 oprnd0
= gimple_assign_rhs1 (*def_stmt
);
306 *orig_type
= TREE_TYPE (oprnd0
);
307 if (!INTEGRAL_TYPE_P (type
) || !INTEGRAL_TYPE_P (*orig_type
)
308 || ((TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (*orig_type
)) && check_sign
))
311 if (TYPE_PRECISION (type
) >= (TYPE_PRECISION (*orig_type
) * 2))
316 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
))
322 /* Holds information about an input operand after some sign changes
323 and type promotions have been peeled away. */
324 class vect_unpromoted_value
{
326 vect_unpromoted_value ();
328 void set_op (tree
, vect_def_type
, stmt_vec_info
= NULL
);
330 /* The value obtained after peeling away zero or more casts. */
333 /* The type of OP. */
336 /* The definition type of OP. */
339 /* If OP is the result of peeling at least one cast, and if the cast
340 of OP itself is a vectorizable statement, CASTER identifies that
341 statement, otherwise it is null. */
342 stmt_vec_info caster
;
345 inline vect_unpromoted_value::vect_unpromoted_value ()
348 dt (vect_uninitialized_def
),
353 /* Set the operand to OP_IN, its definition type to DT_IN, and the
354 statement that casts it to CASTER_IN. */
357 vect_unpromoted_value::set_op (tree op_in
, vect_def_type dt_in
,
358 stmt_vec_info caster_in
)
361 type
= TREE_TYPE (op
);
366 /* If OP is a vectorizable SSA name, strip a sequence of integer conversions
367 to reach some vectorizable inner operand OP', continuing as long as it
368 is possible to convert OP' back to OP using a possible sign change
369 followed by a possible promotion P. Return this OP', or null if OP is
370 not a vectorizable SSA name. If there is a promotion P, describe its
371 input in UNPROM, otherwise describe OP' in UNPROM. If SINGLE_USE_P
372 is nonnull, set *SINGLE_USE_P to false if any of the SSA names involved
373 have more than one user.
375 A successful return means that it is possible to go from OP' to OP
376 via UNPROM. The cast from OP' to UNPROM is at most a sign change,
377 whereas the cast from UNPROM to OP might be a promotion, a sign
382 signed short *ptr = ...;
383 signed short C = *ptr;
384 unsigned short B = (unsigned short) C; // sign change
385 signed int A = (signed int) B; // unsigned promotion
386 ...possible other uses of A...
387 unsigned int OP = (unsigned int) A; // sign change
389 In this case it's possible to go directly from C to OP using:
391 OP = (unsigned int) (unsigned short) C;
392 +------------+ +--------------+
393 promotion sign change
395 so OP' would be C. The input to the promotion is B, so UNPROM
399 vect_look_through_possible_promotion (vec_info
*vinfo
, tree op
,
400 vect_unpromoted_value
*unprom
,
401 bool *single_use_p
= NULL
)
403 tree op_type
= TREE_TYPE (op
);
404 if (!INTEGRAL_TYPE_P (op_type
))
407 tree res
= NULL_TREE
;
408 unsigned int orig_precision
= TYPE_PRECISION (op_type
);
409 unsigned int min_precision
= orig_precision
;
410 stmt_vec_info caster
= NULL
;
411 while (TREE_CODE (op
) == SSA_NAME
&& INTEGRAL_TYPE_P (op_type
))
413 /* See whether OP is simple enough to vectorize. */
414 stmt_vec_info def_stmt_info
;
417 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
420 /* If OP is the input of a demotion, skip over it to see whether
421 OP is itself the result of a promotion. If so, the combined
422 effect of the promotion and the demotion might fit the required
423 pattern, otherwise neither operation fits.
425 This copes with cases such as the result of an arithmetic
426 operation being truncated before being stored, and where that
427 arithmetic operation has been recognized as an over-widened one. */
428 if (TYPE_PRECISION (op_type
) <= min_precision
)
430 /* Use OP as the UNPROM described above if we haven't yet
431 found a promotion, or if using the new input preserves the
432 sign of the previous promotion. */
434 || TYPE_PRECISION (unprom
->type
) == orig_precision
435 || TYPE_SIGN (unprom
->type
) == TYPE_SIGN (op_type
))
437 unprom
->set_op (op
, dt
, caster
);
438 min_precision
= TYPE_PRECISION (op_type
);
440 /* Stop if we've already seen a promotion and if this
441 conversion does more than change the sign. */
442 else if (TYPE_PRECISION (op_type
)
443 != TYPE_PRECISION (unprom
->type
))
446 /* The sequence now extends to OP. */
450 /* See whether OP is defined by a cast. Record it as CASTER if
451 the cast is potentially vectorizable. */
454 caster
= def_stmt_info
;
456 /* Ignore pattern statements, since we don't link uses for them. */
459 && !STMT_VINFO_RELATED_STMT (caster
)
460 && !has_single_use (res
))
461 *single_use_p
= false;
463 gassign
*assign
= dyn_cast
<gassign
*> (def_stmt
);
464 if (!assign
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def_stmt
)))
467 /* Continue with the input to the cast. */
468 op
= gimple_assign_rhs1 (def_stmt
);
469 op_type
= TREE_TYPE (op
);
474 /* OP is an integer operand to an operation that returns TYPE, and we
475 want to treat the operation as a widening one. So far we can treat
476 it as widening from *COMMON_TYPE.
478 Return true if OP is suitable for such a widening operation,
479 either widening from *COMMON_TYPE or from some supertype of it.
480 Update *COMMON_TYPE to the supertype in the latter case.
482 SHIFT_P is true if OP is a shift amount. */
485 vect_joust_widened_integer (tree type
, bool shift_p
, tree op
,
488 /* Calculate the minimum precision required by OP, without changing
489 the sign of either operand. */
490 unsigned int precision
;
493 if (!wi::leu_p (wi::to_widest (op
), TYPE_PRECISION (type
) / 2))
495 precision
= TREE_INT_CST_LOW (op
);
499 precision
= wi::min_precision (wi::to_widest (op
),
500 TYPE_SIGN (*common_type
));
501 if (precision
* 2 > TYPE_PRECISION (type
))
505 /* If OP requires a wider type, switch to that type. The checks
506 above ensure that this is still narrower than the result. */
507 precision
= vect_element_precision (precision
);
508 if (TYPE_PRECISION (*common_type
) < precision
)
509 *common_type
= build_nonstandard_integer_type
510 (precision
, TYPE_UNSIGNED (*common_type
));
514 /* Return true if the common supertype of NEW_TYPE and *COMMON_TYPE
515 is narrower than type, storing the supertype in *COMMON_TYPE if so. */
518 vect_joust_widened_type (tree type
, tree new_type
, tree
*common_type
)
520 if (types_compatible_p (*common_type
, new_type
))
523 /* See if *COMMON_TYPE can hold all values of NEW_TYPE. */
524 if ((TYPE_PRECISION (new_type
) < TYPE_PRECISION (*common_type
))
525 && (TYPE_UNSIGNED (new_type
) || !TYPE_UNSIGNED (*common_type
)))
528 /* See if NEW_TYPE can hold all values of *COMMON_TYPE. */
529 if (TYPE_PRECISION (*common_type
) < TYPE_PRECISION (new_type
)
530 && (TYPE_UNSIGNED (*common_type
) || !TYPE_UNSIGNED (new_type
)))
532 *common_type
= new_type
;
536 /* We have mismatched signs, with the signed type being
537 no wider than the unsigned type. In this case we need
538 a wider signed type. */
539 unsigned int precision
= MAX (TYPE_PRECISION (*common_type
),
540 TYPE_PRECISION (new_type
));
543 if (precision
* 2 > TYPE_PRECISION (type
))
546 *common_type
= build_nonstandard_integer_type (precision
, false);
550 /* Check whether STMT_INFO can be viewed as a tree of integer operations
551 in which each node either performs CODE or WIDENED_CODE, and where
552 each leaf operand is narrower than the result of STMT_INFO. MAX_NOPS
553 specifies the maximum number of leaf operands. SHIFT_P says whether
554 CODE and WIDENED_CODE are some sort of shift.
556 If STMT_INFO is such a tree, return the number of leaf operands
557 and describe them in UNPROM[0] onwards. Also set *COMMON_TYPE
558 to a type that (a) is narrower than the result of STMT_INFO and
559 (b) can hold all leaf operand values.
561 If SUBTYPE then allow that the signs of the operands
562 may differ in signs but not in precision. SUBTYPE is updated to reflect
565 Return 0 if STMT_INFO isn't such a tree, or if no such COMMON_TYPE
569 vect_widened_op_tree (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree_code code
,
570 code_helper widened_code
, bool shift_p
,
571 unsigned int max_nops
,
572 vect_unpromoted_value
*unprom
, tree
*common_type
,
573 enum optab_subtype
*subtype
= NULL
)
575 /* Check for an integer operation with the right code. */
576 gimple
* stmt
= stmt_info
->stmt
;
577 if (!(is_gimple_assign (stmt
) || is_gimple_call (stmt
)))
580 code_helper rhs_code
;
581 if (is_gimple_assign (stmt
))
582 rhs_code
= gimple_assign_rhs_code (stmt
);
583 else if (is_gimple_call (stmt
))
584 rhs_code
= gimple_call_combined_fn (stmt
);
589 && rhs_code
!= widened_code
)
592 tree lhs
= gimple_get_lhs (stmt
);
593 tree type
= TREE_TYPE (lhs
);
594 if (!INTEGRAL_TYPE_P (type
))
597 /* Assume that both operands will be leaf operands. */
600 /* Check the operands. */
601 unsigned int next_op
= 0;
602 for (unsigned int i
= 0; i
< 2; ++i
)
604 vect_unpromoted_value
*this_unprom
= &unprom
[next_op
];
605 unsigned int nops
= 1;
606 tree op
= gimple_arg (stmt
, i
);
607 if (i
== 1 && TREE_CODE (op
) == INTEGER_CST
)
609 /* We already have a common type from earlier operands.
610 Update it to account for OP. */
611 this_unprom
->set_op (op
, vect_constant_def
);
612 if (!vect_joust_widened_integer (type
, shift_p
, op
, common_type
))
617 /* Only allow shifts by constants. */
618 if (shift_p
&& i
== 1)
621 if (rhs_code
!= code
)
623 /* If rhs_code is widened_code, don't look through further
624 possible promotions, there is a promotion already embedded
625 in the WIDEN_*_EXPR. */
626 if (TREE_CODE (op
) != SSA_NAME
627 || !INTEGRAL_TYPE_P (TREE_TYPE (op
)))
630 stmt_vec_info def_stmt_info
;
633 if (!vect_is_simple_use (op
, vinfo
, &dt
, &def_stmt_info
,
636 this_unprom
->set_op (op
, dt
, NULL
);
638 else if (!vect_look_through_possible_promotion (vinfo
, op
,
642 if (TYPE_PRECISION (this_unprom
->type
) == TYPE_PRECISION (type
))
644 /* The operand isn't widened. If STMT_INFO has the code
645 for an unwidened operation, recursively check whether
646 this operand is a node of the tree. */
649 || this_unprom
->dt
!= vect_internal_def
)
652 /* Give back the leaf slot allocated above now that we're
653 not treating this as a leaf operand. */
656 /* Recursively process the definition of the operand. */
657 stmt_vec_info def_stmt_info
658 = vinfo
->lookup_def (this_unprom
->op
);
659 nops
= vect_widened_op_tree (vinfo
, def_stmt_info
, code
,
660 widened_code
, shift_p
, max_nops
,
661 this_unprom
, common_type
,
670 /* Make sure that the operand is narrower than the result. */
671 if (TYPE_PRECISION (this_unprom
->type
) * 2
672 > TYPE_PRECISION (type
))
675 /* Update COMMON_TYPE for the new operand. */
677 *common_type
= this_unprom
->type
;
678 else if (!vect_joust_widened_type (type
, this_unprom
->type
,
683 /* See if we can sign extend the smaller type. */
684 if (TYPE_PRECISION (this_unprom
->type
)
685 > TYPE_PRECISION (*common_type
))
686 *common_type
= this_unprom
->type
;
687 *subtype
= optab_vector_mixed_sign
;
699 /* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
700 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
703 vect_recog_temp_ssa_var (tree type
, gimple
*stmt
= NULL
)
705 return make_temp_ssa_name (type
, stmt
, "patt");
708 /* STMT2_INFO describes a type conversion that could be split into STMT1
709 followed by a version of STMT2_INFO that takes NEW_RHS as its first
710 input. Try to do this using pattern statements, returning true on
714 vect_split_statement (vec_info
*vinfo
, stmt_vec_info stmt2_info
, tree new_rhs
,
715 gimple
*stmt1
, tree vectype
)
717 if (is_pattern_stmt_p (stmt2_info
))
719 /* STMT2_INFO is part of a pattern. Get the statement to which
720 the pattern is attached. */
721 stmt_vec_info orig_stmt2_info
= STMT_VINFO_RELATED_STMT (stmt2_info
);
722 vect_init_pattern_stmt (vinfo
, stmt1
, orig_stmt2_info
, vectype
);
724 if (dump_enabled_p ())
725 dump_printf_loc (MSG_NOTE
, vect_location
,
726 "Splitting pattern statement: %G", stmt2_info
->stmt
);
728 /* Since STMT2_INFO is a pattern statement, we can change it
729 in-situ without worrying about changing the code for the
731 gimple_assign_set_rhs1 (stmt2_info
->stmt
, new_rhs
);
733 if (dump_enabled_p ())
735 dump_printf_loc (MSG_NOTE
, vect_location
, "into: %G", stmt1
);
736 dump_printf_loc (MSG_NOTE
, vect_location
, "and: %G",
740 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt2_info
);
741 if (STMT_VINFO_RELATED_STMT (orig_stmt2_info
) == stmt2_info
)
742 /* STMT2_INFO is the actual pattern statement. Add STMT1
743 to the end of the definition sequence. */
744 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
747 /* STMT2_INFO belongs to the definition sequence. Insert STMT1
749 gimple_stmt_iterator gsi
= gsi_for_stmt (stmt2_info
->stmt
, def_seq
);
750 gsi_insert_before_without_update (&gsi
, stmt1
, GSI_SAME_STMT
);
756 /* STMT2_INFO doesn't yet have a pattern. Try to create a
757 two-statement pattern now. */
758 gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info
));
759 tree lhs_type
= TREE_TYPE (gimple_get_lhs (stmt2_info
->stmt
));
760 tree lhs_vectype
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
764 if (dump_enabled_p ())
765 dump_printf_loc (MSG_NOTE
, vect_location
,
766 "Splitting statement: %G", stmt2_info
->stmt
);
768 /* Add STMT1 as a singleton pattern definition sequence. */
769 gimple_seq
*def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt2_info
);
770 vect_init_pattern_stmt (vinfo
, stmt1
, stmt2_info
, vectype
);
771 gimple_seq_add_stmt_without_update (def_seq
, stmt1
);
773 /* Build the second of the two pattern statements. */
774 tree new_lhs
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
775 gassign
*new_stmt2
= gimple_build_assign (new_lhs
, NOP_EXPR
, new_rhs
);
776 vect_set_pattern_stmt (vinfo
, new_stmt2
, stmt2_info
, lhs_vectype
);
778 if (dump_enabled_p ())
780 dump_printf_loc (MSG_NOTE
, vect_location
,
781 "into pattern statements: %G", stmt1
);
782 dump_printf_loc (MSG_NOTE
, vect_location
, "and: %G",
783 (gimple
*) new_stmt2
);
790 /* Look for the following pattern
796 ABS_STMT should point to a statement of code ABS_EXPR or ABSU_EXPR.
797 HALF_TYPE and UNPROM will be set should the statement be found to
798 be a widened operation.
799 DIFF_STMT will be set to the MINUS_EXPR
800 statement that precedes the ABS_STMT if it is a MINUS_EXPR..
803 vect_recog_absolute_difference (vec_info
*vinfo
, gassign
*abs_stmt
,
805 vect_unpromoted_value unprom
[2],
811 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
812 inside the loop (in case we are analyzing an outer-loop). */
813 enum tree_code code
= gimple_assign_rhs_code (abs_stmt
);
814 if (code
!= ABS_EXPR
&& code
!= ABSU_EXPR
)
817 tree abs_oprnd
= gimple_assign_rhs1 (abs_stmt
);
818 tree abs_type
= TREE_TYPE (abs_oprnd
);
821 if (!ANY_INTEGRAL_TYPE_P (abs_type
)
822 || TYPE_OVERFLOW_WRAPS (abs_type
)
823 || TYPE_UNSIGNED (abs_type
))
826 /* Peel off conversions from the ABS input. This can involve sign
827 changes (e.g. from an unsigned subtraction to a signed ABS input)
828 or signed promotion, but it can't include unsigned promotion.
829 (Note that ABS of an unsigned promotion should have been folded
830 away before now anyway.) */
831 vect_unpromoted_value unprom_diff
;
832 abs_oprnd
= vect_look_through_possible_promotion (vinfo
, abs_oprnd
,
836 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (abs_type
)
837 && TYPE_UNSIGNED (unprom_diff
.type
))
840 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
841 stmt_vec_info diff_stmt_vinfo
= vect_get_internal_def (vinfo
, abs_oprnd
);
842 if (!diff_stmt_vinfo
)
845 gassign
*diff
= dyn_cast
<gassign
*> (STMT_VINFO_STMT (diff_stmt_vinfo
));
846 if (diff_stmt
&& diff
847 && gimple_assign_rhs_code (diff
) == MINUS_EXPR
848 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (abs_oprnd
)))
851 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
852 inside the loop (in case we are analyzing an outer-loop). */
853 if (vect_widened_op_tree (vinfo
, diff_stmt_vinfo
,
854 MINUS_EXPR
, IFN_VEC_WIDEN_MINUS
,
855 false, 2, unprom
, half_type
))
861 /* Convert UNPROM to TYPE and return the result, adding new statements
862 to STMT_INFO's pattern definition statements if no better way is
863 available. VECTYPE is the vector form of TYPE.
865 If SUBTYPE then convert the type based on the subtype. */
868 vect_convert_input (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree type
,
869 vect_unpromoted_value
*unprom
, tree vectype
,
870 enum optab_subtype subtype
= optab_default
)
872 /* Update the type if the signs differ. */
873 if (subtype
== optab_vector_mixed_sign
)
875 gcc_assert (!TYPE_UNSIGNED (type
));
876 if (TYPE_UNSIGNED (TREE_TYPE (unprom
->op
)))
878 type
= unsigned_type_for (type
);
879 vectype
= unsigned_type_for (vectype
);
883 /* Check for a no-op conversion. */
884 if (types_compatible_p (type
, TREE_TYPE (unprom
->op
)))
887 /* Allow the caller to create constant vect_unpromoted_values. */
888 if (TREE_CODE (unprom
->op
) == INTEGER_CST
)
889 return wide_int_to_tree (type
, wi::to_widest (unprom
->op
));
891 tree input
= unprom
->op
;
894 tree lhs
= gimple_get_lhs (unprom
->caster
->stmt
);
895 tree lhs_type
= TREE_TYPE (lhs
);
897 /* If the result of the existing cast is the right width, use it
898 instead of the source of the cast. */
899 if (TYPE_PRECISION (lhs_type
) == TYPE_PRECISION (type
))
901 /* If the precision we want is between the source and result
902 precisions of the existing cast, try splitting the cast into
903 two and tapping into a mid-way point. */
904 else if (TYPE_PRECISION (lhs_type
) > TYPE_PRECISION (type
)
905 && TYPE_PRECISION (type
) > TYPE_PRECISION (unprom
->type
))
907 /* In order to preserve the semantics of the original cast,
908 give the mid-way point the same signedness as the input value.
910 It would be possible to use a signed type here instead if
911 TYPE is signed and UNPROM->TYPE is unsigned, but that would
912 make the sign of the midtype sensitive to the order in
913 which we process the statements, since the signedness of
914 TYPE is the signedness required by just one of possibly
915 many users. Also, unsigned promotions are usually as cheap
916 as or cheaper than signed ones, so it's better to keep an
917 unsigned promotion. */
918 tree midtype
= build_nonstandard_integer_type
919 (TYPE_PRECISION (type
), TYPE_UNSIGNED (unprom
->type
));
920 tree vec_midtype
= get_vectype_for_scalar_type (vinfo
, midtype
);
923 input
= vect_recog_temp_ssa_var (midtype
, NULL
);
924 gassign
*new_stmt
= gimple_build_assign (input
, NOP_EXPR
,
926 if (!vect_split_statement (vinfo
, unprom
->caster
, input
, new_stmt
,
928 append_pattern_def_seq (vinfo
, stmt_info
,
929 new_stmt
, vec_midtype
);
933 /* See if we can reuse an existing result. */
934 if (types_compatible_p (type
, TREE_TYPE (input
)))
938 /* We need a new conversion statement. */
939 tree new_op
= vect_recog_temp_ssa_var (type
, NULL
);
940 gassign
*new_stmt
= gimple_build_assign (new_op
, NOP_EXPR
, input
);
942 /* If OP is an external value, see if we can insert the new statement
943 on an incoming edge. */
944 if (input
== unprom
->op
&& unprom
->dt
== vect_external_def
)
945 if (edge e
= vect_get_external_def_edge (vinfo
, input
))
947 basic_block new_bb
= gsi_insert_on_edge_immediate (e
, new_stmt
);
948 gcc_assert (!new_bb
);
952 /* As a (common) last resort, add the statement to the pattern itself. */
953 append_pattern_def_seq (vinfo
, stmt_info
, new_stmt
, vectype
);
957 /* Invoke vect_convert_input for N elements of UNPROM and store the
958 result in the corresponding elements of RESULT.
960 If SUBTYPE then convert the type based on the subtype. */
963 vect_convert_inputs (vec_info
*vinfo
, stmt_vec_info stmt_info
, unsigned int n
,
964 tree
*result
, tree type
, vect_unpromoted_value
*unprom
,
965 tree vectype
, enum optab_subtype subtype
= optab_default
)
967 for (unsigned int i
= 0; i
< n
; ++i
)
970 for (j
= 0; j
< i
; ++j
)
971 if (unprom
[j
].op
== unprom
[i
].op
)
975 result
[i
] = result
[j
];
977 result
[i
] = vect_convert_input (vinfo
, stmt_info
,
978 type
, &unprom
[i
], vectype
, subtype
);
982 /* The caller has created a (possibly empty) sequence of pattern definition
983 statements followed by a single statement PATTERN_STMT. Cast the result
984 of this final statement to TYPE. If a new statement is needed, add
985 PATTERN_STMT to the end of STMT_INFO's pattern definition statements
986 and return the new statement, otherwise return PATTERN_STMT as-is.
987 VECITYPE is the vector form of PATTERN_STMT's result type. */
990 vect_convert_output (vec_info
*vinfo
, stmt_vec_info stmt_info
, tree type
,
991 gimple
*pattern_stmt
, tree vecitype
)
993 tree lhs
= gimple_get_lhs (pattern_stmt
);
994 if (!types_compatible_p (type
, TREE_TYPE (lhs
)))
996 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vecitype
);
997 tree cast_var
= vect_recog_temp_ssa_var (type
, NULL
);
998 pattern_stmt
= gimple_build_assign (cast_var
, NOP_EXPR
, lhs
);
1000 return pattern_stmt
;
1003 /* Return true if STMT_VINFO describes a reduction for which reassociation
1004 is allowed. If STMT_INFO is part of a group, assume that it's part of
1005 a reduction chain and optimistically assume that all statements
1006 except the last allow reassociation.
1007 Also require it to have code CODE and to be a reduction
1008 in the outermost loop. When returning true, store the operands in
1009 *OP0_OUT and *OP1_OUT. */
1012 vect_reassociating_reduction_p (vec_info
*vinfo
,
1013 stmt_vec_info stmt_info
, tree_code code
,
1014 tree
*op0_out
, tree
*op1_out
)
1016 loop_vec_info loop_info
= dyn_cast
<loop_vec_info
> (vinfo
);
1020 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
1021 if (!assign
|| gimple_assign_rhs_code (assign
) != code
)
1024 /* We don't allow changing the order of the computation in the inner-loop
1025 when doing outer-loop vectorization. */
1026 class loop
*loop
= LOOP_VINFO_LOOP (loop_info
);
1027 if (loop
&& nested_in_vect_loop_p (loop
, stmt_info
))
1030 if (STMT_VINFO_DEF_TYPE (stmt_info
) == vect_reduction_def
)
1032 if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign
)),
1036 else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info
) == NULL
)
1039 *op0_out
= gimple_assign_rhs1 (assign
);
1040 *op1_out
= gimple_assign_rhs2 (assign
);
1041 if (commutative_tree_code (code
) && STMT_VINFO_REDUC_IDX (stmt_info
) == 0)
1042 std::swap (*op0_out
, *op1_out
);
1046 /* match.pd function to match
1047 (cond (cmp@3 a b) (convert@1 c) (convert@2 d))
1049 1) @1, @2, c, d, a, b are all integral type.
1050 2) There's single_use for both @1 and @2.
1051 3) a, c have same precision.
1052 4) c and @1 have different precision.
1053 5) c, d are the same type or they can differ in sign when convert is
1056 record a and c and d and @3. */
1058 extern bool gimple_cond_expr_convert_p (tree
, tree
*, tree (*)(tree
));
1060 /* Function vect_recog_cond_expr_convert
1062 Try to find the following pattern:
1067 TYPE_E op_true = (TYPE_E) A;
1068 TYPE_E op_false = (TYPE_E) B;
1070 E = C cmp D ? op_true : op_false;
1073 TYPE_PRECISION (TYPE_E) != TYPE_PRECISION (TYPE_CD);
1074 TYPE_PRECISION (TYPE_AB) == TYPE_PRECISION (TYPE_CD);
1075 single_use of op_true and op_false.
1076 TYPE_AB could differ in sign when (TYPE_E) A is a truncation.
1080 * STMT_VINFO: The stmt from which the pattern search begins.
1081 here it starts with E = c cmp D ? op_true : op_false;
1085 TYPE1 E' = C cmp D ? A : B;
1086 TYPE3 E = (TYPE3) E';
1088 There may extra nop_convert for A or B to handle different signness.
1090 * TYPE_OUT: The vector type of the output of this pattern.
1092 * Return value: A new stmt that will be used to replace the sequence of
1093 stmts that constitute the pattern. In this case it will be:
1095 E' = C cmp D ? A : B; is recorded in pattern definition statements; */
1098 vect_recog_cond_expr_convert_pattern (vec_info
*vinfo
,
1099 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1101 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
1102 tree lhs
, match
[4], temp
, type
, new_lhs
, op2
;
1104 gimple
*pattern_stmt
;
1109 lhs
= gimple_assign_lhs (last_stmt
);
1111 /* Find E = C cmp D ? (TYPE3) A ? (TYPE3) B;
1112 TYPE_PRECISION (A) == TYPE_PRECISION (C). */
1113 if (!gimple_cond_expr_convert_p (lhs
, &match
[0], NULL
))
1116 vect_pattern_detected ("vect_recog_cond_expr_convert_pattern", last_stmt
);
1119 type
= TREE_TYPE (match
[1]);
1120 if (TYPE_SIGN (type
) != TYPE_SIGN (TREE_TYPE (match
[2])))
1122 op2
= vect_recog_temp_ssa_var (type
, NULL
);
1123 gimple
* nop_stmt
= gimple_build_assign (op2
, NOP_EXPR
, match
[2]);
1124 append_pattern_def_seq (vinfo
, stmt_vinfo
, nop_stmt
,
1125 get_vectype_for_scalar_type (vinfo
, type
));
1128 temp
= vect_recog_temp_ssa_var (type
, NULL
);
1129 cond_stmt
= gimple_build_assign (temp
, build3 (COND_EXPR
, type
, match
[3],
1131 append_pattern_def_seq (vinfo
, stmt_vinfo
, cond_stmt
,
1132 get_vectype_for_scalar_type (vinfo
, type
));
1133 new_lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
1134 pattern_stmt
= gimple_build_assign (new_lhs
, NOP_EXPR
, temp
);
1135 *type_out
= STMT_VINFO_VECTYPE (stmt_vinfo
);
1137 if (dump_enabled_p ())
1138 dump_printf_loc (MSG_NOTE
, vect_location
,
1139 "created pattern stmt: %G", pattern_stmt
);
1140 return pattern_stmt
;
1143 /* Function vect_recog_dot_prod_pattern
1145 Try to find the following pattern:
1152 sum_0 = phi <init, sum_1>
1155 S3 x_T = (TYPE1) x_t;
1156 S4 y_T = (TYPE1) y_t;
1157 S5 prod = x_T * y_T;
1158 [S6 prod = (TYPE2) prod; #optional]
1159 S7 sum_1 = prod + sum_0;
1161 where 'TYPE1' is exactly double the size of type 'type1a' and 'type1b',
1162 the sign of 'TYPE1' must be one of 'type1a' or 'type1b' but the sign of
1163 'type1a' and 'type1b' can differ.
1167 * STMT_VINFO: The stmt from which the pattern search begins. In the
1168 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
1173 * TYPE_OUT: The type of the output of this pattern.
1175 * Return value: A new stmt that will be used to replace the sequence of
1176 stmts that constitute the pattern. In this case it will be:
1177 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
1179 Note: The dot-prod idiom is a widening reduction pattern that is
1180 vectorized without preserving all the intermediate results. It
1181 produces only N/2 (widened) results (by summing up pairs of
1182 intermediate results) rather than all N results. Therefore, we
1183 cannot allow this pattern when we want to get all the results and in
1184 the correct order (as is the case when this computation is in an
1185 inner-loop nested in an outer-loop that us being vectorized). */
1188 vect_recog_dot_prod_pattern (vec_info
*vinfo
,
1189 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1191 tree oprnd0
, oprnd1
;
1192 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1193 tree type
, half_type
;
1194 gimple
*pattern_stmt
;
1197 /* Look for the following pattern
1201 DDPROD = (TYPE2) DPROD;
1202 sum_1 = DDPROD + sum_0;
1204 - DX is double the size of X
1205 - DY is double the size of Y
1206 - DX, DY, DPROD all have the same type but the sign
1207 between X, Y and DPROD can differ.
1208 - sum is the same size of DPROD or bigger
1209 - sum has been recognized as a reduction variable.
1211 This is equivalent to:
1212 DPROD = X w* Y; #widen mult
1213 sum_1 = DPROD w+ sum_0; #widen summation
1215 DPROD = X w* Y; #widen mult
1216 sum_1 = DPROD + sum_0; #summation
1219 /* Starting from LAST_STMT, follow the defs of its uses in search
1220 of the above pattern. */
1222 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
1226 type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1228 vect_unpromoted_value unprom_mult
;
1229 oprnd0
= vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom_mult
);
1231 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1232 we know that oprnd1 is the reduction variable (defined by a loop-header
1233 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1234 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1238 stmt_vec_info mult_vinfo
= vect_get_internal_def (vinfo
, oprnd0
);
1242 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1243 inside the loop (in case we are analyzing an outer-loop). */
1244 vect_unpromoted_value unprom0
[2];
1245 enum optab_subtype subtype
= optab_vector
;
1246 if (!vect_widened_op_tree (vinfo
, mult_vinfo
, MULT_EXPR
, WIDEN_MULT_EXPR
,
1247 false, 2, unprom0
, &half_type
, &subtype
))
1250 /* If there are two widening operations, make sure they agree on the sign
1251 of the extension. The result of an optab_vector_mixed_sign operation
1252 is signed; otherwise, the result has the same sign as the operands. */
1253 if (TYPE_PRECISION (unprom_mult
.type
) != TYPE_PRECISION (type
)
1254 && (subtype
== optab_vector_mixed_sign
1255 ? TYPE_UNSIGNED (unprom_mult
.type
)
1256 : TYPE_SIGN (unprom_mult
.type
) != TYPE_SIGN (half_type
)))
1259 vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt
);
1261 /* If the inputs have mixed signs, canonicalize on using the signed
1262 input type for analysis. This also helps when emulating mixed-sign
1263 operations using signed operations. */
1264 if (subtype
== optab_vector_mixed_sign
)
1265 half_type
= signed_type_for (half_type
);
1268 if (!vect_supportable_direct_optab_p (vinfo
, type
, DOT_PROD_EXPR
, half_type
,
1269 type_out
, &half_vectype
, subtype
))
1271 /* We can emulate a mixed-sign dot-product using a sequence of
1272 signed dot-products; see vect_emulate_mixed_dot_prod for details. */
1273 if (subtype
!= optab_vector_mixed_sign
1274 || !vect_supportable_direct_optab_p (vinfo
, signed_type_for (type
),
1275 DOT_PROD_EXPR
, half_type
,
1276 type_out
, &half_vectype
,
1280 *type_out
= signed_or_unsigned_type_for (TYPE_UNSIGNED (type
),
1284 /* Get the inputs in the appropriate types. */
1286 vect_convert_inputs (vinfo
, stmt_vinfo
, 2, mult_oprnd
, half_type
,
1287 unprom0
, half_vectype
, subtype
);
1289 var
= vect_recog_temp_ssa_var (type
, NULL
);
1290 pattern_stmt
= gimple_build_assign (var
, DOT_PROD_EXPR
,
1291 mult_oprnd
[0], mult_oprnd
[1], oprnd1
);
1293 return pattern_stmt
;
1297 /* Function vect_recog_sad_pattern
1299 Try to find the following Sum of Absolute Difference (SAD) pattern:
1302 signed TYPE1 diff, abs_diff;
1305 sum_0 = phi <init, sum_1>
1308 S3 x_T = (TYPE1) x_t;
1309 S4 y_T = (TYPE1) y_t;
1310 S5 diff = x_T - y_T;
1311 S6 abs_diff = ABS_EXPR <diff>;
1312 [S7 abs_diff = (TYPE2) abs_diff; #optional]
1313 S8 sum_1 = abs_diff + sum_0;
1315 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
1316 same size of 'TYPE1' or bigger. This is a special case of a reduction
1321 * STMT_VINFO: The stmt from which the pattern search begins. In the
1322 example, when this function is called with S8, the pattern
1323 {S3,S4,S5,S6,S7,S8} will be detected.
1327 * TYPE_OUT: The type of the output of this pattern.
1329 * Return value: A new stmt that will be used to replace the sequence of
1330 stmts that constitute the pattern. In this case it will be:
1331 SAD_EXPR <x_t, y_t, sum_0>
1335 vect_recog_sad_pattern (vec_info
*vinfo
,
1336 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1338 gimple
*last_stmt
= stmt_vinfo
->stmt
;
1341 /* Look for the following pattern
1345 DAD = ABS_EXPR <DDIFF>;
1346 DDPROD = (TYPE2) DPROD;
1347 sum_1 = DAD + sum_0;
1349 - DX is at least double the size of X
1350 - DY is at least double the size of Y
1351 - DX, DY, DDIFF, DAD all have the same type
1352 - sum is the same size of DAD or bigger
1353 - sum has been recognized as a reduction variable.
1355 This is equivalent to:
1356 DDIFF = X w- Y; #widen sub
1357 DAD = ABS_EXPR <DDIFF>;
1358 sum_1 = DAD w+ sum_0; #widen summation
1360 DDIFF = X w- Y; #widen sub
1361 DAD = ABS_EXPR <DDIFF>;
1362 sum_1 = DAD + sum_0; #summation
1365 /* Starting from LAST_STMT, follow the defs of its uses in search
1366 of the above pattern. */
1368 tree plus_oprnd0
, plus_oprnd1
;
1369 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
1370 &plus_oprnd0
, &plus_oprnd1
))
1373 tree sum_type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1375 /* Any non-truncating sequence of conversions is OK here, since
1376 with a successful match, the result of the ABS(U) is known to fit
1377 within the nonnegative range of the result type. (It cannot be the
1378 negative of the minimum signed value due to the range of the widening
1380 vect_unpromoted_value unprom_abs
;
1381 plus_oprnd0
= vect_look_through_possible_promotion (vinfo
, plus_oprnd0
,
1384 /* So far so good. Since last_stmt was detected as a (summation) reduction,
1385 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
1386 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
1387 Then check that plus_oprnd0 is defined by an abs_expr. */
1392 stmt_vec_info abs_stmt_vinfo
= vect_get_internal_def (vinfo
, plus_oprnd0
);
1393 if (!abs_stmt_vinfo
)
1396 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
1397 inside the loop (in case we are analyzing an outer-loop). */
1398 gassign
*abs_stmt
= dyn_cast
<gassign
*> (abs_stmt_vinfo
->stmt
);
1399 vect_unpromoted_value unprom
[2];
1403 gcall
*abd_stmt
= dyn_cast
<gcall
*> (abs_stmt_vinfo
->stmt
);
1405 || !gimple_call_internal_p (abd_stmt
)
1406 || gimple_call_num_args (abd_stmt
) != 2)
1409 tree abd_oprnd0
= gimple_call_arg (abd_stmt
, 0);
1410 tree abd_oprnd1
= gimple_call_arg (abd_stmt
, 1);
1412 if (gimple_call_internal_fn (abd_stmt
) == IFN_ABD
)
1414 if (!vect_look_through_possible_promotion (vinfo
, abd_oprnd0
,
1416 || !vect_look_through_possible_promotion (vinfo
, abd_oprnd1
,
1420 else if (gimple_call_internal_fn (abd_stmt
) == IFN_VEC_WIDEN_ABD
)
1422 unprom
[0].op
= abd_oprnd0
;
1423 unprom
[0].type
= TREE_TYPE (abd_oprnd0
);
1424 unprom
[1].op
= abd_oprnd1
;
1425 unprom
[1].type
= TREE_TYPE (abd_oprnd1
);
1430 half_type
= unprom
[0].type
;
1432 else if (!vect_recog_absolute_difference (vinfo
, abs_stmt
, &half_type
,
1436 vect_pattern_detected ("vect_recog_sad_pattern", last_stmt
);
1439 if (!vect_supportable_direct_optab_p (vinfo
, sum_type
, SAD_EXPR
, half_type
,
1440 type_out
, &half_vectype
))
1443 /* Get the inputs to the SAD_EXPR in the appropriate types. */
1445 vect_convert_inputs (vinfo
, stmt_vinfo
, 2, sad_oprnd
, half_type
,
1446 unprom
, half_vectype
);
1448 tree var
= vect_recog_temp_ssa_var (sum_type
, NULL
);
1449 gimple
*pattern_stmt
= gimple_build_assign (var
, SAD_EXPR
, sad_oprnd
[0],
1450 sad_oprnd
[1], plus_oprnd1
);
1452 return pattern_stmt
;
1455 /* Function vect_recog_abd_pattern
1457 Try to find the following ABsolute Difference (ABD) or
1458 widening ABD (WIDEN_ABD) pattern:
1462 TYPE3 x_cast = (TYPE3) x; // widening or no-op
1463 TYPE3 y_cast = (TYPE3) y; // widening or no-op
1464 TYPE3 diff = x_cast - y_cast;
1465 TYPE4 diff_cast = (TYPE4) diff; // widening or no-op
1466 TYPE5 abs = ABS(U)_EXPR <diff_cast>;
1468 WIDEN_ABD exists to optimize the case where TYPE4 is at least
1469 twice as wide as TYPE3.
1473 * STMT_VINFO: The stmt from which the pattern search begins
1477 * TYPE_OUT: The type of the output of this pattern
1479 * Return value: A new stmt that will be used to replace the sequence of
1480 stmts that constitute the pattern, principally:
1481 out = IFN_ABD (x, y)
1482 out = IFN_WIDEN_ABD (x, y)
1486 vect_recog_abd_pattern (vec_info
*vinfo
,
1487 stmt_vec_info stmt_vinfo
, tree
*type_out
)
1489 gassign
*last_stmt
= dyn_cast
<gassign
*> (STMT_VINFO_STMT (stmt_vinfo
));
1493 tree out_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
1495 vect_unpromoted_value unprom
[2];
1496 gassign
*diff_stmt
= NULL
;
1498 if (!vect_recog_absolute_difference (vinfo
, last_stmt
, &abd_in_type
,
1499 unprom
, &diff_stmt
))
1501 /* We cannot try further without having a non-widening MINUS. */
1505 unprom
[0].op
= gimple_assign_rhs1 (diff_stmt
);
1506 unprom
[1].op
= gimple_assign_rhs2 (diff_stmt
);
1507 abd_in_type
= signed_type_for (out_type
);
1510 tree abd_out_type
= abd_in_type
;
1512 tree vectype_in
= get_vectype_for_scalar_type (vinfo
, abd_in_type
);
1516 internal_fn ifn
= IFN_ABD
;
1517 tree vectype_out
= vectype_in
;
1519 if (TYPE_PRECISION (out_type
) >= TYPE_PRECISION (abd_in_type
) * 2
1520 && stmt_vinfo
->min_output_precision
>= TYPE_PRECISION (abd_in_type
) * 2)
1523 = build_nonstandard_integer_type (TYPE_PRECISION (abd_in_type
) * 2,
1524 TYPE_UNSIGNED (abd_in_type
));
1525 tree mid_vectype
= get_vectype_for_scalar_type (vinfo
, mid_type
);
1527 code_helper dummy_code
;
1529 auto_vec
<tree
> dummy_vec
;
1531 && supportable_widening_operation (vinfo
, IFN_VEC_WIDEN_ABD
,
1532 stmt_vinfo
, mid_vectype
,
1534 &dummy_code
, &dummy_code
,
1535 &dummy_int
, &dummy_vec
))
1537 ifn
= IFN_VEC_WIDEN_ABD
;
1538 abd_out_type
= mid_type
;
1539 vectype_out
= mid_vectype
;
1544 && !direct_internal_fn_supported_p (ifn
, vectype_in
,
1545 OPTIMIZE_FOR_SPEED
))
1548 vect_pattern_detected ("vect_recog_abd_pattern", last_stmt
);
1551 vect_convert_inputs (vinfo
, stmt_vinfo
, 2, abd_oprnds
,
1552 abd_in_type
, unprom
, vectype_in
);
1554 *type_out
= get_vectype_for_scalar_type (vinfo
, out_type
);
1556 tree abd_result
= vect_recog_temp_ssa_var (abd_out_type
, NULL
);
1557 gcall
*abd_stmt
= gimple_build_call_internal (ifn
, 2,
1558 abd_oprnds
[0], abd_oprnds
[1]);
1559 gimple_call_set_lhs (abd_stmt
, abd_result
);
1560 gimple_set_location (abd_stmt
, gimple_location (last_stmt
));
1562 gimple
*stmt
= abd_stmt
;
1563 if (TYPE_PRECISION (abd_in_type
) == TYPE_PRECISION (abd_out_type
)
1564 && TYPE_PRECISION (abd_out_type
) < TYPE_PRECISION (out_type
)
1565 && !TYPE_UNSIGNED (abd_out_type
))
1567 tree unsign
= unsigned_type_for (abd_out_type
);
1568 stmt
= vect_convert_output (vinfo
, stmt_vinfo
, unsign
, stmt
, vectype_out
);
1569 vectype_out
= get_vectype_for_scalar_type (vinfo
, unsign
);
1572 return vect_convert_output (vinfo
, stmt_vinfo
, out_type
, stmt
, vectype_out
);
1575 /* Recognize an operation that performs ORIG_CODE on widened inputs,
1576 so that it can be treated as though it had the form:
1580 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1581 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1582 | RES_TYPE a_extend = (RES_TYPE) a_cast; // promotion from HALF_TYPE
1583 | RES_TYPE b_extend = (RES_TYPE) b_cast; // promotion from HALF_TYPE
1584 | RES_TYPE res = a_extend ORIG_CODE b_extend;
1586 Try to replace the pattern with:
1590 HALF_TYPE a_cast = (HALF_TYPE) a; // possible no-op
1591 HALF_TYPE b_cast = (HALF_TYPE) b; // possible no-op
1592 | EXT_TYPE ext = a_cast WIDE_CODE b_cast;
1593 | RES_TYPE res = (EXT_TYPE) ext; // possible no-op
1595 where EXT_TYPE is wider than HALF_TYPE but has the same signedness.
1597 SHIFT_P is true if ORIG_CODE and WIDE_CODE are shifts. NAME is the
1598 name of the pattern being matched, for dump purposes. */
1601 vect_recog_widen_op_pattern (vec_info
*vinfo
,
1602 stmt_vec_info last_stmt_info
, tree
*type_out
,
1603 tree_code orig_code
, code_helper wide_code
,
1604 bool shift_p
, const char *name
)
1606 gimple
*last_stmt
= last_stmt_info
->stmt
;
1608 vect_unpromoted_value unprom
[2];
1610 if (!vect_widened_op_tree (vinfo
, last_stmt_info
, orig_code
, orig_code
,
1611 shift_p
, 2, unprom
, &half_type
))
1615 /* Pattern detected. */
1616 vect_pattern_detected (name
, last_stmt
);
1618 tree type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
1620 if (TYPE_PRECISION (type
) != TYPE_PRECISION (half_type
) * 2
1621 || TYPE_UNSIGNED (type
) != TYPE_UNSIGNED (half_type
))
1622 itype
= build_nonstandard_integer_type (TYPE_PRECISION (half_type
) * 2,
1623 TYPE_UNSIGNED (half_type
));
1625 /* Check target support */
1626 tree vectype
= get_vectype_for_scalar_type (vinfo
, half_type
);
1627 tree vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
1629 tree vecctype
= vecitype
;
1630 if (orig_code
== MINUS_EXPR
1631 && TYPE_UNSIGNED (itype
)
1632 && TYPE_PRECISION (type
) > TYPE_PRECISION (itype
))
1634 /* Subtraction is special, even if half_type is unsigned and no matter
1635 whether type is signed or unsigned, if type is wider than itype,
1636 we need to sign-extend from the widening operation result to the
1638 Consider half_type unsigned char, operand 1 0xfe, operand 2 0xff,
1639 itype unsigned short and type either int or unsigned int.
1640 Widened (unsigned short) 0xfe - (unsigned short) 0xff is
1641 (unsigned short) 0xffff, but for type int we want the result -1
1642 and for type unsigned int 0xffffffff rather than 0xffff. */
1643 ctype
= build_nonstandard_integer_type (TYPE_PRECISION (itype
), 0);
1644 vecctype
= get_vectype_for_scalar_type (vinfo
, ctype
);
1647 code_helper dummy_code
;
1649 auto_vec
<tree
> dummy_vec
;
1653 || !supportable_widening_operation (vinfo
, wide_code
, last_stmt_info
,
1655 &dummy_code
, &dummy_code
,
1656 &dummy_int
, &dummy_vec
))
1659 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
1664 vect_convert_inputs (vinfo
, last_stmt_info
,
1665 2, oprnd
, half_type
, unprom
, vectype
);
1667 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
1668 gimple
*pattern_stmt
= vect_gimple_build (var
, wide_code
, oprnd
[0], oprnd
[1]);
1670 if (vecctype
!= vecitype
)
1671 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, ctype
,
1672 pattern_stmt
, vecitype
);
1674 return vect_convert_output (vinfo
, last_stmt_info
,
1675 type
, pattern_stmt
, vecctype
);
1678 /* Try to detect multiplication on widened inputs, converting MULT_EXPR
1679 to WIDEN_MULT_EXPR. See vect_recog_widen_op_pattern for details. */
1682 vect_recog_widen_mult_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1685 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1686 MULT_EXPR
, WIDEN_MULT_EXPR
, false,
1687 "vect_recog_widen_mult_pattern");
1690 /* Try to detect addition on widened inputs, converting PLUS_EXPR
1691 to IFN_VEC_WIDEN_PLUS. See vect_recog_widen_op_pattern for details. */
1694 vect_recog_widen_plus_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1697 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1698 PLUS_EXPR
, IFN_VEC_WIDEN_PLUS
,
1699 false, "vect_recog_widen_plus_pattern");
1702 /* Try to detect subtraction on widened inputs, converting MINUS_EXPR
1703 to IFN_VEC_WIDEN_MINUS. See vect_recog_widen_op_pattern for details. */
1705 vect_recog_widen_minus_pattern (vec_info
*vinfo
, stmt_vec_info last_stmt_info
,
1708 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
1709 MINUS_EXPR
, IFN_VEC_WIDEN_MINUS
,
1710 false, "vect_recog_widen_minus_pattern");
1713 /* Try to detect abd on widened inputs, converting IFN_ABD
1714 to IFN_VEC_WIDEN_ABD. */
1716 vect_recog_widen_abd_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1719 gassign
*last_stmt
= dyn_cast
<gassign
*> (STMT_VINFO_STMT (stmt_vinfo
));
1720 if (!last_stmt
|| !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (last_stmt
)))
1723 tree last_rhs
= gimple_assign_rhs1 (last_stmt
);
1725 tree in_type
= TREE_TYPE (last_rhs
);
1726 tree out_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
1727 if (!INTEGRAL_TYPE_P (in_type
)
1728 || !INTEGRAL_TYPE_P (out_type
)
1729 || TYPE_PRECISION (in_type
) * 2 != TYPE_PRECISION (out_type
)
1730 || !TYPE_UNSIGNED (in_type
))
1733 vect_unpromoted_value unprom
;
1734 tree op
= vect_look_through_possible_promotion (vinfo
, last_rhs
, &unprom
);
1735 if (!op
|| TYPE_PRECISION (TREE_TYPE (op
)) != TYPE_PRECISION (in_type
))
1738 stmt_vec_info abd_pattern_vinfo
= vect_get_internal_def (vinfo
, op
);
1739 if (!abd_pattern_vinfo
)
1742 abd_pattern_vinfo
= vect_stmt_to_vectorize (abd_pattern_vinfo
);
1743 gcall
*abd_stmt
= dyn_cast
<gcall
*> (STMT_VINFO_STMT (abd_pattern_vinfo
));
1745 || !gimple_call_internal_p (abd_stmt
)
1746 || gimple_call_internal_fn (abd_stmt
) != IFN_ABD
)
1749 tree vectype_in
= get_vectype_for_scalar_type (vinfo
, in_type
);
1750 tree vectype_out
= get_vectype_for_scalar_type (vinfo
, out_type
);
1752 code_helper dummy_code
;
1754 auto_vec
<tree
> dummy_vec
;
1755 if (!supportable_widening_operation (vinfo
, IFN_VEC_WIDEN_ABD
, stmt_vinfo
,
1756 vectype_out
, vectype_in
,
1757 &dummy_code
, &dummy_code
,
1758 &dummy_int
, &dummy_vec
))
1761 vect_pattern_detected ("vect_recog_widen_abd_pattern", last_stmt
);
1763 *type_out
= vectype_out
;
1765 tree abd_oprnd0
= gimple_call_arg (abd_stmt
, 0);
1766 tree abd_oprnd1
= gimple_call_arg (abd_stmt
, 1);
1767 tree widen_abd_result
= vect_recog_temp_ssa_var (out_type
, NULL
);
1768 gcall
*widen_abd_stmt
= gimple_build_call_internal (IFN_VEC_WIDEN_ABD
, 2,
1769 abd_oprnd0
, abd_oprnd1
);
1770 gimple_call_set_lhs (widen_abd_stmt
, widen_abd_result
);
1771 gimple_set_location (widen_abd_stmt
, gimple_location (last_stmt
));
1772 return widen_abd_stmt
;
1775 /* Function vect_recog_ctz_ffs_pattern
1777 Try to find the following pattern:
1782 B = __builtin_ctz{,l,ll} (A);
1786 B = __builtin_ffs{,l,ll} (A);
1790 * STMT_VINFO: The stmt from which the pattern search begins.
1791 here it starts with B = __builtin_* (A);
1795 * TYPE_OUT: The vector type of the output of this pattern.
1797 * Return value: A new stmt that will be used to replace the sequence of
1798 stmts that constitute the pattern, using clz or popcount builtins. */
1801 vect_recog_ctz_ffs_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
1804 gimple
*call_stmt
= stmt_vinfo
->stmt
;
1805 gimple
*pattern_stmt
;
1806 tree rhs_oprnd
, rhs_type
, lhs_oprnd
, lhs_type
, vec_type
, vec_rhs_type
;
1808 internal_fn ifn
= IFN_LAST
, ifnnew
= IFN_LAST
;
1809 bool defined_at_zero
= true, defined_at_zero_new
= false;
1810 int val
= 0, val_new
= 0, val_cmp
= 0;
1812 int sub
= 0, add
= 0;
1815 if (!is_gimple_call (call_stmt
))
1818 if (gimple_call_num_args (call_stmt
) != 1
1819 && gimple_call_num_args (call_stmt
) != 2)
1822 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
1823 rhs_type
= TREE_TYPE (rhs_oprnd
);
1824 lhs_oprnd
= gimple_call_lhs (call_stmt
);
1827 lhs_type
= TREE_TYPE (lhs_oprnd
);
1828 if (!INTEGRAL_TYPE_P (lhs_type
)
1829 || !INTEGRAL_TYPE_P (rhs_type
)
1830 || !type_has_mode_precision_p (rhs_type
)
1831 || TREE_CODE (rhs_oprnd
) != SSA_NAME
)
1834 switch (gimple_call_combined_fn (call_stmt
))
1838 if (!gimple_call_internal_p (call_stmt
)
1839 || gimple_call_num_args (call_stmt
) != 2)
1840 defined_at_zero
= false;
1842 val
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
1851 prec
= TYPE_PRECISION (rhs_type
);
1852 loc
= gimple_location (call_stmt
);
1854 vec_type
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
1858 vec_rhs_type
= get_vectype_for_scalar_type (vinfo
, rhs_type
);
1862 /* Do it only if the backend doesn't have ctz<vector_mode>2 or
1863 ffs<vector_mode>2 pattern but does have clz<vector_mode>2 or
1864 popcount<vector_mode>2. */
1866 || direct_internal_fn_supported_p (ifn
, vec_rhs_type
,
1867 OPTIMIZE_FOR_SPEED
))
1871 && direct_internal_fn_supported_p (IFN_CTZ
, vec_rhs_type
,
1872 OPTIMIZE_FOR_SPEED
))
1876 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type
),
1879 else if (direct_internal_fn_supported_p (IFN_CLZ
, vec_rhs_type
,
1880 OPTIMIZE_FOR_SPEED
))
1884 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (rhs_type
),
1887 if ((ifnnew
== IFN_LAST
1888 || (defined_at_zero
&& !defined_at_zero_new
))
1889 && direct_internal_fn_supported_p (IFN_POPCOUNT
, vec_rhs_type
,
1890 OPTIMIZE_FOR_SPEED
))
1892 ifnnew
= IFN_POPCOUNT
;
1893 defined_at_zero_new
= true;
1896 if (ifnnew
== IFN_LAST
)
1899 vect_pattern_detected ("vec_recog_ctz_ffs_pattern", call_stmt
);
1902 if ((ifnnew
== IFN_CLZ
1904 && defined_at_zero_new
1907 || (ifnnew
== IFN_POPCOUNT
&& ifn
== IFN_CTZ
))
1909 /* .CTZ (X) = PREC - .CLZ ((X - 1) & ~X)
1910 .CTZ (X) = .POPCOUNT ((X - 1) & ~X). */
1911 if (ifnnew
== IFN_CLZ
)
1915 if (!TYPE_UNSIGNED (rhs_type
))
1917 rhs_type
= unsigned_type_for (rhs_type
);
1918 vec_rhs_type
= get_vectype_for_scalar_type (vinfo
, rhs_type
);
1919 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1920 pattern_stmt
= gimple_build_assign (new_var
, NOP_EXPR
, rhs_oprnd
);
1921 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
,
1923 rhs_oprnd
= new_var
;
1926 tree m1
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1927 pattern_stmt
= gimple_build_assign (m1
, PLUS_EXPR
, rhs_oprnd
,
1928 build_int_cst (rhs_type
, -1));
1929 gimple_set_location (pattern_stmt
, loc
);
1930 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1932 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1933 pattern_stmt
= gimple_build_assign (new_var
, BIT_NOT_EXPR
, rhs_oprnd
);
1934 gimple_set_location (pattern_stmt
, loc
);
1935 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1936 rhs_oprnd
= new_var
;
1938 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1939 pattern_stmt
= gimple_build_assign (new_var
, BIT_AND_EXPR
,
1941 gimple_set_location (pattern_stmt
, loc
);
1942 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1943 rhs_oprnd
= new_var
;
1945 else if (ifnnew
== IFN_CLZ
)
1947 /* .CTZ (X) = (PREC - 1) - .CLZ (X & -X)
1948 .FFS (X) = PREC - .CLZ (X & -X). */
1949 sub
= prec
- (ifn
== IFN_CTZ
);
1950 val_cmp
= sub
- val_new
;
1952 tree neg
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1953 pattern_stmt
= gimple_build_assign (neg
, NEGATE_EXPR
, rhs_oprnd
);
1954 gimple_set_location (pattern_stmt
, loc
);
1955 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1957 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1958 pattern_stmt
= gimple_build_assign (new_var
, BIT_AND_EXPR
,
1960 gimple_set_location (pattern_stmt
, loc
);
1961 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1962 rhs_oprnd
= new_var
;
1964 else if (ifnnew
== IFN_POPCOUNT
)
1966 /* .CTZ (X) = PREC - .POPCOUNT (X | -X)
1967 .FFS (X) = (PREC + 1) - .POPCOUNT (X | -X). */
1968 sub
= prec
+ (ifn
== IFN_FFS
);
1971 tree neg
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1972 pattern_stmt
= gimple_build_assign (neg
, NEGATE_EXPR
, rhs_oprnd
);
1973 gimple_set_location (pattern_stmt
, loc
);
1974 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1976 new_var
= vect_recog_temp_ssa_var (rhs_type
, NULL
);
1977 pattern_stmt
= gimple_build_assign (new_var
, BIT_IOR_EXPR
,
1979 gimple_set_location (pattern_stmt
, loc
);
1980 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_rhs_type
);
1981 rhs_oprnd
= new_var
;
1983 else if (ifnnew
== IFN_CTZ
)
1985 /* .FFS (X) = .CTZ (X) + 1. */
1990 /* Create B = .IFNNEW (A). */
1991 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
1992 if ((ifnnew
== IFN_CLZ
|| ifnnew
== IFN_CTZ
) && defined_at_zero_new
)
1994 = gimple_build_call_internal (ifnnew
, 2, rhs_oprnd
,
1995 build_int_cst (integer_type_node
,
1998 pattern_stmt
= gimple_build_call_internal (ifnnew
, 1, rhs_oprnd
);
1999 gimple_call_set_lhs (pattern_stmt
, new_var
);
2000 gimple_set_location (pattern_stmt
, loc
);
2001 *type_out
= vec_type
;
2005 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2006 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2007 pattern_stmt
= gimple_build_assign (ret_var
, MINUS_EXPR
,
2008 build_int_cst (lhs_type
, sub
),
2010 gimple_set_location (pattern_stmt
, loc
);
2015 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2016 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2017 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2018 build_int_cst (lhs_type
, add
));
2019 gimple_set_location (pattern_stmt
, loc
);
2024 && (!defined_at_zero_new
|| val
!= val_cmp
))
2026 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2027 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2028 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2029 rhs_type
= TREE_TYPE (rhs_oprnd
);
2030 tree cmp
= build2_loc (loc
, NE_EXPR
, boolean_type_node
,
2031 rhs_oprnd
, build_zero_cst (rhs_type
));
2032 pattern_stmt
= gimple_build_assign (ret_var
, COND_EXPR
, cmp
,
2034 build_int_cst (lhs_type
, val
));
2037 if (dump_enabled_p ())
2038 dump_printf_loc (MSG_NOTE
, vect_location
,
2039 "created pattern stmt: %G", pattern_stmt
);
2041 return pattern_stmt
;
2044 /* Function vect_recog_popcount_clz_ctz_ffs_pattern
2046 Try to find the following pattern:
2052 temp_in = (UTYPE2)A;
2054 temp_out = __builtin_popcount{,l,ll} (temp_in);
2055 B = (TYPE1) temp_out;
2057 TYPE2 may or may not be equal to TYPE3.
2058 i.e. TYPE2 is equal to TYPE3 for __builtin_popcount
2059 i.e. TYPE2 is not equal to TYPE3 for __builtin_popcountll
2063 * STMT_VINFO: The stmt from which the pattern search begins.
2064 here it starts with B = (TYPE1) temp_out;
2068 * TYPE_OUT: The vector type of the output of this pattern.
2070 * Return value: A new stmt that will be used to replace the sequence of
2071 stmts that constitute the pattern. In this case it will be:
2074 Similarly for clz, ctz and ffs.
2078 vect_recog_popcount_clz_ctz_ffs_pattern (vec_info
*vinfo
,
2079 stmt_vec_info stmt_vinfo
,
2082 gassign
*last_stmt
= dyn_cast
<gassign
*> (stmt_vinfo
->stmt
);
2083 gimple
*call_stmt
, *pattern_stmt
;
2084 tree rhs_oprnd
, rhs_origin
, lhs_oprnd
, lhs_type
, vec_type
, new_var
;
2085 internal_fn ifn
= IFN_LAST
;
2088 /* Find B = (TYPE1) temp_out. */
2091 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2092 if (!CONVERT_EXPR_CODE_P (code
))
2095 lhs_oprnd
= gimple_assign_lhs (last_stmt
);
2096 lhs_type
= TREE_TYPE (lhs_oprnd
);
2097 if (!INTEGRAL_TYPE_P (lhs_type
))
2100 rhs_oprnd
= gimple_assign_rhs1 (last_stmt
);
2101 if (TREE_CODE (rhs_oprnd
) != SSA_NAME
2102 || !has_single_use (rhs_oprnd
))
2104 call_stmt
= SSA_NAME_DEF_STMT (rhs_oprnd
);
2106 /* Find temp_out = __builtin_popcount{,l,ll} (temp_in); */
2107 if (!is_gimple_call (call_stmt
))
2109 switch (gimple_call_combined_fn (call_stmt
))
2117 /* Punt if call result is unsigned and defined value at zero
2118 is negative, as the negative value doesn't extend correctly. */
2119 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2120 && gimple_call_internal_p (call_stmt
)
2121 && CLZ_DEFINED_VALUE_AT_ZERO
2122 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2128 /* Punt if call result is unsigned and defined value at zero
2129 is negative, as the negative value doesn't extend correctly. */
2130 if (TYPE_UNSIGNED (TREE_TYPE (rhs_oprnd
))
2131 && gimple_call_internal_p (call_stmt
)
2132 && CTZ_DEFINED_VALUE_AT_ZERO
2133 (SCALAR_INT_TYPE_MODE (TREE_TYPE (rhs_oprnd
)), val
) == 2
2144 if (gimple_call_num_args (call_stmt
) != 1
2145 && gimple_call_num_args (call_stmt
) != 2)
2148 rhs_oprnd
= gimple_call_arg (call_stmt
, 0);
2149 vect_unpromoted_value unprom_diff
;
2151 = vect_look_through_possible_promotion (vinfo
, rhs_oprnd
, &unprom_diff
);
2156 /* Input and output of .POPCOUNT should be same-precision integer. */
2157 if (TYPE_PRECISION (unprom_diff
.type
) != TYPE_PRECISION (lhs_type
))
2160 /* Also A should be unsigned or same precision as temp_in, otherwise
2161 different builtins/internal functions have different behaviors. */
2162 if (TYPE_PRECISION (unprom_diff
.type
)
2163 != TYPE_PRECISION (TREE_TYPE (rhs_oprnd
)))
2167 /* For popcount require zero extension, which doesn't add any
2168 further bits to the count. */
2169 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2173 /* clzll (x) == clz (x) + 32 for unsigned x != 0, so ok
2174 if it is undefined at zero or if it matches also for the
2175 defined value there. */
2176 if (!TYPE_UNSIGNED (unprom_diff
.type
))
2178 if (!type_has_mode_precision_p (lhs_type
)
2179 || !type_has_mode_precision_p (TREE_TYPE (rhs_oprnd
)))
2181 addend
= (TYPE_PRECISION (TREE_TYPE (rhs_oprnd
))
2182 - TYPE_PRECISION (lhs_type
));
2183 if (gimple_call_internal_p (call_stmt
)
2184 && gimple_call_num_args (call_stmt
) == 2)
2187 val1
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
2189 = CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2191 if (d2
!= 2 || val1
!= val2
+ addend
)
2196 /* ctzll (x) == ctz (x) for unsigned or signed x != 0, so ok
2197 if it is undefined at zero or if it matches also for the
2198 defined value there. */
2199 if (gimple_call_internal_p (call_stmt
)
2200 && gimple_call_num_args (call_stmt
) == 2)
2203 val1
= tree_to_shwi (gimple_call_arg (call_stmt
, 1));
2205 = CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2207 if (d2
!= 2 || val1
!= val2
)
2212 /* ffsll (x) == ffs (x) for unsigned or signed x. */
2218 vec_type
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
2219 /* Do it only if the backend has popcount<vector_mode>2 etc. pattern. */
2224 = direct_internal_fn_supported_p (ifn
, vec_type
, OPTIMIZE_FOR_SPEED
);
2232 /* vect_recog_ctz_ffs_pattern can implement ffs using ctz. */
2233 if (direct_internal_fn_supported_p (IFN_CTZ
, vec_type
,
2234 OPTIMIZE_FOR_SPEED
))
2238 /* vect_recog_ctz_ffs_pattern can implement ffs or ctz using
2240 if (direct_internal_fn_supported_p (IFN_CLZ
, vec_type
,
2241 OPTIMIZE_FOR_SPEED
))
2243 if (direct_internal_fn_supported_p (IFN_POPCOUNT
, vec_type
,
2244 OPTIMIZE_FOR_SPEED
))
2251 vect_pattern_detected ("vec_recog_popcount_clz_ctz_ffs_pattern",
2254 /* Create B = .POPCOUNT (A). */
2255 new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2256 tree arg2
= NULL_TREE
;
2259 && CLZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2261 arg2
= build_int_cst (integer_type_node
, val
);
2262 else if (ifn
== IFN_CTZ
2263 && CTZ_DEFINED_VALUE_AT_ZERO (SCALAR_INT_TYPE_MODE (lhs_type
),
2265 arg2
= build_int_cst (integer_type_node
, val
);
2267 pattern_stmt
= gimple_build_call_internal (ifn
, 2, unprom_diff
.op
, arg2
);
2269 pattern_stmt
= gimple_build_call_internal (ifn
, 1, unprom_diff
.op
);
2270 gimple_call_set_lhs (pattern_stmt
, new_var
);
2271 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
2272 *type_out
= vec_type
;
2274 if (dump_enabled_p ())
2275 dump_printf_loc (MSG_NOTE
, vect_location
,
2276 "created pattern stmt: %G", pattern_stmt
);
2280 gcc_assert (supported
);
2281 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vec_type
);
2282 tree ret_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
2283 pattern_stmt
= gimple_build_assign (ret_var
, PLUS_EXPR
, new_var
,
2284 build_int_cst (lhs_type
, addend
));
2286 else if (!supported
)
2288 stmt_vec_info new_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
2289 STMT_VINFO_VECTYPE (new_stmt_info
) = vec_type
;
2291 = vect_recog_ctz_ffs_pattern (vinfo
, new_stmt_info
, type_out
);
2292 if (pattern_stmt
== NULL
)
2294 if (gimple_seq seq
= STMT_VINFO_PATTERN_DEF_SEQ (new_stmt_info
))
2296 gimple_seq
*pseq
= &STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo
);
2297 gimple_seq_add_seq_without_update (pseq
, seq
);
2300 return pattern_stmt
;
2303 /* Function vect_recog_pow_pattern
2305 Try to find the following pattern:
2309 with POW being one of pow, powf, powi, powif and N being
2314 * STMT_VINFO: The stmt from which the pattern search begins.
2318 * TYPE_OUT: The type of the output of this pattern.
2320 * Return value: A new stmt that will be used to replace the sequence of
2321 stmts that constitute the pattern. In this case it will be:
2328 vect_recog_pow_pattern (vec_info
*vinfo
,
2329 stmt_vec_info stmt_vinfo
, tree
*type_out
)
2331 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2336 if (!is_gimple_call (last_stmt
) || gimple_call_lhs (last_stmt
) == NULL
)
2339 switch (gimple_call_combined_fn (last_stmt
))
2349 base
= gimple_call_arg (last_stmt
, 0);
2350 exp
= gimple_call_arg (last_stmt
, 1);
2351 if (TREE_CODE (exp
) != REAL_CST
2352 && TREE_CODE (exp
) != INTEGER_CST
)
2354 if (flag_unsafe_math_optimizations
2355 && TREE_CODE (base
) == REAL_CST
2356 && gimple_call_builtin_p (last_stmt
, BUILT_IN_NORMAL
))
2358 combined_fn log_cfn
;
2359 built_in_function exp_bfn
;
2360 switch (DECL_FUNCTION_CODE (gimple_call_fndecl (last_stmt
)))
2363 log_cfn
= CFN_BUILT_IN_LOG
;
2364 exp_bfn
= BUILT_IN_EXP
;
2367 log_cfn
= CFN_BUILT_IN_LOGF
;
2368 exp_bfn
= BUILT_IN_EXPF
;
2371 log_cfn
= CFN_BUILT_IN_LOGL
;
2372 exp_bfn
= BUILT_IN_EXPL
;
2377 tree logc
= fold_const_call (log_cfn
, TREE_TYPE (base
), base
);
2378 tree exp_decl
= builtin_decl_implicit (exp_bfn
);
2379 /* Optimize pow (C, x) as exp (log (C) * x). Normally match.pd
2380 does that, but if C is a power of 2, we want to use
2381 exp2 (log2 (C) * x) in the non-vectorized version, but for
2382 vectorization we don't have vectorized exp2. */
2384 && TREE_CODE (logc
) == REAL_CST
2386 && lookup_attribute ("omp declare simd",
2387 DECL_ATTRIBUTES (exp_decl
)))
2389 cgraph_node
*node
= cgraph_node::get_create (exp_decl
);
2390 if (node
->simd_clones
== NULL
)
2392 if (targetm
.simd_clone
.compute_vecsize_and_simdlen
== NULL
2393 || node
->definition
)
2395 expand_simd_clones (node
);
2396 if (node
->simd_clones
== NULL
)
2399 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2402 tree def
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2403 gimple
*g
= gimple_build_assign (def
, MULT_EXPR
, exp
, logc
);
2404 append_pattern_def_seq (vinfo
, stmt_vinfo
, g
);
2405 tree res
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2406 g
= gimple_build_call (exp_decl
, 1, def
);
2407 gimple_call_set_lhs (g
, res
);
2415 /* We now have a pow or powi builtin function call with a constant
2418 /* Catch squaring. */
2419 if ((tree_fits_shwi_p (exp
)
2420 && tree_to_shwi (exp
) == 2)
2421 || (TREE_CODE (exp
) == REAL_CST
2422 && real_equal (&TREE_REAL_CST (exp
), &dconst2
)))
2424 if (!vect_supportable_direct_optab_p (vinfo
, TREE_TYPE (base
), MULT_EXPR
,
2425 TREE_TYPE (base
), type_out
))
2428 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), NULL
);
2429 stmt
= gimple_build_assign (var
, MULT_EXPR
, base
, base
);
2433 /* Catch square root. */
2434 if (TREE_CODE (exp
) == REAL_CST
2435 && real_equal (&TREE_REAL_CST (exp
), &dconsthalf
))
2437 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (base
));
2439 && direct_internal_fn_supported_p (IFN_SQRT
, *type_out
,
2440 OPTIMIZE_FOR_SPEED
))
2442 gcall
*stmt
= gimple_build_call_internal (IFN_SQRT
, 1, base
);
2443 var
= vect_recog_temp_ssa_var (TREE_TYPE (base
), stmt
);
2444 gimple_call_set_lhs (stmt
, var
);
2445 gimple_call_set_nothrow (stmt
, true);
2454 /* Function vect_recog_widen_sum_pattern
2456 Try to find the following pattern:
2459 TYPE x_T, sum = init;
2461 sum_0 = phi <init, sum_1>
2463 S2 x_T = (TYPE) x_t;
2464 S3 sum_1 = x_T + sum_0;
2466 where type 'TYPE' is at least double the size of type 'type', i.e - we're
2467 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
2468 a special case of a reduction computation.
2472 * STMT_VINFO: The stmt from which the pattern search begins. In the example,
2473 when this function is called with S3, the pattern {S2,S3} will be detected.
2477 * TYPE_OUT: The type of the output of this pattern.
2479 * Return value: A new stmt that will be used to replace the sequence of
2480 stmts that constitute the pattern. In this case it will be:
2481 WIDEN_SUM <x_t, sum_0>
2483 Note: The widening-sum idiom is a widening reduction pattern that is
2484 vectorized without preserving all the intermediate results. It
2485 produces only N/2 (widened) results (by summing up pairs of
2486 intermediate results) rather than all N results. Therefore, we
2487 cannot allow this pattern when we want to get all the results and in
2488 the correct order (as is the case when this computation is in an
2489 inner-loop nested in an outer-loop that us being vectorized). */
2492 vect_recog_widen_sum_pattern (vec_info
*vinfo
,
2493 stmt_vec_info stmt_vinfo
, tree
*type_out
)
2495 gimple
*last_stmt
= stmt_vinfo
->stmt
;
2496 tree oprnd0
, oprnd1
;
2498 gimple
*pattern_stmt
;
2501 /* Look for the following pattern
2504 In which DX is at least double the size of X, and sum_1 has been
2505 recognized as a reduction variable.
2508 /* Starting from LAST_STMT, follow the defs of its uses in search
2509 of the above pattern. */
2511 if (!vect_reassociating_reduction_p (vinfo
, stmt_vinfo
, PLUS_EXPR
,
2513 || TREE_CODE (oprnd0
) != SSA_NAME
2514 || !vinfo
->lookup_def (oprnd0
))
2517 type
= TREE_TYPE (gimple_get_lhs (last_stmt
));
2519 /* So far so good. Since last_stmt was detected as a (summation) reduction,
2520 we know that oprnd1 is the reduction variable (defined by a loop-header
2521 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
2522 Left to check that oprnd0 is defined by a cast from type 'type' to type
2525 vect_unpromoted_value unprom0
;
2526 if (!vect_look_through_possible_promotion (vinfo
, oprnd0
, &unprom0
)
2527 || TYPE_PRECISION (unprom0
.type
) * 2 > TYPE_PRECISION (type
))
2530 vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt
);
2532 if (!vect_supportable_direct_optab_p (vinfo
, type
, WIDEN_SUM_EXPR
,
2533 unprom0
.type
, type_out
))
2536 var
= vect_recog_temp_ssa_var (type
, NULL
);
2537 pattern_stmt
= gimple_build_assign (var
, WIDEN_SUM_EXPR
, unprom0
.op
, oprnd1
);
2539 return pattern_stmt
;
2542 /* Function vect_recog_bitfield_ref_pattern
2544 Try to find the following pattern:
2546 bf_value = BIT_FIELD_REF (container, bitsize, bitpos);
2547 result = (type_out) bf_value;
2551 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2553 where type_out is a non-bitfield type, that is to say, it's precision matches
2554 2^(TYPE_SIZE(type_out) - (TYPE_UNSIGNED (type_out) ? 1 : 2)).
2558 * STMT_VINFO: The stmt from which the pattern search begins.
2559 here it starts with:
2560 result = (type_out) bf_value;
2564 if (BIT_FIELD_REF (container, bitsize, bitpos) `cmp` <constant>)
2568 * TYPE_OUT: The vector type of the output of this pattern.
2570 * Return value: A new stmt that will be used to replace the sequence of
2571 stmts that constitute the pattern. If the precision of type_out is bigger
2572 than the precision type of _1 we perform the widening before the shifting,
2573 since the new precision will be large enough to shift the value and moving
2574 widening operations up the statement chain enables the generation of
2575 widening loads. If we are widening and the operation after the pattern is
2576 an addition then we mask first and shift later, to enable the generation of
2577 shifting adds. In the case of narrowing we will always mask first, shift
2578 last and then perform a narrowing operation. This will enable the
2579 generation of narrowing shifts.
2581 Widening with mask first, shift later:
2582 container = (type_out) container;
2583 masked = container & (((1 << bitsize) - 1) << bitpos);
2584 result = masked >> bitpos;
2586 Widening with shift first, mask last:
2587 container = (type_out) container;
2588 shifted = container >> bitpos;
2589 result = shifted & ((1 << bitsize) - 1);
2592 masked = container & (((1 << bitsize) - 1) << bitpos);
2593 result = masked >> bitpos;
2594 result = (type_out) result;
2596 If the bitfield is signed and it's wider than type_out, we need to
2597 keep the result sign-extended:
2598 container = (type) container;
2599 masked = container << (prec - bitsize - bitpos);
2600 result = (type_out) (masked >> (prec - bitsize));
2602 Here type is the signed variant of the wider of type_out and the type
2605 The shifting is always optional depending on whether bitpos != 0.
2607 When the original bitfield was inside a gcond then an new gcond is also
2608 generated with the newly `result` as the operand to the comparison.
2613 vect_recog_bitfield_ref_pattern (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2616 gimple
*bf_stmt
= NULL
;
2617 tree lhs
= NULL_TREE
;
2618 tree ret_type
= NULL_TREE
;
2619 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
2620 if (gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt
))
2622 tree op
= gimple_cond_lhs (cond_stmt
);
2623 if (TREE_CODE (op
) != SSA_NAME
)
2625 bf_stmt
= dyn_cast
<gassign
*> (SSA_NAME_DEF_STMT (op
));
2626 if (TREE_CODE (gimple_cond_rhs (cond_stmt
)) != INTEGER_CST
)
2629 else if (is_gimple_assign (stmt
)
2630 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt
))
2631 && TREE_CODE (gimple_assign_rhs1 (stmt
)) == SSA_NAME
)
2633 gimple
*second_stmt
= SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt
));
2634 bf_stmt
= dyn_cast
<gassign
*> (second_stmt
);
2635 lhs
= gimple_assign_lhs (stmt
);
2636 ret_type
= TREE_TYPE (lhs
);
2640 || gimple_assign_rhs_code (bf_stmt
) != BIT_FIELD_REF
)
2643 tree bf_ref
= gimple_assign_rhs1 (bf_stmt
);
2644 tree container
= TREE_OPERAND (bf_ref
, 0);
2645 ret_type
= ret_type
? ret_type
: TREE_TYPE (container
);
2647 if (!bit_field_offset (bf_ref
).is_constant ()
2648 || !bit_field_size (bf_ref
).is_constant ()
2649 || !tree_fits_uhwi_p (TYPE_SIZE (TREE_TYPE (container
))))
2652 if (!INTEGRAL_TYPE_P (TREE_TYPE (bf_ref
))
2653 || !INTEGRAL_TYPE_P (TREE_TYPE (container
))
2654 || TYPE_MODE (TREE_TYPE (container
)) == E_BLKmode
)
2657 gimple
*use_stmt
, *pattern_stmt
;
2658 use_operand_p use_p
;
2659 bool shift_first
= true;
2660 tree container_type
= TREE_TYPE (container
);
2661 tree vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2663 /* Calculate shift_n before the adjustments for widening loads, otherwise
2664 the container may change and we have to consider offset change for
2665 widening loads on big endianness. The shift_n calculated here can be
2666 independent of widening. */
2667 unsigned HOST_WIDE_INT shift_n
= bit_field_offset (bf_ref
).to_constant ();
2668 unsigned HOST_WIDE_INT mask_width
= bit_field_size (bf_ref
).to_constant ();
2669 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2670 if (BYTES_BIG_ENDIAN
)
2671 shift_n
= prec
- shift_n
- mask_width
;
2673 bool ref_sext
= (!TYPE_UNSIGNED (TREE_TYPE (bf_ref
)) &&
2674 TYPE_PRECISION (ret_type
) > mask_width
);
2675 bool load_widen
= (TYPE_PRECISION (TREE_TYPE (container
)) <
2676 TYPE_PRECISION (ret_type
));
2678 /* We move the conversion earlier if the loaded type is smaller than the
2679 return type to enable the use of widening loads. And if we need a
2680 sign extension, we need to convert the loaded value early to a signed
2682 if (ref_sext
|| load_widen
)
2684 tree type
= load_widen
? ret_type
: container_type
;
2686 type
= gimple_signed_type (type
);
2687 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
),
2688 NOP_EXPR
, container
);
2689 container
= gimple_get_lhs (pattern_stmt
);
2690 container_type
= TREE_TYPE (container
);
2691 prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2692 vectype
= get_vectype_for_scalar_type (vinfo
, container_type
);
2693 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2695 else if (!useless_type_conversion_p (TREE_TYPE (container
), ret_type
))
2696 /* If we are doing the conversion last then also delay the shift as we may
2697 be able to combine the shift and conversion in certain cases. */
2698 shift_first
= false;
2700 /* If the only use of the result of this BIT_FIELD_REF + CONVERT is a
2701 PLUS_EXPR then do the shift last as some targets can combine the shift and
2702 add into a single instruction. */
2703 if (lhs
&& single_imm_use (lhs
, &use_p
, &use_stmt
))
2705 if (gimple_code (use_stmt
) == GIMPLE_ASSIGN
2706 && gimple_assign_rhs_code (use_stmt
) == PLUS_EXPR
)
2707 shift_first
= false;
2710 /* If we don't have to shift we only generate the mask, so just fix the
2711 code-path to shift_first. */
2716 if (shift_first
&& !ref_sext
)
2718 tree shifted
= container
;
2722 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2723 RSHIFT_EXPR
, container
,
2724 build_int_cst (sizetype
, shift_n
));
2725 shifted
= gimple_assign_lhs (pattern_stmt
);
2726 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2729 tree mask
= wide_int_to_tree (container_type
,
2730 wi::mask (mask_width
, false, prec
));
2733 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2734 BIT_AND_EXPR
, shifted
, mask
);
2735 result
= gimple_assign_lhs (pattern_stmt
);
2739 tree temp
= vect_recog_temp_ssa_var (container_type
);
2742 tree mask
= wide_int_to_tree (container_type
,
2743 wi::shifted_mask (shift_n
,
2746 pattern_stmt
= gimple_build_assign (temp
, BIT_AND_EXPR
,
2751 HOST_WIDE_INT shl
= prec
- shift_n
- mask_width
;
2753 pattern_stmt
= gimple_build_assign (temp
, LSHIFT_EXPR
,
2755 build_int_cst (sizetype
,
2759 tree masked
= gimple_assign_lhs (pattern_stmt
);
2760 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2762 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2763 RSHIFT_EXPR
, masked
,
2764 build_int_cst (sizetype
, shift_n
));
2765 result
= gimple_assign_lhs (pattern_stmt
);
2768 if (!useless_type_conversion_p (TREE_TYPE (result
), ret_type
))
2770 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2772 = gimple_build_assign (vect_recog_temp_ssa_var (ret_type
),
2781 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
, vectype
);
2782 vectype
= truth_type_for (vectype
);
2784 /* FIXME: This part extracts the boolean value out of the bitfield in the
2785 same way as vect_recog_gcond_pattern does. However because
2786 patterns cannot match the same root twice, when we handle and
2787 lower the bitfield in the gcond, vect_recog_gcond_pattern can't
2788 apply anymore. We should really fix it so that we don't need to
2789 duplicate transformations like these. */
2790 tree new_lhs
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
2791 gcond
*cond_stmt
= dyn_cast
<gcond
*> (stmt_info
->stmt
);
2792 tree cond_cst
= gimple_cond_rhs (cond_stmt
);
2794 = gimple_build_assign (new_lhs
, gimple_cond_code (cond_stmt
),
2795 gimple_get_lhs (pattern_stmt
),
2796 fold_convert (container_type
, cond_cst
));
2797 append_pattern_def_seq (vinfo
, stmt_info
, new_stmt
, vectype
, container_type
);
2799 = gimple_build_cond (NE_EXPR
, new_lhs
,
2800 build_zero_cst (TREE_TYPE (new_lhs
)),
2801 NULL_TREE
, NULL_TREE
);
2804 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2805 vect_pattern_detected ("bitfield_ref pattern", stmt_info
->stmt
);
2807 return pattern_stmt
;
2810 /* Function vect_recog_bit_insert_pattern
2812 Try to find the following pattern:
2814 written = BIT_INSERT_EXPR (container, value, bitpos);
2818 * STMT_VINFO: The stmt we want to replace.
2822 * TYPE_OUT: The vector type of the output of this pattern.
2824 * Return value: A new stmt that will be used to replace the sequence of
2825 stmts that constitute the pattern. In this case it will be:
2826 value = (container_type) value; // Make sure
2827 shifted = value << bitpos; // Shift value into place
2828 masked = shifted & (mask << bitpos); // Mask off the non-relevant bits in
2829 // the 'to-write value'.
2830 cleared = container & ~(mask << bitpos); // Clearing the bits we want to
2831 // write to from the value we want
2833 written = cleared | masked; // Write bits.
2836 where mask = ((1 << TYPE_PRECISION (value)) - 1), a mask to keep the number of
2837 bits corresponding to the real size of the bitfield value we are writing to.
2838 The shifting is always optional depending on whether bitpos != 0.
2843 vect_recog_bit_insert_pattern (vec_info
*vinfo
, stmt_vec_info stmt_info
,
2846 gassign
*bf_stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
2847 if (!bf_stmt
|| gimple_assign_rhs_code (bf_stmt
) != BIT_INSERT_EXPR
)
2850 tree container
= gimple_assign_rhs1 (bf_stmt
);
2851 tree value
= gimple_assign_rhs2 (bf_stmt
);
2852 tree shift
= gimple_assign_rhs3 (bf_stmt
);
2854 tree bf_type
= TREE_TYPE (value
);
2855 tree container_type
= TREE_TYPE (container
);
2857 if (!INTEGRAL_TYPE_P (container_type
)
2858 || !tree_fits_uhwi_p (TYPE_SIZE (container_type
)))
2861 gimple
*pattern_stmt
;
2863 vect_unpromoted_value unprom
;
2864 unprom
.set_op (value
, vect_internal_def
);
2865 value
= vect_convert_input (vinfo
, stmt_info
, container_type
, &unprom
,
2866 get_vectype_for_scalar_type (vinfo
,
2869 unsigned HOST_WIDE_INT mask_width
= TYPE_PRECISION (bf_type
);
2870 unsigned HOST_WIDE_INT prec
= tree_to_uhwi (TYPE_SIZE (container_type
));
2871 unsigned HOST_WIDE_INT shift_n
= tree_to_uhwi (shift
);
2872 if (BYTES_BIG_ENDIAN
)
2874 shift_n
= prec
- shift_n
- mask_width
;
2875 shift
= build_int_cst (TREE_TYPE (shift
), shift_n
);
2878 if (!useless_type_conversion_p (TREE_TYPE (value
), container_type
))
2881 gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2883 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2884 value
= gimple_get_lhs (pattern_stmt
);
2887 /* Shift VALUE into place. */
2888 tree shifted
= value
;
2891 gimple_seq stmts
= NULL
;
2893 = gimple_build (&stmts
, LSHIFT_EXPR
, container_type
, value
, shift
);
2894 if (!gimple_seq_empty_p (stmts
))
2895 append_pattern_def_seq (vinfo
, stmt_info
,
2896 gimple_seq_first_stmt (stmts
));
2900 = wide_int_to_tree (container_type
,
2901 wi::shifted_mask (shift_n
, mask_width
, false, prec
));
2903 /* Clear bits we don't want to write back from SHIFTED. */
2904 gimple_seq stmts
= NULL
;
2905 tree masked
= gimple_build (&stmts
, BIT_AND_EXPR
, container_type
, shifted
,
2907 if (!gimple_seq_empty_p (stmts
))
2909 pattern_stmt
= gimple_seq_first_stmt (stmts
);
2910 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2913 /* Mask off the bits in the container that we are to write to. */
2914 mask_t
= wide_int_to_tree (container_type
,
2915 wi::shifted_mask (shift_n
, mask_width
, true, prec
));
2916 tree cleared
= vect_recog_temp_ssa_var (container_type
);
2917 pattern_stmt
= gimple_build_assign (cleared
, BIT_AND_EXPR
, container
, mask_t
);
2918 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
);
2920 /* Write MASKED into CLEARED. */
2922 = gimple_build_assign (vect_recog_temp_ssa_var (container_type
),
2923 BIT_IOR_EXPR
, cleared
, masked
);
2925 *type_out
= STMT_VINFO_VECTYPE (stmt_info
);
2926 vect_pattern_detected ("bit_insert pattern", stmt_info
->stmt
);
2928 return pattern_stmt
;
2932 /* Recognize cases in which an operation is performed in one type WTYPE
2933 but could be done more efficiently in a narrower type NTYPE. For example,
2936 ATYPE a; // narrower than NTYPE
2937 BTYPE b; // narrower than NTYPE
2938 WTYPE aw = (WTYPE) a;
2939 WTYPE bw = (WTYPE) b;
2940 WTYPE res = aw + bw; // only uses of aw and bw
2942 then it would be more efficient to do:
2944 NTYPE an = (NTYPE) a;
2945 NTYPE bn = (NTYPE) b;
2946 NTYPE resn = an + bn;
2947 WTYPE res = (WTYPE) resn;
2949 Other situations include things like:
2951 ATYPE a; // NTYPE or narrower
2952 WTYPE aw = (WTYPE) a;
2955 when only "(NTYPE) res" is significant. In that case it's more efficient
2956 to truncate "b" and do the operation on NTYPE instead:
2958 NTYPE an = (NTYPE) a;
2959 NTYPE bn = (NTYPE) b; // truncation
2960 NTYPE resn = an + bn;
2961 WTYPE res = (WTYPE) resn;
2963 All users of "res" should then use "resn" instead, making the final
2964 statement dead (not marked as relevant). The final statement is still
2965 needed to maintain the type correctness of the IR.
2967 vect_determine_precisions has already determined the minimum
2968 precison of the operation and the minimum precision required
2969 by users of the result. */
2972 vect_recog_over_widening_pattern (vec_info
*vinfo
,
2973 stmt_vec_info last_stmt_info
, tree
*type_out
)
2975 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
2979 /* See whether we have found that this operation can be done on a
2980 narrower type without changing its semantics. */
2981 unsigned int new_precision
= last_stmt_info
->operation_precision
;
2985 tree lhs
= gimple_assign_lhs (last_stmt
);
2986 tree type
= TREE_TYPE (lhs
);
2987 tree_code code
= gimple_assign_rhs_code (last_stmt
);
2989 /* Punt for reductions where we don't handle the type conversions. */
2990 if (STMT_VINFO_DEF_TYPE (last_stmt_info
) == vect_reduction_def
)
2993 /* Keep the first operand of a COND_EXPR as-is: only the other two
2994 operands are interesting. */
2995 unsigned int first_op
= (code
== COND_EXPR
? 2 : 1);
2997 /* Check the operands. */
2998 unsigned int nops
= gimple_num_ops (last_stmt
) - first_op
;
2999 auto_vec
<vect_unpromoted_value
, 3> unprom (nops
);
3000 unprom
.quick_grow_cleared (nops
);
3001 unsigned int min_precision
= 0;
3002 bool single_use_p
= false;
3003 for (unsigned int i
= 0; i
< nops
; ++i
)
3005 tree op
= gimple_op (last_stmt
, first_op
+ i
);
3006 if (TREE_CODE (op
) == INTEGER_CST
)
3007 unprom
[i
].set_op (op
, vect_constant_def
);
3008 else if (TREE_CODE (op
) == SSA_NAME
)
3010 bool op_single_use_p
= true;
3011 if (!vect_look_through_possible_promotion (vinfo
, op
, &unprom
[i
],
3016 (1) N bits of the result are needed;
3017 (2) all inputs are widened from M<N bits; and
3018 (3) one operand OP is a single-use SSA name
3020 we can shift the M->N widening from OP to the output
3021 without changing the number or type of extensions involved.
3022 This then reduces the number of copies of STMT_INFO.
3024 If instead of (3) more than one operand is a single-use SSA name,
3025 shifting the extension to the output is even more of a win.
3029 (1) N bits of the result are needed;
3030 (2) one operand OP2 is widened from M2<N bits;
3031 (3) another operand OP1 is widened from M1<M2 bits; and
3032 (4) both OP1 and OP2 are single-use
3034 the choice is between:
3036 (a) truncating OP2 to M1, doing the operation on M1,
3037 and then widening the result to N
3039 (b) widening OP1 to M2, doing the operation on M2, and then
3040 widening the result to N
3042 Both shift the M2->N widening of the inputs to the output.
3043 (a) additionally shifts the M1->M2 widening to the output;
3044 it requires fewer copies of STMT_INFO but requires an extra
3047 Which is better will depend on the complexity and cost of
3048 STMT_INFO, which is hard to predict at this stage. However,
3049 a clear tie-breaker in favor of (b) is the fact that the
3050 truncation in (a) increases the length of the operation chain.
3052 If instead of (4) only one of OP1 or OP2 is single-use,
3053 (b) is still a win over doing the operation in N bits:
3054 it still shifts the M2->N widening on the single-use operand
3055 to the output and reduces the number of STMT_INFO copies.
3057 If neither operand is single-use then operating on fewer than
3058 N bits might lead to more extensions overall. Whether it does
3059 or not depends on global information about the vectorization
3060 region, and whether that's a good trade-off would again
3061 depend on the complexity and cost of the statements involved,
3062 as well as things like register pressure that are not normally
3063 modelled at this stage. We therefore ignore these cases
3064 and just optimize the clear single-use wins above.
3066 Thus we take the maximum precision of the unpromoted operands
3067 and record whether any operand is single-use. */
3068 if (unprom
[i
].dt
== vect_internal_def
)
3070 min_precision
= MAX (min_precision
,
3071 TYPE_PRECISION (unprom
[i
].type
));
3072 single_use_p
|= op_single_use_p
;
3079 /* Although the operation could be done in operation_precision, we have
3080 to balance that against introducing extra truncations or extensions.
3081 Calculate the minimum precision that can be handled efficiently.
3083 The loop above determined that the operation could be handled
3084 efficiently in MIN_PRECISION if SINGLE_USE_P; this would shift an
3085 extension from the inputs to the output without introducing more
3086 instructions, and would reduce the number of instructions required
3087 for STMT_INFO itself.
3089 vect_determine_precisions has also determined that the result only
3090 needs min_output_precision bits. Truncating by a factor of N times
3091 requires a tree of N - 1 instructions, so if TYPE is N times wider
3092 than min_output_precision, doing the operation in TYPE and truncating
3093 the result requires N + (N - 1) = 2N - 1 instructions per output vector.
3096 - truncating the input to a unary operation and doing the operation
3097 in the new type requires at most N - 1 + 1 = N instructions per
3100 - doing the same for a binary operation requires at most
3101 (N - 1) * 2 + 1 = 2N - 1 instructions per output vector
3103 Both unary and binary operations require fewer instructions than
3104 this if the operands were extended from a suitable truncated form.
3105 Thus there is usually nothing to lose by doing operations in
3106 min_output_precision bits, but there can be something to gain. */
3108 min_precision
= last_stmt_info
->min_output_precision
;
3110 min_precision
= MIN (min_precision
, last_stmt_info
->min_output_precision
);
3112 /* Apply the minimum efficient precision we just calculated. */
3113 if (new_precision
< min_precision
)
3114 new_precision
= min_precision
;
3115 new_precision
= vect_element_precision (new_precision
);
3116 if (new_precision
>= TYPE_PRECISION (type
))
3119 vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt
);
3121 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3125 /* We've found a viable pattern. Get the new type of the operation. */
3126 bool unsigned_p
= (last_stmt_info
->operation_sign
== UNSIGNED
);
3127 tree new_type
= build_nonstandard_integer_type (new_precision
, unsigned_p
);
3129 /* If we're truncating an operation, we need to make sure that we
3130 don't introduce new undefined overflow. The codes tested here are
3131 a subset of those accepted by vect_truncatable_operation_p. */
3132 tree op_type
= new_type
;
3133 if (TYPE_OVERFLOW_UNDEFINED (new_type
)
3134 && (code
== PLUS_EXPR
|| code
== MINUS_EXPR
|| code
== MULT_EXPR
))
3135 op_type
= build_nonstandard_integer_type (new_precision
, true);
3137 /* We specifically don't check here whether the target supports the
3138 new operation, since it might be something that a later pattern
3139 wants to rewrite anyway. If targets have a minimum element size
3140 for some optabs, we should pattern-match smaller ops to larger ops
3141 where beneficial. */
3142 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3143 tree op_vectype
= get_vectype_for_scalar_type (vinfo
, op_type
);
3144 if (!new_vectype
|| !op_vectype
)
3147 if (dump_enabled_p ())
3148 dump_printf_loc (MSG_NOTE
, vect_location
, "demoting %T to %T\n",
3151 /* Calculate the rhs operands for an operation on OP_TYPE. */
3153 for (unsigned int i
= 1; i
< first_op
; ++i
)
3154 ops
[i
- 1] = gimple_op (last_stmt
, i
);
3155 vect_convert_inputs (vinfo
, last_stmt_info
, nops
, &ops
[first_op
- 1],
3156 op_type
, &unprom
[0], op_vectype
);
3158 /* Use the operation to produce a result of type OP_TYPE. */
3159 tree new_var
= vect_recog_temp_ssa_var (op_type
, NULL
);
3160 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
,
3161 ops
[0], ops
[1], ops
[2]);
3162 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3164 if (dump_enabled_p ())
3165 dump_printf_loc (MSG_NOTE
, vect_location
,
3166 "created pattern stmt: %G", pattern_stmt
);
3168 /* Convert back to the original signedness, if OP_TYPE is different
3170 if (op_type
!= new_type
)
3171 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, new_type
,
3172 pattern_stmt
, op_vectype
);
3174 /* Promote the result to the original type. */
3175 pattern_stmt
= vect_convert_output (vinfo
, last_stmt_info
, type
,
3176 pattern_stmt
, new_vectype
);
3178 return pattern_stmt
;
3181 /* Recognize the following patterns:
3183 ATYPE a; // narrower than TYPE
3184 BTYPE b; // narrower than TYPE
3186 1) Multiply high with scaling
3187 TYPE res = ((TYPE) a * (TYPE) b) >> c;
3188 Here, c is bitsize (TYPE) / 2 - 1.
3190 2) ... or also with rounding
3191 TYPE res = (((TYPE) a * (TYPE) b) >> d + 1) >> 1;
3192 Here, d is bitsize (TYPE) / 2 - 2.
3194 3) Normal multiply high
3195 TYPE res = ((TYPE) a * (TYPE) b) >> e;
3196 Here, e is bitsize (TYPE) / 2.
3198 where only the bottom half of res is used. */
3201 vect_recog_mulhs_pattern (vec_info
*vinfo
,
3202 stmt_vec_info last_stmt_info
, tree
*type_out
)
3204 /* Check for a right shift. */
3205 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3207 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
)
3210 /* Check that the shift result is wider than the users of the
3211 result need (i.e. that narrowing would be a natural choice). */
3212 tree lhs_type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
3213 unsigned int target_precision
3214 = vect_element_precision (last_stmt_info
->min_output_precision
);
3215 if (!INTEGRAL_TYPE_P (lhs_type
)
3216 || target_precision
>= TYPE_PRECISION (lhs_type
))
3219 /* Look through any change in sign on the outer shift input. */
3220 vect_unpromoted_value unprom_rshift_input
;
3221 tree rshift_input
= vect_look_through_possible_promotion
3222 (vinfo
, gimple_assign_rhs1 (last_stmt
), &unprom_rshift_input
);
3224 || TYPE_PRECISION (TREE_TYPE (rshift_input
))
3225 != TYPE_PRECISION (lhs_type
))
3228 /* Get the definition of the shift input. */
3229 stmt_vec_info rshift_input_stmt_info
3230 = vect_get_internal_def (vinfo
, rshift_input
);
3231 if (!rshift_input_stmt_info
)
3233 gassign
*rshift_input_stmt
3234 = dyn_cast
<gassign
*> (rshift_input_stmt_info
->stmt
);
3235 if (!rshift_input_stmt
)
3238 stmt_vec_info mulh_stmt_info
;
3240 bool rounding_p
= false;
3242 /* Check for the presence of the rounding term. */
3243 if (gimple_assign_rhs_code (rshift_input_stmt
) == PLUS_EXPR
)
3245 /* Check that the outer shift was by 1. */
3246 if (!integer_onep (gimple_assign_rhs2 (last_stmt
)))
3249 /* Check that the second operand of the PLUS_EXPR is 1. */
3250 if (!integer_onep (gimple_assign_rhs2 (rshift_input_stmt
)))
3253 /* Look through any change in sign on the addition input. */
3254 vect_unpromoted_value unprom_plus_input
;
3255 tree plus_input
= vect_look_through_possible_promotion
3256 (vinfo
, gimple_assign_rhs1 (rshift_input_stmt
), &unprom_plus_input
);
3258 || TYPE_PRECISION (TREE_TYPE (plus_input
))
3259 != TYPE_PRECISION (TREE_TYPE (rshift_input
)))
3262 /* Get the definition of the multiply-high-scale part. */
3263 stmt_vec_info plus_input_stmt_info
3264 = vect_get_internal_def (vinfo
, plus_input
);
3265 if (!plus_input_stmt_info
)
3267 gassign
*plus_input_stmt
3268 = dyn_cast
<gassign
*> (plus_input_stmt_info
->stmt
);
3269 if (!plus_input_stmt
3270 || gimple_assign_rhs_code (plus_input_stmt
) != RSHIFT_EXPR
)
3273 /* Look through any change in sign on the scaling input. */
3274 vect_unpromoted_value unprom_scale_input
;
3275 tree scale_input
= vect_look_through_possible_promotion
3276 (vinfo
, gimple_assign_rhs1 (plus_input_stmt
), &unprom_scale_input
);
3278 || TYPE_PRECISION (TREE_TYPE (scale_input
))
3279 != TYPE_PRECISION (TREE_TYPE (plus_input
)))
3282 /* Get the definition of the multiply-high part. */
3283 mulh_stmt_info
= vect_get_internal_def (vinfo
, scale_input
);
3284 if (!mulh_stmt_info
)
3287 /* Get the scaling term. */
3288 scale_term
= gimple_assign_rhs2 (plus_input_stmt
);
3293 mulh_stmt_info
= rshift_input_stmt_info
;
3294 scale_term
= gimple_assign_rhs2 (last_stmt
);
3297 /* Check that the scaling factor is constant. */
3298 if (TREE_CODE (scale_term
) != INTEGER_CST
)
3301 /* Check whether the scaling input term can be seen as two widened
3302 inputs multiplied together. */
3303 vect_unpromoted_value unprom_mult
[2];
3306 = vect_widened_op_tree (vinfo
, mulh_stmt_info
, MULT_EXPR
, WIDEN_MULT_EXPR
,
3307 false, 2, unprom_mult
, &new_type
);
3311 /* Adjust output precision. */
3312 if (TYPE_PRECISION (new_type
) < target_precision
)
3313 new_type
= build_nonstandard_integer_type
3314 (target_precision
, TYPE_UNSIGNED (new_type
));
3316 unsigned mult_precision
= TYPE_PRECISION (new_type
);
3318 /* Check that the scaling factor is expected. Instead of
3319 target_precision, we should use the one that we actually
3320 use for internal function. */
3323 /* Check pattern 2). */
3324 if (wi::to_widest (scale_term
) + mult_precision
+ 2
3325 != TYPE_PRECISION (lhs_type
))
3332 /* Check for pattern 1). */
3333 if (wi::to_widest (scale_term
) + mult_precision
+ 1
3334 == TYPE_PRECISION (lhs_type
))
3336 /* Check for pattern 3). */
3337 else if (wi::to_widest (scale_term
) + mult_precision
3338 == TYPE_PRECISION (lhs_type
))
3344 vect_pattern_detected ("vect_recog_mulhs_pattern", last_stmt
);
3346 /* Check for target support. */
3347 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3349 || !direct_internal_fn_supported_p
3350 (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3353 /* The IR requires a valid vector type for the cast result, even though
3354 it's likely to be discarded. */
3355 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
3359 /* Generate the IFN_MULHRS call. */
3360 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3362 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3363 unprom_mult
, new_vectype
);
3365 = gimple_build_call_internal (ifn
, 2, new_ops
[0], new_ops
[1]);
3366 gimple_call_set_lhs (mulhrs_stmt
, new_var
);
3367 gimple_set_location (mulhrs_stmt
, gimple_location (last_stmt
));
3369 if (dump_enabled_p ())
3370 dump_printf_loc (MSG_NOTE
, vect_location
,
3371 "created pattern stmt: %G", (gimple
*) mulhrs_stmt
);
3373 return vect_convert_output (vinfo
, last_stmt_info
, lhs_type
,
3374 mulhrs_stmt
, new_vectype
);
3377 /* Recognize the patterns:
3379 ATYPE a; // narrower than TYPE
3380 BTYPE b; // narrower than TYPE
3381 (1) TYPE avg = ((TYPE) a + (TYPE) b) >> 1;
3382 or (2) TYPE avg = ((TYPE) a + (TYPE) b + 1) >> 1;
3384 where only the bottom half of avg is used. Try to transform them into:
3386 (1) NTYPE avg' = .AVG_FLOOR ((NTYPE) a, (NTYPE) b);
3387 or (2) NTYPE avg' = .AVG_CEIL ((NTYPE) a, (NTYPE) b);
3391 TYPE avg = (TYPE) avg';
3393 where NTYPE is no wider than half of TYPE. Since only the bottom half
3394 of avg is used, all or part of the cast of avg' should become redundant.
3396 If there is no target support available, generate code to distribute rshift
3397 over plus and add a carry. */
3400 vect_recog_average_pattern (vec_info
*vinfo
,
3401 stmt_vec_info last_stmt_info
, tree
*type_out
)
3403 /* Check for a shift right by one bit. */
3404 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3406 || gimple_assign_rhs_code (last_stmt
) != RSHIFT_EXPR
3407 || !integer_onep (gimple_assign_rhs2 (last_stmt
)))
3410 /* Check that the shift result is wider than the users of the
3411 result need (i.e. that narrowing would be a natural choice). */
3412 tree lhs
= gimple_assign_lhs (last_stmt
);
3413 tree type
= TREE_TYPE (lhs
);
3414 unsigned int target_precision
3415 = vect_element_precision (last_stmt_info
->min_output_precision
);
3416 if (!INTEGRAL_TYPE_P (type
) || target_precision
>= TYPE_PRECISION (type
))
3419 /* Look through any change in sign on the shift input. */
3420 tree rshift_rhs
= gimple_assign_rhs1 (last_stmt
);
3421 vect_unpromoted_value unprom_plus
;
3422 rshift_rhs
= vect_look_through_possible_promotion (vinfo
, rshift_rhs
,
3425 || TYPE_PRECISION (TREE_TYPE (rshift_rhs
)) != TYPE_PRECISION (type
))
3428 /* Get the definition of the shift input. */
3429 stmt_vec_info plus_stmt_info
= vect_get_internal_def (vinfo
, rshift_rhs
);
3430 if (!plus_stmt_info
)
3433 /* Check whether the shift input can be seen as a tree of additions on
3434 2 or 3 widened inputs.
3436 Note that the pattern should be a win even if the result of one or
3437 more additions is reused elsewhere: if the pattern matches, we'd be
3438 replacing 2N RSHIFT_EXPRs and N VEC_PACK_*s with N IFN_AVG_*s. */
3439 internal_fn ifn
= IFN_AVG_FLOOR
;
3440 vect_unpromoted_value unprom
[3];
3442 unsigned int nops
= vect_widened_op_tree (vinfo
, plus_stmt_info
, PLUS_EXPR
,
3443 IFN_VEC_WIDEN_PLUS
, false, 3,
3449 /* Check that one operand is 1. */
3451 for (i
= 0; i
< 3; ++i
)
3452 if (integer_onep (unprom
[i
].op
))
3456 /* Throw away the 1 operand and keep the other two. */
3458 unprom
[i
] = unprom
[2];
3462 vect_pattern_detected ("vect_recog_average_pattern", last_stmt
);
3466 (a) the operation can be viewed as:
3468 TYPE widened0 = (TYPE) UNPROM[0];
3469 TYPE widened1 = (TYPE) UNPROM[1];
3470 TYPE tmp1 = widened0 + widened1 {+ 1};
3471 TYPE tmp2 = tmp1 >> 1; // LAST_STMT_INFO
3473 (b) the first two statements are equivalent to:
3475 TYPE widened0 = (TYPE) (NEW_TYPE) UNPROM[0];
3476 TYPE widened1 = (TYPE) (NEW_TYPE) UNPROM[1];
3478 (c) vect_recog_over_widening_pattern has already tried to narrow TYPE
3481 (d) all the operations can be performed correctly at twice the width of
3482 NEW_TYPE, due to the nature of the average operation; and
3484 (e) users of the result of the right shift need only TARGET_PRECISION
3485 bits, where TARGET_PRECISION is no more than half of TYPE's
3488 Under these circumstances, the only situation in which NEW_TYPE
3489 could be narrower than TARGET_PRECISION is if widened0, widened1
3490 and an addition result are all used more than once. Thus we can
3491 treat any widening of UNPROM[0] and UNPROM[1] to TARGET_PRECISION
3492 as "free", whereas widening the result of the average instruction
3493 from NEW_TYPE to TARGET_PRECISION would be a new operation. It's
3494 therefore better not to go narrower than TARGET_PRECISION. */
3495 if (TYPE_PRECISION (new_type
) < target_precision
)
3496 new_type
= build_nonstandard_integer_type (target_precision
,
3497 TYPE_UNSIGNED (new_type
));
3499 /* Check for target support. */
3500 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, new_type
);
3504 bool fallback_p
= false;
3506 if (direct_internal_fn_supported_p (ifn
, new_vectype
, OPTIMIZE_FOR_SPEED
))
3508 else if (TYPE_UNSIGNED (new_type
)
3509 && optab_for_tree_code (RSHIFT_EXPR
, new_vectype
, optab_scalar
)
3510 && optab_for_tree_code (PLUS_EXPR
, new_vectype
, optab_default
)
3511 && optab_for_tree_code (BIT_IOR_EXPR
, new_vectype
, optab_default
)
3512 && optab_for_tree_code (BIT_AND_EXPR
, new_vectype
, optab_default
))
3517 /* The IR requires a valid vector type for the cast result, even though
3518 it's likely to be discarded. */
3519 *type_out
= get_vectype_for_scalar_type (vinfo
, type
);
3523 tree new_var
= vect_recog_temp_ssa_var (new_type
, NULL
);
3525 vect_convert_inputs (vinfo
, last_stmt_info
, 2, new_ops
, new_type
,
3526 unprom
, new_vectype
);
3530 /* As a fallback, generate code for following sequence:
3532 shifted_op0 = new_ops[0] >> 1;
3533 shifted_op1 = new_ops[1] >> 1;
3534 sum_of_shifted = shifted_op0 + shifted_op1;
3535 unmasked_carry = new_ops[0] and/or new_ops[1];
3536 carry = unmasked_carry & 1;
3537 new_var = sum_of_shifted + carry;
3540 tree one_cst
= build_one_cst (new_type
);
3543 tree shifted_op0
= vect_recog_temp_ssa_var (new_type
, NULL
);
3544 g
= gimple_build_assign (shifted_op0
, RSHIFT_EXPR
, new_ops
[0], one_cst
);
3545 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3547 tree shifted_op1
= vect_recog_temp_ssa_var (new_type
, NULL
);
3548 g
= gimple_build_assign (shifted_op1
, RSHIFT_EXPR
, new_ops
[1], one_cst
);
3549 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3551 tree sum_of_shifted
= vect_recog_temp_ssa_var (new_type
, NULL
);
3552 g
= gimple_build_assign (sum_of_shifted
, PLUS_EXPR
,
3553 shifted_op0
, shifted_op1
);
3554 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3556 tree unmasked_carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3557 tree_code c
= (ifn
== IFN_AVG_CEIL
) ? BIT_IOR_EXPR
: BIT_AND_EXPR
;
3558 g
= gimple_build_assign (unmasked_carry
, c
, new_ops
[0], new_ops
[1]);
3559 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3561 tree carry
= vect_recog_temp_ssa_var (new_type
, NULL
);
3562 g
= gimple_build_assign (carry
, BIT_AND_EXPR
, unmasked_carry
, one_cst
);
3563 append_pattern_def_seq (vinfo
, last_stmt_info
, g
, new_vectype
);
3565 g
= gimple_build_assign (new_var
, PLUS_EXPR
, sum_of_shifted
, carry
);
3566 return vect_convert_output (vinfo
, last_stmt_info
, type
, g
, new_vectype
);
3569 /* Generate the IFN_AVG* call. */
3570 gcall
*average_stmt
= gimple_build_call_internal (ifn
, 2, new_ops
[0],
3572 gimple_call_set_lhs (average_stmt
, new_var
);
3573 gimple_set_location (average_stmt
, gimple_location (last_stmt
));
3575 if (dump_enabled_p ())
3576 dump_printf_loc (MSG_NOTE
, vect_location
,
3577 "created pattern stmt: %G", (gimple
*) average_stmt
);
3579 return vect_convert_output (vinfo
, last_stmt_info
,
3580 type
, average_stmt
, new_vectype
);
3583 /* Recognize cases in which the input to a cast is wider than its
3584 output, and the input is fed by a widening operation. Fold this
3585 by removing the unnecessary intermediate widening. E.g.:
3588 unsigned int b = (unsigned int) a;
3589 unsigned short c = (unsigned short) b;
3593 unsigned short c = (unsigned short) a;
3595 Although this is rare in input IR, it is an expected side-effect
3596 of the over-widening pattern above.
3598 This is beneficial also for integer-to-float conversions, if the
3599 widened integer has more bits than the float, and if the unwidened
3603 vect_recog_cast_forwprop_pattern (vec_info
*vinfo
,
3604 stmt_vec_info last_stmt_info
, tree
*type_out
)
3606 /* Check for a cast, including an integer-to-float conversion. */
3607 gassign
*last_stmt
= dyn_cast
<gassign
*> (last_stmt_info
->stmt
);
3610 tree_code code
= gimple_assign_rhs_code (last_stmt
);
3611 if (!CONVERT_EXPR_CODE_P (code
) && code
!= FLOAT_EXPR
)
3614 /* Make sure that the rhs is a scalar with a natural bitsize. */
3615 tree lhs
= gimple_assign_lhs (last_stmt
);
3618 tree lhs_type
= TREE_TYPE (lhs
);
3619 scalar_mode lhs_mode
;
3620 if (VECT_SCALAR_BOOLEAN_TYPE_P (lhs_type
)
3621 || !is_a
<scalar_mode
> (TYPE_MODE (lhs_type
), &lhs_mode
))
3624 /* Check for a narrowing operation (from a vector point of view). */
3625 tree rhs
= gimple_assign_rhs1 (last_stmt
);
3626 tree rhs_type
= TREE_TYPE (rhs
);
3627 if (!INTEGRAL_TYPE_P (rhs_type
)
3628 || VECT_SCALAR_BOOLEAN_TYPE_P (rhs_type
)
3629 || TYPE_PRECISION (rhs_type
) <= GET_MODE_BITSIZE (lhs_mode
))
3632 /* Try to find an unpromoted input. */
3633 vect_unpromoted_value unprom
;
3634 if (!vect_look_through_possible_promotion (vinfo
, rhs
, &unprom
)
3635 || TYPE_PRECISION (unprom
.type
) >= TYPE_PRECISION (rhs_type
))
3638 /* If the bits above RHS_TYPE matter, make sure that they're the
3639 same when extending from UNPROM as they are when extending from RHS. */
3640 if (!INTEGRAL_TYPE_P (lhs_type
)
3641 && TYPE_SIGN (rhs_type
) != TYPE_SIGN (unprom
.type
))
3644 /* We can get the same result by casting UNPROM directly, to avoid
3645 the unnecessary widening and narrowing. */
3646 vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt
);
3648 *type_out
= get_vectype_for_scalar_type (vinfo
, lhs_type
);
3652 tree new_var
= vect_recog_temp_ssa_var (lhs_type
, NULL
);
3653 gimple
*pattern_stmt
= gimple_build_assign (new_var
, code
, unprom
.op
);
3654 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
3656 return pattern_stmt
;
3659 /* Try to detect a shift left of a widened input, converting LSHIFT_EXPR
3660 to WIDEN_LSHIFT_EXPR. See vect_recog_widen_op_pattern for details. */
3663 vect_recog_widen_shift_pattern (vec_info
*vinfo
,
3664 stmt_vec_info last_stmt_info
, tree
*type_out
)
3666 return vect_recog_widen_op_pattern (vinfo
, last_stmt_info
, type_out
,
3667 LSHIFT_EXPR
, WIDEN_LSHIFT_EXPR
, true,
3668 "vect_recog_widen_shift_pattern");
3671 /* Detect a rotate pattern wouldn't be otherwise vectorized:
3675 S0 a_t = b_t r<< c_t;
3679 * STMT_VINFO: The stmt from which the pattern search begins,
3680 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
3684 S2 e_t = d_t & (B - 1);
3685 S3 f_t = b_t << c_t;
3686 S4 g_t = b_t >> e_t;
3689 where B is element bitsize of type.
3693 * TYPE_OUT: The type of the output of this pattern.
3695 * Return value: A new stmt that will be used to replace the rotate
3699 vect_recog_rotate_pattern (vec_info
*vinfo
,
3700 stmt_vec_info stmt_vinfo
, tree
*type_out
)
3702 gimple
*last_stmt
= stmt_vinfo
->stmt
;
3703 tree oprnd0
, oprnd1
, lhs
, var
, var1
, var2
, vectype
, type
, stype
, def
, def2
;
3704 gimple
*pattern_stmt
, *def_stmt
;
3705 enum tree_code rhs_code
;
3706 enum vect_def_type dt
;
3707 optab optab1
, optab2
;
3708 edge ext_def
= NULL
;
3709 bool bswap16_p
= false;
3711 if (is_gimple_assign (last_stmt
))
3713 rhs_code
= gimple_assign_rhs_code (last_stmt
);
3723 lhs
= gimple_assign_lhs (last_stmt
);
3724 oprnd0
= gimple_assign_rhs1 (last_stmt
);
3725 type
= TREE_TYPE (oprnd0
);
3726 oprnd1
= gimple_assign_rhs2 (last_stmt
);
3728 else if (gimple_call_builtin_p (last_stmt
, BUILT_IN_BSWAP16
))
3730 /* __builtin_bswap16 (x) is another form of x r>> 8.
3731 The vectorizer has bswap support, but only if the argument isn't
3733 lhs
= gimple_call_lhs (last_stmt
);
3734 oprnd0
= gimple_call_arg (last_stmt
, 0);
3735 type
= TREE_TYPE (oprnd0
);
3737 || TYPE_PRECISION (TREE_TYPE (lhs
)) != 16
3738 || TYPE_PRECISION (type
) <= 16
3739 || TREE_CODE (oprnd0
) != SSA_NAME
3740 || BITS_PER_UNIT
!= 8)
3743 stmt_vec_info def_stmt_info
;
3744 if (!vect_is_simple_use (oprnd0
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3747 if (dt
!= vect_internal_def
)
3750 if (gimple_assign_cast_p (def_stmt
))
3752 def
= gimple_assign_rhs1 (def_stmt
);
3753 if (INTEGRAL_TYPE_P (TREE_TYPE (def
))
3754 && TYPE_PRECISION (TREE_TYPE (def
)) == 16)
3758 type
= TREE_TYPE (lhs
);
3759 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3760 if (vectype
== NULL_TREE
)
3763 if (tree char_vectype
= get_same_sized_vectype (char_type_node
, vectype
))
3765 /* The encoding uses one stepped pattern for each byte in the
3767 vec_perm_builder
elts (TYPE_VECTOR_SUBPARTS (char_vectype
), 2, 3);
3768 for (unsigned i
= 0; i
< 3; ++i
)
3769 for (unsigned j
= 0; j
< 2; ++j
)
3770 elts
.quick_push ((i
+ 1) * 2 - j
- 1);
3772 vec_perm_indices
indices (elts
, 1,
3773 TYPE_VECTOR_SUBPARTS (char_vectype
));
3774 machine_mode vmode
= TYPE_MODE (char_vectype
);
3775 if (can_vec_perm_const_p (vmode
, vmode
, indices
))
3777 /* vectorizable_bswap can handle the __builtin_bswap16 if we
3778 undo the argument promotion. */
3779 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3781 def
= vect_recog_temp_ssa_var (type
, NULL
);
3782 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3783 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3787 /* Pattern detected. */
3788 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3790 *type_out
= vectype
;
3792 /* Pattern supported. Create a stmt to be used to replace the
3793 pattern, with the unpromoted argument. */
3794 var
= vect_recog_temp_ssa_var (type
, NULL
);
3795 pattern_stmt
= gimple_build_call (gimple_call_fndecl (last_stmt
),
3797 gimple_call_set_lhs (pattern_stmt
, var
);
3798 gimple_call_set_fntype (as_a
<gcall
*> (pattern_stmt
),
3799 gimple_call_fntype (last_stmt
));
3800 return pattern_stmt
;
3804 oprnd1
= build_int_cst (integer_type_node
, 8);
3805 rhs_code
= LROTATE_EXPR
;
3811 if (TREE_CODE (oprnd0
) != SSA_NAME
3812 || !INTEGRAL_TYPE_P (type
)
3813 || TYPE_PRECISION (TREE_TYPE (lhs
)) != TYPE_PRECISION (type
))
3816 stmt_vec_info def_stmt_info
;
3817 if (!vect_is_simple_use (oprnd1
, vinfo
, &dt
, &def_stmt_info
, &def_stmt
))
3820 if (dt
!= vect_internal_def
3821 && dt
!= vect_constant_def
3822 && dt
!= vect_external_def
)
3825 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
3826 if (vectype
== NULL_TREE
)
3829 /* If vector/vector or vector/scalar rotate is supported by the target,
3830 don't do anything here. */
3831 optab1
= optab_for_tree_code (rhs_code
, vectype
, optab_vector
);
3833 && optab_handler (optab1
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3838 if (!useless_type_conversion_p (type
, TREE_TYPE (oprnd0
)))
3840 def
= vect_recog_temp_ssa_var (type
, NULL
);
3841 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3842 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
3846 /* Pattern detected. */
3847 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3849 *type_out
= vectype
;
3851 /* Pattern supported. Create a stmt to be used to replace the
3853 var
= vect_recog_temp_ssa_var (type
, NULL
);
3854 pattern_stmt
= gimple_build_assign (var
, LROTATE_EXPR
, oprnd0
,
3856 return pattern_stmt
;
3861 if (is_a
<bb_vec_info
> (vinfo
) || dt
!= vect_internal_def
)
3863 optab2
= optab_for_tree_code (rhs_code
, vectype
, optab_scalar
);
3865 && optab_handler (optab2
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
)
3869 tree utype
= unsigned_type_for (type
);
3870 tree uvectype
= get_vectype_for_scalar_type (vinfo
, utype
);
3874 /* If vector/vector or vector/scalar shifts aren't supported by the target,
3875 don't do anything here either. */
3876 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_vector
);
3877 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_vector
);
3879 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
3881 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
3883 if (! is_a
<bb_vec_info
> (vinfo
) && dt
== vect_internal_def
)
3885 optab1
= optab_for_tree_code (LSHIFT_EXPR
, uvectype
, optab_scalar
);
3886 optab2
= optab_for_tree_code (RSHIFT_EXPR
, uvectype
, optab_scalar
);
3888 || optab_handler (optab1
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
3890 || optab_handler (optab2
, TYPE_MODE (uvectype
)) == CODE_FOR_nothing
)
3894 *type_out
= vectype
;
3896 if (!useless_type_conversion_p (utype
, TREE_TYPE (oprnd0
)))
3898 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3899 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd0
);
3900 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3904 if (dt
== vect_external_def
&& TREE_CODE (oprnd1
) == SSA_NAME
)
3905 ext_def
= vect_get_external_def_edge (vinfo
, oprnd1
);
3908 scalar_int_mode mode
= SCALAR_INT_TYPE_MODE (utype
);
3909 if (dt
!= vect_internal_def
|| TYPE_MODE (TREE_TYPE (oprnd1
)) == mode
)
3911 else if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
3913 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
3914 if (TYPE_MODE (TREE_TYPE (rhs1
)) == mode
3915 && TYPE_PRECISION (TREE_TYPE (rhs1
))
3916 == TYPE_PRECISION (type
))
3920 if (def
== NULL_TREE
)
3922 def
= vect_recog_temp_ssa_var (utype
, NULL
);
3923 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
3924 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3926 stype
= TREE_TYPE (def
);
3928 if (TREE_CODE (def
) == INTEGER_CST
)
3930 if (!tree_fits_uhwi_p (def
)
3931 || tree_to_uhwi (def
) >= GET_MODE_PRECISION (mode
)
3932 || integer_zerop (def
))
3934 def2
= build_int_cst (stype
,
3935 GET_MODE_PRECISION (mode
) - tree_to_uhwi (def
));
3939 tree vecstype
= get_vectype_for_scalar_type (vinfo
, stype
);
3941 if (vecstype
== NULL_TREE
)
3943 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3944 def_stmt
= gimple_build_assign (def2
, NEGATE_EXPR
, def
);
3948 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3949 gcc_assert (!new_bb
);
3952 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
3954 def2
= vect_recog_temp_ssa_var (stype
, NULL
);
3955 tree mask
= build_int_cst (stype
, GET_MODE_PRECISION (mode
) - 1);
3956 def_stmt
= gimple_build_assign (def2
, BIT_AND_EXPR
,
3957 gimple_assign_lhs (def_stmt
), mask
);
3961 = gsi_insert_on_edge_immediate (ext_def
, def_stmt
);
3962 gcc_assert (!new_bb
);
3965 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
3968 var1
= vect_recog_temp_ssa_var (utype
, NULL
);
3969 def_stmt
= gimple_build_assign (var1
, rhs_code
== LROTATE_EXPR
3970 ? LSHIFT_EXPR
: RSHIFT_EXPR
,
3972 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3974 var2
= vect_recog_temp_ssa_var (utype
, NULL
);
3975 def_stmt
= gimple_build_assign (var2
, rhs_code
== LROTATE_EXPR
3976 ? RSHIFT_EXPR
: LSHIFT_EXPR
,
3978 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, uvectype
);
3980 /* Pattern detected. */
3981 vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt
);
3983 /* Pattern supported. Create a stmt to be used to replace the pattern. */
3984 var
= vect_recog_temp_ssa_var (utype
, NULL
);
3985 pattern_stmt
= gimple_build_assign (var
, BIT_IOR_EXPR
, var1
, var2
);
3987 if (!useless_type_conversion_p (type
, utype
))
3989 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, uvectype
);
3990 tree result
= vect_recog_temp_ssa_var (type
, NULL
);
3991 pattern_stmt
= gimple_build_assign (result
, NOP_EXPR
, var
);
3993 return pattern_stmt
;
3996 /* Detect a vector by vector shift pattern that wouldn't be otherwise
4004 S3 res_T = b_T op a_t;
4006 where type 'TYPE' is a type with different size than 'type',
4007 and op is <<, >> or rotate.
4012 TYPE b_T, c_T, res_T;
4015 S1 a_t = (type) c_T;
4017 S3 res_T = b_T op a_t;
4021 * STMT_VINFO: The stmt from which the pattern search begins,
4022 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
4023 with a shift/rotate which has same type on both operands, in the
4024 second case just b_T op c_T, in the first case with added cast
4025 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
4029 * TYPE_OUT: The type of the output of this pattern.
4031 * Return value: A new stmt that will be used to replace the shift/rotate
4035 vect_recog_vector_vector_shift_pattern (vec_info
*vinfo
,
4036 stmt_vec_info stmt_vinfo
,
4039 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4040 tree oprnd0
, oprnd1
, lhs
, var
;
4041 gimple
*pattern_stmt
;
4042 enum tree_code rhs_code
;
4044 if (!is_gimple_assign (last_stmt
))
4047 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4059 lhs
= gimple_assign_lhs (last_stmt
);
4060 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4061 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4062 if (TREE_CODE (oprnd0
) != SSA_NAME
4063 || TREE_CODE (oprnd1
) != SSA_NAME
4064 || TYPE_MODE (TREE_TYPE (oprnd0
)) == TYPE_MODE (TREE_TYPE (oprnd1
))
4065 || !INTEGRAL_TYPE_P (TREE_TYPE (oprnd0
))
4066 || !type_has_mode_precision_p (TREE_TYPE (oprnd1
))
4067 || TYPE_PRECISION (TREE_TYPE (lhs
))
4068 != TYPE_PRECISION (TREE_TYPE (oprnd0
)))
4071 stmt_vec_info def_vinfo
= vect_get_internal_def (vinfo
, oprnd1
);
4075 *type_out
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (oprnd0
));
4076 if (*type_out
== NULL_TREE
)
4079 tree def
= NULL_TREE
;
4080 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_vinfo
->stmt
);
4081 if (def_stmt
&& gimple_assign_cast_p (def_stmt
))
4083 tree rhs1
= gimple_assign_rhs1 (def_stmt
);
4084 if (TYPE_MODE (TREE_TYPE (rhs1
)) == TYPE_MODE (TREE_TYPE (oprnd0
))
4085 && TYPE_PRECISION (TREE_TYPE (rhs1
))
4086 == TYPE_PRECISION (TREE_TYPE (oprnd0
)))
4088 if (TYPE_PRECISION (TREE_TYPE (oprnd1
))
4089 >= TYPE_PRECISION (TREE_TYPE (rhs1
)))
4094 = build_low_bits_mask (TREE_TYPE (rhs1
),
4095 TYPE_PRECISION (TREE_TYPE (oprnd1
)));
4096 def
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
4097 def_stmt
= gimple_build_assign (def
, BIT_AND_EXPR
, rhs1
, mask
);
4098 tree vecstype
= get_vectype_for_scalar_type (vinfo
,
4100 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecstype
);
4105 if (def
== NULL_TREE
)
4107 def
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
4108 def_stmt
= gimple_build_assign (def
, NOP_EXPR
, oprnd1
);
4109 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4112 /* Pattern detected. */
4113 vect_pattern_detected ("vect_recog_vector_vector_shift_pattern", last_stmt
);
4115 /* Pattern supported. Create a stmt to be used to replace the pattern. */
4116 var
= vect_recog_temp_ssa_var (TREE_TYPE (oprnd0
), NULL
);
4117 pattern_stmt
= gimple_build_assign (var
, rhs_code
, oprnd0
, def
);
4119 return pattern_stmt
;
4122 /* Return true iff the target has a vector optab implementing the operation
4123 CODE on type VECTYPE. */
4126 target_has_vecop_for_code (tree_code code
, tree vectype
)
4128 optab voptab
= optab_for_tree_code (code
, vectype
, optab_vector
);
4130 && optab_handler (voptab
, TYPE_MODE (vectype
)) != CODE_FOR_nothing
;
4133 /* Verify that the target has optabs of VECTYPE to perform all the steps
4134 needed by the multiplication-by-immediate synthesis algorithm described by
4135 ALG and VAR. If SYNTH_SHIFT_P is true ensure that vector addition is
4136 present. Return true iff the target supports all the steps. */
4139 target_supports_mult_synth_alg (struct algorithm
*alg
, mult_variant var
,
4140 tree vectype
, bool synth_shift_p
)
4142 if (alg
->op
[0] != alg_zero
&& alg
->op
[0] != alg_m
)
4145 bool supports_vminus
= target_has_vecop_for_code (MINUS_EXPR
, vectype
);
4146 bool supports_vplus
= target_has_vecop_for_code (PLUS_EXPR
, vectype
);
4148 if (var
== negate_variant
4149 && !target_has_vecop_for_code (NEGATE_EXPR
, vectype
))
4152 /* If we must synthesize shifts with additions make sure that vector
4153 addition is available. */
4154 if ((var
== add_variant
|| synth_shift_p
) && !supports_vplus
)
4157 for (int i
= 1; i
< alg
->ops
; i
++)
4165 case alg_add_factor
:
4166 if (!supports_vplus
)
4171 case alg_sub_factor
:
4172 if (!supports_vminus
)
4178 case alg_impossible
:
4188 /* Synthesize a left shift of OP by AMNT bits using a series of additions and
4189 putting the final result in DEST. Append all statements but the last into
4190 VINFO. Return the last statement. */
4193 synth_lshift_by_additions (vec_info
*vinfo
,
4194 tree dest
, tree op
, HOST_WIDE_INT amnt
,
4195 stmt_vec_info stmt_info
)
4198 tree itype
= TREE_TYPE (op
);
4200 gcc_assert (amnt
>= 0);
4201 for (i
= 0; i
< amnt
; i
++)
4203 tree tmp_var
= (i
< amnt
- 1) ? vect_recog_temp_ssa_var (itype
, NULL
)
4206 = gimple_build_assign (tmp_var
, PLUS_EXPR
, prev_res
, prev_res
);
4209 append_pattern_def_seq (vinfo
, stmt_info
, stmt
);
4217 /* Helper for vect_synth_mult_by_constant. Apply a binary operation
4218 CODE to operands OP1 and OP2, creating a new temporary SSA var in
4219 the process if necessary. Append the resulting assignment statements
4220 to the sequence in STMT_VINFO. Return the SSA variable that holds the
4221 result of the binary operation. If SYNTH_SHIFT_P is true synthesize
4222 left shifts using additions. */
4225 apply_binop_and_append_stmt (vec_info
*vinfo
,
4226 tree_code code
, tree op1
, tree op2
,
4227 stmt_vec_info stmt_vinfo
, bool synth_shift_p
)
4229 if (integer_zerop (op2
)
4230 && (code
== LSHIFT_EXPR
4231 || code
== PLUS_EXPR
))
4233 gcc_assert (TREE_CODE (op1
) == SSA_NAME
);
4238 tree itype
= TREE_TYPE (op1
);
4239 tree tmp_var
= vect_recog_temp_ssa_var (itype
, NULL
);
4241 if (code
== LSHIFT_EXPR
4244 stmt
= synth_lshift_by_additions (vinfo
, tmp_var
, op1
,
4245 TREE_INT_CST_LOW (op2
), stmt_vinfo
);
4246 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4250 stmt
= gimple_build_assign (tmp_var
, code
, op1
, op2
);
4251 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4255 /* Synthesize a multiplication of OP by an INTEGER_CST VAL using shifts
4256 and simple arithmetic operations to be vectorized. Record the statements
4257 produced in STMT_VINFO and return the last statement in the sequence or
4258 NULL if it's not possible to synthesize such a multiplication.
4259 This function mirrors the behavior of expand_mult_const in expmed.cc but
4260 works on tree-ssa form. */
4263 vect_synth_mult_by_constant (vec_info
*vinfo
, tree op
, tree val
,
4264 stmt_vec_info stmt_vinfo
)
4266 tree itype
= TREE_TYPE (op
);
4267 machine_mode mode
= TYPE_MODE (itype
);
4268 struct algorithm alg
;
4269 mult_variant variant
;
4270 if (!tree_fits_shwi_p (val
))
4273 /* Multiplication synthesis by shifts, adds and subs can introduce
4274 signed overflow where the original operation didn't. Perform the
4275 operations on an unsigned type and cast back to avoid this.
4276 In the future we may want to relax this for synthesis algorithms
4277 that we can prove do not cause unexpected overflow. */
4278 bool cast_to_unsigned_p
= !TYPE_OVERFLOW_WRAPS (itype
);
4280 tree multtype
= cast_to_unsigned_p
? unsigned_type_for (itype
) : itype
;
4281 tree vectype
= get_vectype_for_scalar_type (vinfo
, multtype
);
4285 /* Targets that don't support vector shifts but support vector additions
4286 can synthesize shifts that way. */
4287 bool synth_shift_p
= !vect_supportable_shift (vinfo
, LSHIFT_EXPR
, multtype
);
4289 HOST_WIDE_INT hwval
= tree_to_shwi (val
);
4290 /* Use MAX_COST here as we don't want to limit the sequence on rtx costs.
4291 The vectorizer's benefit analysis will decide whether it's beneficial
4293 bool possible
= choose_mult_variant (VECTOR_MODE_P (TYPE_MODE (vectype
))
4294 ? TYPE_MODE (vectype
) : mode
,
4295 hwval
, &alg
, &variant
, MAX_COST
);
4299 if (!target_supports_mult_synth_alg (&alg
, variant
, vectype
, synth_shift_p
))
4304 /* Clear out the sequence of statements so we can populate it below. */
4305 gimple
*stmt
= NULL
;
4307 if (cast_to_unsigned_p
)
4309 tree tmp_op
= vect_recog_temp_ssa_var (multtype
, NULL
);
4310 stmt
= gimple_build_assign (tmp_op
, CONVERT_EXPR
, op
);
4311 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4315 if (alg
.op
[0] == alg_zero
)
4316 accumulator
= build_int_cst (multtype
, 0);
4320 bool needs_fixup
= (variant
== negate_variant
)
4321 || (variant
== add_variant
);
4323 for (int i
= 1; i
< alg
.ops
; i
++)
4325 tree shft_log
= build_int_cst (multtype
, alg
.log
[i
]);
4326 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4327 tree tmp_var
= NULL_TREE
;
4334 = synth_lshift_by_additions (vinfo
, accum_tmp
, accumulator
,
4335 alg
.log
[i
], stmt_vinfo
);
4337 stmt
= gimple_build_assign (accum_tmp
, LSHIFT_EXPR
, accumulator
,
4342 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
, shft_log
,
4343 stmt_vinfo
, synth_shift_p
);
4344 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
4348 tmp_var
= apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, op
,
4349 shft_log
, stmt_vinfo
,
4351 /* In some algorithms the first step involves zeroing the
4352 accumulator. If subtracting from such an accumulator
4353 just emit the negation directly. */
4354 if (integer_zerop (accumulator
))
4355 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, tmp_var
);
4357 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, accumulator
,
4362 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4363 shft_log
, stmt_vinfo
, synth_shift_p
);
4364 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, tmp_var
, op
);
4368 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4369 shft_log
, stmt_vinfo
, synth_shift_p
);
4370 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
, op
);
4372 case alg_add_factor
:
4374 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4375 shft_log
, stmt_vinfo
, synth_shift_p
);
4376 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
,
4379 case alg_sub_factor
:
4381 = apply_binop_and_append_stmt (vinfo
, LSHIFT_EXPR
, accumulator
,
4382 shft_log
, stmt_vinfo
, synth_shift_p
);
4383 stmt
= gimple_build_assign (accum_tmp
, MINUS_EXPR
, tmp_var
,
4389 /* We don't want to append the last stmt in the sequence to stmt_vinfo
4390 but rather return it directly. */
4392 if ((i
< alg
.ops
- 1) || needs_fixup
|| cast_to_unsigned_p
)
4393 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4394 accumulator
= accum_tmp
;
4396 if (variant
== negate_variant
)
4398 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4399 stmt
= gimple_build_assign (accum_tmp
, NEGATE_EXPR
, accumulator
);
4400 accumulator
= accum_tmp
;
4401 if (cast_to_unsigned_p
)
4402 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4404 else if (variant
== add_variant
)
4406 tree accum_tmp
= vect_recog_temp_ssa_var (multtype
, NULL
);
4407 stmt
= gimple_build_assign (accum_tmp
, PLUS_EXPR
, accumulator
, op
);
4408 accumulator
= accum_tmp
;
4409 if (cast_to_unsigned_p
)
4410 append_pattern_def_seq (vinfo
, stmt_vinfo
, stmt
);
4412 /* Move back to a signed if needed. */
4413 if (cast_to_unsigned_p
)
4415 tree accum_tmp
= vect_recog_temp_ssa_var (itype
, NULL
);
4416 stmt
= gimple_build_assign (accum_tmp
, CONVERT_EXPR
, accumulator
);
4422 /* Detect multiplication by constant and convert it into a sequence of
4423 shifts and additions, subtractions, negations. We reuse the
4424 choose_mult_variant algorithms from expmed.cc
4428 STMT_VINFO: The stmt from which the pattern search begins,
4433 * TYPE_OUT: The type of the output of this pattern.
4435 * Return value: A new stmt that will be used to replace
4436 the multiplication. */
4439 vect_recog_mult_pattern (vec_info
*vinfo
,
4440 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4442 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4443 tree oprnd0
, oprnd1
, vectype
, itype
;
4444 gimple
*pattern_stmt
;
4446 if (!is_gimple_assign (last_stmt
))
4449 if (gimple_assign_rhs_code (last_stmt
) != MULT_EXPR
)
4452 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4453 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4454 itype
= TREE_TYPE (oprnd0
);
4456 if (TREE_CODE (oprnd0
) != SSA_NAME
4457 || TREE_CODE (oprnd1
) != INTEGER_CST
4458 || !INTEGRAL_TYPE_P (itype
)
4459 || !type_has_mode_precision_p (itype
))
4462 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
4463 if (vectype
== NULL_TREE
)
4466 /* If the target can handle vectorized multiplication natively,
4467 don't attempt to optimize this. */
4468 optab mul_optab
= optab_for_tree_code (MULT_EXPR
, vectype
, optab_default
);
4469 if (mul_optab
!= unknown_optab
)
4471 machine_mode vec_mode
= TYPE_MODE (vectype
);
4472 int icode
= (int) optab_handler (mul_optab
, vec_mode
);
4473 if (icode
!= CODE_FOR_nothing
)
4477 pattern_stmt
= vect_synth_mult_by_constant (vinfo
,
4478 oprnd0
, oprnd1
, stmt_vinfo
);
4482 /* Pattern detected. */
4483 vect_pattern_detected ("vect_recog_mult_pattern", last_stmt
);
4485 *type_out
= vectype
;
4487 return pattern_stmt
;
4490 extern bool gimple_unsigned_integer_sat_add (tree
, tree
*, tree (*)(tree
));
4493 * Try to detect saturation add pattern (SAT_ADD), aka below gimple:
4496 * _9 = (long unsigned int) _8;
4500 * And then simplied to
4501 * _12 = .SAT_ADD (_4, _6);
4505 vect_recog_sat_add_pattern (vec_info
*vinfo
, stmt_vec_info stmt_vinfo
,
4508 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
4510 if (!is_gimple_assign (last_stmt
))
4514 tree lhs
= gimple_assign_lhs (last_stmt
);
4516 if (gimple_unsigned_integer_sat_add (lhs
, res_ops
, NULL
))
4518 tree itype
= TREE_TYPE (res_ops
[0]);
4519 tree vtype
= get_vectype_for_scalar_type (vinfo
, itype
);
4521 if (vtype
!= NULL_TREE
4522 && direct_internal_fn_supported_p (IFN_SAT_ADD
, vtype
,
4526 gcall
*call
= gimple_build_call_internal (IFN_SAT_ADD
, 2, res_ops
[0],
4529 gimple_call_set_lhs (call
, vect_recog_temp_ssa_var (itype
, NULL
));
4530 gimple_call_set_nothrow (call
, /* nothrow_p */ false);
4531 gimple_set_location (call
, gimple_location (last_stmt
));
4533 vect_pattern_detected ("vect_recog_sat_add_pattern", last_stmt
);
4541 /* Detect a signed division by a constant that wouldn't be
4542 otherwise vectorized:
4548 where type 'type' is an integral type and N is a constant.
4550 Similarly handle modulo by a constant:
4556 * STMT_VINFO: The stmt from which the pattern search begins,
4557 i.e. the division stmt. S1 is replaced by if N is a power
4558 of two constant and type is signed:
4559 S3 y_t = b_t < 0 ? N - 1 : 0;
4561 S1' a_t = x_t >> log2 (N);
4563 S4 is replaced if N is a power of two constant and
4564 type is signed by (where *_T temporaries have unsigned type):
4565 S9 y_T = b_t < 0 ? -1U : 0U;
4566 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
4567 S7 z_t = (type) z_T;
4569 S5 x_t = w_t & (N - 1);
4570 S4' a_t = x_t - z_t;
4574 * TYPE_OUT: The type of the output of this pattern.
4576 * Return value: A new stmt that will be used to replace the division
4577 S1 or modulo S4 stmt. */
4580 vect_recog_divmod_pattern (vec_info
*vinfo
,
4581 stmt_vec_info stmt_vinfo
, tree
*type_out
)
4583 gimple
*last_stmt
= stmt_vinfo
->stmt
;
4584 tree oprnd0
, oprnd1
, vectype
, itype
, cond
;
4585 gimple
*pattern_stmt
, *def_stmt
;
4586 enum tree_code rhs_code
;
4591 if (!is_gimple_assign (last_stmt
))
4594 rhs_code
= gimple_assign_rhs_code (last_stmt
);
4597 case TRUNC_DIV_EXPR
:
4598 case EXACT_DIV_EXPR
:
4599 case TRUNC_MOD_EXPR
:
4605 oprnd0
= gimple_assign_rhs1 (last_stmt
);
4606 oprnd1
= gimple_assign_rhs2 (last_stmt
);
4607 itype
= TREE_TYPE (oprnd0
);
4608 if (TREE_CODE (oprnd0
) != SSA_NAME
4609 || TREE_CODE (oprnd1
) != INTEGER_CST
4610 || TREE_CODE (itype
) != INTEGER_TYPE
4611 || !type_has_mode_precision_p (itype
))
4614 scalar_int_mode itype_mode
= SCALAR_INT_TYPE_MODE (itype
);
4615 vectype
= get_vectype_for_scalar_type (vinfo
, itype
);
4616 if (vectype
== NULL_TREE
)
4619 if (optimize_bb_for_size_p (gimple_bb (last_stmt
)))
4621 /* If the target can handle vectorized division or modulo natively,
4622 don't attempt to optimize this, since native division is likely
4623 to give smaller code. */
4624 optab
= optab_for_tree_code (rhs_code
, vectype
, optab_default
);
4625 if (optab
!= unknown_optab
)
4627 machine_mode vec_mode
= TYPE_MODE (vectype
);
4628 int icode
= (int) optab_handler (optab
, vec_mode
);
4629 if (icode
!= CODE_FOR_nothing
)
4634 prec
= TYPE_PRECISION (itype
);
4635 if (integer_pow2p (oprnd1
))
4637 if (TYPE_UNSIGNED (itype
) || tree_int_cst_sgn (oprnd1
) != 1)
4640 /* Pattern detected. */
4641 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
4643 *type_out
= vectype
;
4645 /* Check if the target supports this internal function. */
4646 internal_fn ifn
= IFN_DIV_POW2
;
4647 if (direct_internal_fn_supported_p (ifn
, vectype
, OPTIMIZE_FOR_SPEED
))
4649 tree shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
4651 tree var_div
= vect_recog_temp_ssa_var (itype
, NULL
);
4652 gimple
*div_stmt
= gimple_build_call_internal (ifn
, 2, oprnd0
, shift
);
4653 gimple_call_set_lhs (div_stmt
, var_div
);
4655 if (rhs_code
== TRUNC_MOD_EXPR
)
4657 append_pattern_def_seq (vinfo
, stmt_vinfo
, div_stmt
);
4659 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4660 LSHIFT_EXPR
, var_div
, shift
);
4661 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4663 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4665 gimple_assign_lhs (def_stmt
));
4668 pattern_stmt
= div_stmt
;
4669 gimple_set_location (pattern_stmt
, gimple_location (last_stmt
));
4671 return pattern_stmt
;
4674 cond
= build2 (LT_EXPR
, boolean_type_node
, oprnd0
,
4675 build_int_cst (itype
, 0));
4676 if (rhs_code
== TRUNC_DIV_EXPR
4677 || rhs_code
== EXACT_DIV_EXPR
)
4679 tree var
= vect_recog_temp_ssa_var (itype
, NULL
);
4682 = gimple_build_assign (var
, COND_EXPR
, cond
,
4683 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
4684 build_int_cst (itype
, 1)),
4685 build_int_cst (itype
, 0));
4686 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4687 var
= vect_recog_temp_ssa_var (itype
, NULL
);
4689 = gimple_build_assign (var
, PLUS_EXPR
, oprnd0
,
4690 gimple_assign_lhs (def_stmt
));
4691 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4693 shift
= build_int_cst (itype
, tree_log2 (oprnd1
));
4695 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4696 RSHIFT_EXPR
, var
, shift
);
4701 if (compare_tree_int (oprnd1
, 2) == 0)
4703 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
4704 def_stmt
= gimple_build_assign (signmask
, COND_EXPR
, cond
,
4705 build_int_cst (itype
, 1),
4706 build_int_cst (itype
, 0));
4707 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4712 = build_nonstandard_integer_type (prec
, 1);
4713 tree vecutype
= get_vectype_for_scalar_type (vinfo
, utype
);
4715 = build_int_cst (utype
, GET_MODE_BITSIZE (itype_mode
)
4716 - tree_log2 (oprnd1
));
4717 tree var
= vect_recog_temp_ssa_var (utype
, NULL
);
4719 def_stmt
= gimple_build_assign (var
, COND_EXPR
, cond
,
4720 build_int_cst (utype
, -1),
4721 build_int_cst (utype
, 0));
4722 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
4723 var
= vect_recog_temp_ssa_var (utype
, NULL
);
4724 def_stmt
= gimple_build_assign (var
, RSHIFT_EXPR
,
4725 gimple_assign_lhs (def_stmt
),
4727 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecutype
);
4728 signmask
= vect_recog_temp_ssa_var (itype
, NULL
);
4730 = gimple_build_assign (signmask
, NOP_EXPR
, var
);
4731 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4734 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4735 PLUS_EXPR
, oprnd0
, signmask
);
4736 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4738 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4739 BIT_AND_EXPR
, gimple_assign_lhs (def_stmt
),
4740 fold_build2 (MINUS_EXPR
, itype
, oprnd1
,
4741 build_int_cst (itype
, 1)));
4742 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4745 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
4746 MINUS_EXPR
, gimple_assign_lhs (def_stmt
),
4750 return pattern_stmt
;
4753 if ((cst
= uniform_integer_cst_p (oprnd1
))
4754 && TYPE_UNSIGNED (itype
)
4755 && rhs_code
== TRUNC_DIV_EXPR
4757 && targetm
.vectorize
.preferred_div_as_shifts_over_mult (vectype
))
4759 /* We can use the relationship:
4761 x // N == ((x+N+2) // (N+1) + x) // (N+1) for 0 <= x < N(N+3)
4763 to optimize cases where N+1 is a power of 2, and where // (N+1)
4764 is therefore a shift right. When operating in modes that are
4765 multiples of a byte in size, there are two cases:
4767 (1) N(N+3) is not representable, in which case the question
4768 becomes whether the replacement expression overflows.
4769 It is enough to test that x+N+2 does not overflow,
4770 i.e. that x < MAX-(N+1).
4772 (2) N(N+3) is representable, in which case it is the (only)
4773 bound that we need to check.
4775 ??? For now we just handle the case where // (N+1) is a shift
4776 right by half the precision, since some architectures can
4777 optimize the associated addition and shift combinations
4778 into single instructions. */
4780 auto wcst
= wi::to_wide (cst
);
4781 int pow
= wi::exact_log2 (wcst
+ 1);
4782 if (pow
== prec
/ 2)
4784 gimple
*stmt
= SSA_NAME_DEF_STMT (oprnd0
);
4786 gimple_ranger ranger
;
4789 /* Check that no overflow will occur. If we don't have range
4790 information we can't perform the optimization. */
4792 if (ranger
.range_of_expr (r
, oprnd0
, stmt
) && !r
.undefined_p ())
4794 wide_int max
= r
.upper_bound ();
4795 wide_int one
= wi::shwi (1, prec
);
4796 wide_int adder
= wi::add (one
, wi::lshift (one
, pow
));
4797 wi::overflow_type ovf
;
4798 wi::add (max
, adder
, UNSIGNED
, &ovf
);
4799 if (ovf
== wi::OVF_NONE
)
4801 *type_out
= vectype
;
4802 tree tadder
= wide_int_to_tree (itype
, adder
);
4803 tree rshift
= wide_int_to_tree (itype
, pow
);
4805 tree new_lhs1
= vect_recog_temp_ssa_var (itype
, NULL
);
4807 = gimple_build_assign (new_lhs1
, PLUS_EXPR
, oprnd0
, tadder
);
4808 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
4810 tree new_lhs2
= vect_recog_temp_ssa_var (itype
, NULL
);
4811 patt1
= gimple_build_assign (new_lhs2
, RSHIFT_EXPR
, new_lhs1
,
4813 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
4815 tree new_lhs3
= vect_recog_temp_ssa_var (itype
, NULL
);
4816 patt1
= gimple_build_assign (new_lhs3
, PLUS_EXPR
, new_lhs2
,
4818 append_pattern_def_seq (vinfo
, stmt_vinfo
, patt1
, vectype
);
4820 tree new_lhs4
= vect_recog_temp_ssa_var (itype
, NULL
);
4821 pattern_stmt
= gimple_build_assign (new_lhs4
, RSHIFT_EXPR
,
4824 return pattern_stmt
;
4830 if (prec
> HOST_BITS_PER_WIDE_INT
4831 || integer_zerop (oprnd1
))
4834 if (!can_mult_highpart_p (TYPE_MODE (vectype
), TYPE_UNSIGNED (itype
)))
4837 if (TYPE_UNSIGNED (itype
))
4839 unsigned HOST_WIDE_INT mh
, ml
;
4840 int pre_shift
, post_shift
;
4841 unsigned HOST_WIDE_INT d
= (TREE_INT_CST_LOW (oprnd1
)
4842 & GET_MODE_MASK (itype_mode
));
4843 tree t1
, t2
, t3
, t4
;
4845 if (d
>= (HOST_WIDE_INT_1U
<< (prec
- 1)))
4846 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
4849 /* Find a suitable multiplier and right shift count instead of
4850 directly dividing by D. */
4851 mh
= choose_multiplier (d
, prec
, prec
, &ml
, &post_shift
);
4853 /* If the suggested multiplier is more than PREC bits, we can do better
4854 for even divisors, using an initial right shift. */
4855 if (mh
!= 0 && (d
& 1) == 0)
4857 pre_shift
= ctz_or_zero (d
);
4858 mh
= choose_multiplier (d
>> pre_shift
, prec
, prec
- pre_shift
,
4867 if (post_shift
- 1 >= prec
)
4870 /* t1 = oprnd0 h* ml;
4874 q = t4 >> (post_shift - 1); */
4875 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
4876 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
4877 build_int_cst (itype
, ml
));
4878 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4880 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
4882 = gimple_build_assign (t2
, MINUS_EXPR
, oprnd0
, t1
);
4883 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4885 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
4887 = gimple_build_assign (t3
, RSHIFT_EXPR
, t2
, integer_one_node
);
4888 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4890 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
4892 = gimple_build_assign (t4
, PLUS_EXPR
, t1
, t3
);
4894 if (post_shift
!= 1)
4896 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4898 q
= vect_recog_temp_ssa_var (itype
, NULL
);
4900 = gimple_build_assign (q
, RSHIFT_EXPR
, t4
,
4901 build_int_cst (itype
, post_shift
- 1));
4906 pattern_stmt
= def_stmt
;
4911 if (pre_shift
>= prec
|| post_shift
>= prec
)
4914 /* t1 = oprnd0 >> pre_shift;
4916 q = t2 >> post_shift; */
4919 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
4921 = gimple_build_assign (t1
, RSHIFT_EXPR
, oprnd0
,
4922 build_int_cst (NULL
, pre_shift
));
4923 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4928 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
4929 def_stmt
= gimple_build_assign (t2
, MULT_HIGHPART_EXPR
, t1
,
4930 build_int_cst (itype
, ml
));
4934 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4936 q
= vect_recog_temp_ssa_var (itype
, NULL
);
4938 = gimple_build_assign (q
, RSHIFT_EXPR
, t2
,
4939 build_int_cst (itype
, post_shift
));
4944 pattern_stmt
= def_stmt
;
4949 unsigned HOST_WIDE_INT ml
;
4951 HOST_WIDE_INT d
= TREE_INT_CST_LOW (oprnd1
);
4952 unsigned HOST_WIDE_INT abs_d
;
4954 tree t1
, t2
, t3
, t4
;
4956 /* Give up for -1. */
4960 /* Since d might be INT_MIN, we have to cast to
4961 unsigned HOST_WIDE_INT before negating to avoid
4962 undefined signed overflow. */
4964 ? (unsigned HOST_WIDE_INT
) d
4965 : - (unsigned HOST_WIDE_INT
) d
);
4967 /* n rem d = n rem -d */
4968 if (rhs_code
== TRUNC_MOD_EXPR
&& d
< 0)
4971 oprnd1
= build_int_cst (itype
, abs_d
);
4973 if (HOST_BITS_PER_WIDE_INT
>= prec
4974 && abs_d
== HOST_WIDE_INT_1U
<< (prec
- 1))
4975 /* This case is not handled correctly below. */
4978 choose_multiplier (abs_d
, prec
, prec
- 1, &ml
, &post_shift
);
4979 if (ml
>= HOST_WIDE_INT_1U
<< (prec
- 1))
4982 ml
|= HOST_WIDE_INT_M1U
<< (prec
- 1);
4984 if (post_shift
>= prec
)
4987 /* t1 = oprnd0 h* ml; */
4988 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
4989 def_stmt
= gimple_build_assign (t1
, MULT_HIGHPART_EXPR
, oprnd0
,
4990 build_int_cst (itype
, ml
));
4994 /* t2 = t1 + oprnd0; */
4995 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
4996 t2
= vect_recog_temp_ssa_var (itype
, NULL
);
4997 def_stmt
= gimple_build_assign (t2
, PLUS_EXPR
, t1
, oprnd0
);
5004 /* t3 = t2 >> post_shift; */
5005 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5006 t3
= vect_recog_temp_ssa_var (itype
, NULL
);
5007 def_stmt
= gimple_build_assign (t3
, RSHIFT_EXPR
, t2
,
5008 build_int_cst (itype
, post_shift
));
5015 get_range_query (cfun
)->range_of_expr (r
, oprnd0
);
5016 if (!r
.varying_p () && !r
.undefined_p ())
5018 if (!wi::neg_p (r
.lower_bound (), TYPE_SIGN (itype
)))
5020 else if (wi::neg_p (r
.upper_bound (), TYPE_SIGN (itype
)))
5024 if (msb
== 0 && d
>= 0)
5028 pattern_stmt
= def_stmt
;
5032 /* t4 = oprnd0 >> (prec - 1);
5033 or if we know from VRP that oprnd0 >= 0
5035 or if we know from VRP that oprnd0 < 0
5037 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5038 t4
= vect_recog_temp_ssa_var (itype
, NULL
);
5040 def_stmt
= gimple_build_assign (t4
, INTEGER_CST
,
5041 build_int_cst (itype
, msb
));
5043 def_stmt
= gimple_build_assign (t4
, RSHIFT_EXPR
, oprnd0
,
5044 build_int_cst (itype
, prec
- 1));
5045 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5047 /* q = t3 - t4; or q = t4 - t3; */
5048 q
= vect_recog_temp_ssa_var (itype
, NULL
);
5049 pattern_stmt
= gimple_build_assign (q
, MINUS_EXPR
, d
< 0 ? t4
: t3
,
5054 if (rhs_code
== TRUNC_MOD_EXPR
)
5058 /* We divided. Now finish by:
5061 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
);
5063 t1
= vect_recog_temp_ssa_var (itype
, NULL
);
5064 def_stmt
= gimple_build_assign (t1
, MULT_EXPR
, q
, oprnd1
);
5065 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
);
5067 r
= vect_recog_temp_ssa_var (itype
, NULL
);
5068 pattern_stmt
= gimple_build_assign (r
, MINUS_EXPR
, oprnd0
, t1
);
5071 /* Pattern detected. */
5072 vect_pattern_detected ("vect_recog_divmod_pattern", last_stmt
);
5074 *type_out
= vectype
;
5075 return pattern_stmt
;
5078 /* Function vect_recog_mixed_size_cond_pattern
5080 Try to find the following pattern:
5085 S1 a_T = x_t CMP y_t ? b_T : c_T;
5087 where type 'TYPE' is an integral type which has different size
5088 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
5089 than 'type', the constants need to fit into an integer type
5090 with the same width as 'type') or results of conversion from 'type'.
5094 * STMT_VINFO: The stmt from which the pattern search begins.
5098 * TYPE_OUT: The type of the output of this pattern.
5100 * Return value: A new stmt that will be used to replace the pattern.
5101 Additionally a def_stmt is added.
5103 a_it = x_t CMP y_t ? b_it : c_it;
5104 a_T = (TYPE) a_it; */
5107 vect_recog_mixed_size_cond_pattern (vec_info
*vinfo
,
5108 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5110 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5111 tree cond_expr
, then_clause
, else_clause
;
5112 tree type
, vectype
, comp_vectype
, itype
= NULL_TREE
, vecitype
;
5113 gimple
*pattern_stmt
, *def_stmt
;
5114 tree orig_type0
= NULL_TREE
, orig_type1
= NULL_TREE
;
5115 gimple
*def_stmt0
= NULL
, *def_stmt1
= NULL
;
5117 tree comp_scalar_type
;
5119 if (!is_gimple_assign (last_stmt
)
5120 || gimple_assign_rhs_code (last_stmt
) != COND_EXPR
5121 || STMT_VINFO_DEF_TYPE (stmt_vinfo
) != vect_internal_def
)
5124 cond_expr
= gimple_assign_rhs1 (last_stmt
);
5125 then_clause
= gimple_assign_rhs2 (last_stmt
);
5126 else_clause
= gimple_assign_rhs3 (last_stmt
);
5128 if (!COMPARISON_CLASS_P (cond_expr
))
5131 comp_scalar_type
= TREE_TYPE (TREE_OPERAND (cond_expr
, 0));
5132 comp_vectype
= get_vectype_for_scalar_type (vinfo
, comp_scalar_type
);
5133 if (comp_vectype
== NULL_TREE
)
5136 type
= TREE_TYPE (gimple_assign_lhs (last_stmt
));
5137 if (types_compatible_p (type
, comp_scalar_type
)
5138 || ((TREE_CODE (then_clause
) != INTEGER_CST
5139 || TREE_CODE (else_clause
) != INTEGER_CST
)
5140 && !INTEGRAL_TYPE_P (comp_scalar_type
))
5141 || !INTEGRAL_TYPE_P (type
))
5144 if ((TREE_CODE (then_clause
) != INTEGER_CST
5145 && !type_conversion_p (vinfo
, then_clause
, false,
5146 &orig_type0
, &def_stmt0
, &promotion
))
5147 || (TREE_CODE (else_clause
) != INTEGER_CST
5148 && !type_conversion_p (vinfo
, else_clause
, false,
5149 &orig_type1
, &def_stmt1
, &promotion
)))
5152 if (orig_type0
&& orig_type1
5153 && !types_compatible_p (orig_type0
, orig_type1
))
5158 if (!types_compatible_p (orig_type0
, comp_scalar_type
))
5160 then_clause
= gimple_assign_rhs1 (def_stmt0
);
5166 if (!types_compatible_p (orig_type1
, comp_scalar_type
))
5168 else_clause
= gimple_assign_rhs1 (def_stmt1
);
5173 HOST_WIDE_INT cmp_mode_size
5174 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype
));
5176 scalar_int_mode type_mode
= SCALAR_INT_TYPE_MODE (type
);
5177 if (GET_MODE_BITSIZE (type_mode
) == cmp_mode_size
)
5180 vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5181 if (vectype
== NULL_TREE
)
5184 if (expand_vec_cond_expr_p (vectype
, comp_vectype
, TREE_CODE (cond_expr
)))
5187 if (itype
== NULL_TREE
)
5188 itype
= build_nonstandard_integer_type (cmp_mode_size
,
5189 TYPE_UNSIGNED (type
));
5191 if (itype
== NULL_TREE
5192 || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype
)) != cmp_mode_size
)
5195 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
5196 if (vecitype
== NULL_TREE
)
5199 if (!expand_vec_cond_expr_p (vecitype
, comp_vectype
, TREE_CODE (cond_expr
)))
5202 if (GET_MODE_BITSIZE (type_mode
) > cmp_mode_size
)
5204 if ((TREE_CODE (then_clause
) == INTEGER_CST
5205 && !int_fits_type_p (then_clause
, itype
))
5206 || (TREE_CODE (else_clause
) == INTEGER_CST
5207 && !int_fits_type_p (else_clause
, itype
)))
5211 def_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5212 COND_EXPR
, unshare_expr (cond_expr
),
5213 fold_convert (itype
, then_clause
),
5214 fold_convert (itype
, else_clause
));
5215 pattern_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
5216 NOP_EXPR
, gimple_assign_lhs (def_stmt
));
5218 append_pattern_def_seq (vinfo
, stmt_vinfo
, def_stmt
, vecitype
);
5219 *type_out
= vectype
;
5221 vect_pattern_detected ("vect_recog_mixed_size_cond_pattern", last_stmt
);
5223 return pattern_stmt
;
5227 /* Helper function of vect_recog_bool_pattern. Called recursively, return
5228 true if bool VAR can and should be optimized that way. Assume it shouldn't
5229 in case it's a result of a comparison which can be directly vectorized into
5230 a vector comparison. Fills in STMTS with all stmts visited during the
5234 check_bool_pattern (tree var
, vec_info
*vinfo
, hash_set
<gimple
*> &stmts
)
5237 enum tree_code rhs_code
;
5239 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
5243 gassign
*def_stmt
= dyn_cast
<gassign
*> (def_stmt_info
->stmt
);
5247 if (stmts
.contains (def_stmt
))
5250 rhs1
= gimple_assign_rhs1 (def_stmt
);
5251 rhs_code
= gimple_assign_rhs_code (def_stmt
);
5255 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5260 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1
)))
5262 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5267 if (! check_bool_pattern (rhs1
, vinfo
, stmts
))
5274 if (! check_bool_pattern (rhs1
, vinfo
, stmts
)
5275 || ! check_bool_pattern (gimple_assign_rhs2 (def_stmt
), vinfo
, stmts
))
5280 if (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
)
5282 tree vecitype
, comp_vectype
;
5284 /* If the comparison can throw, then is_gimple_condexpr will be
5285 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
5286 if (stmt_could_throw_p (cfun
, def_stmt
))
5289 comp_vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs1
));
5290 if (comp_vectype
== NULL_TREE
)
5293 tree mask_type
= get_mask_type_for_scalar_type (vinfo
,
5296 && expand_vec_cmp_expr_p (comp_vectype
, mask_type
, rhs_code
))
5299 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
)
5301 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
5303 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
5304 vecitype
= get_vectype_for_scalar_type (vinfo
, itype
);
5305 if (vecitype
== NULL_TREE
)
5309 vecitype
= comp_vectype
;
5310 if (! expand_vec_cond_expr_p (vecitype
, comp_vectype
, rhs_code
))
5318 bool res
= stmts
.add (def_stmt
);
5319 /* We can't end up recursing when just visiting SSA defs but not PHIs. */
5326 /* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
5327 stmt (SSA_NAME_DEF_STMT of VAR) adding a cast to STMT_INFOs
5328 pattern sequence. */
5331 adjust_bool_pattern_cast (vec_info
*vinfo
,
5332 tree type
, tree var
, stmt_vec_info stmt_info
)
5334 gimple
*cast_stmt
= gimple_build_assign (vect_recog_temp_ssa_var (type
, NULL
),
5336 append_pattern_def_seq (vinfo
, stmt_info
, cast_stmt
,
5337 get_vectype_for_scalar_type (vinfo
, type
));
5338 return gimple_assign_lhs (cast_stmt
);
5341 /* Helper function of vect_recog_bool_pattern. Do the actual transformations.
5342 VAR is an SSA_NAME that should be transformed from bool to a wider integer
5343 type, OUT_TYPE is the desired final integer type of the whole pattern.
5344 STMT_INFO is the info of the pattern root and is where pattern stmts should
5345 be associated with. DEFS is a map of pattern defs. */
5348 adjust_bool_pattern (vec_info
*vinfo
, tree var
, tree out_type
,
5349 stmt_vec_info stmt_info
, hash_map
<tree
, tree
> &defs
)
5351 gimple
*stmt
= SSA_NAME_DEF_STMT (var
);
5352 enum tree_code rhs_code
, def_rhs_code
;
5353 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
5355 gimple
*pattern_stmt
, *def_stmt
;
5356 tree trueval
= NULL_TREE
;
5358 rhs1
= gimple_assign_rhs1 (stmt
);
5359 rhs2
= gimple_assign_rhs2 (stmt
);
5360 rhs_code
= gimple_assign_rhs_code (stmt
);
5361 loc
= gimple_location (stmt
);
5366 irhs1
= *defs
.get (rhs1
);
5367 itype
= TREE_TYPE (irhs1
);
5369 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5374 irhs1
= *defs
.get (rhs1
);
5375 itype
= TREE_TYPE (irhs1
);
5377 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5378 BIT_XOR_EXPR
, irhs1
, build_int_cst (itype
, 1));
5382 /* Try to optimize x = y & (a < b ? 1 : 0); into
5383 x = (a < b ? y : 0);
5389 S1 a_b = x1 CMP1 y1;
5390 S2 b_b = x2 CMP2 y2;
5392 S4 d_T = (TYPE) c_b;
5394 we would normally emit:
5396 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5397 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5398 S3' c_T = a_T & b_T;
5401 but we can save one stmt by using the
5402 result of one of the COND_EXPRs in the other COND_EXPR and leave
5403 BIT_AND_EXPR stmt out:
5405 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5406 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5409 At least when VEC_COND_EXPR is implemented using masks
5410 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
5411 computes the comparison masks and ands it, in one case with
5412 all ones vector, in the other case with a vector register.
5413 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
5414 often more expensive. */
5415 def_stmt
= SSA_NAME_DEF_STMT (rhs2
);
5416 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
5417 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
5419 irhs1
= *defs
.get (rhs1
);
5420 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
5421 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
5422 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
5424 rhs_code
= def_rhs_code
;
5426 rhs2
= gimple_assign_rhs2 (def_stmt
);
5431 irhs2
= *defs
.get (rhs2
);
5434 def_stmt
= SSA_NAME_DEF_STMT (rhs1
);
5435 def_rhs_code
= gimple_assign_rhs_code (def_stmt
);
5436 if (TREE_CODE_CLASS (def_rhs_code
) == tcc_comparison
)
5438 irhs2
= *defs
.get (rhs2
);
5439 tree def_rhs1
= gimple_assign_rhs1 (def_stmt
);
5440 if (TYPE_PRECISION (TREE_TYPE (irhs2
))
5441 == GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (def_rhs1
))))
5443 rhs_code
= def_rhs_code
;
5445 rhs2
= gimple_assign_rhs2 (def_stmt
);
5450 irhs1
= *defs
.get (rhs1
);
5456 irhs1
= *defs
.get (rhs1
);
5457 irhs2
= *defs
.get (rhs2
);
5459 if (TYPE_PRECISION (TREE_TYPE (irhs1
))
5460 != TYPE_PRECISION (TREE_TYPE (irhs2
)))
5462 int prec1
= TYPE_PRECISION (TREE_TYPE (irhs1
));
5463 int prec2
= TYPE_PRECISION (TREE_TYPE (irhs2
));
5464 int out_prec
= TYPE_PRECISION (out_type
);
5465 if (absu_hwi (out_prec
- prec1
) < absu_hwi (out_prec
- prec2
))
5466 irhs2
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs1
), irhs2
,
5468 else if (absu_hwi (out_prec
- prec1
) > absu_hwi (out_prec
- prec2
))
5469 irhs1
= adjust_bool_pattern_cast (vinfo
, TREE_TYPE (irhs2
), irhs1
,
5473 irhs1
= adjust_bool_pattern_cast (vinfo
,
5474 out_type
, irhs1
, stmt_info
);
5475 irhs2
= adjust_bool_pattern_cast (vinfo
,
5476 out_type
, irhs2
, stmt_info
);
5479 itype
= TREE_TYPE (irhs1
);
5481 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5482 rhs_code
, irhs1
, irhs2
);
5487 gcc_assert (TREE_CODE_CLASS (rhs_code
) == tcc_comparison
);
5488 if (TREE_CODE (TREE_TYPE (rhs1
)) != INTEGER_TYPE
5489 || !TYPE_UNSIGNED (TREE_TYPE (rhs1
))
5490 || maybe_ne (TYPE_PRECISION (TREE_TYPE (rhs1
)),
5491 GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1
)))))
5493 scalar_mode mode
= SCALAR_TYPE_MODE (TREE_TYPE (rhs1
));
5495 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode
), 1);
5498 itype
= TREE_TYPE (rhs1
);
5499 cond_expr
= build2_loc (loc
, rhs_code
, itype
, rhs1
, rhs2
);
5500 if (trueval
== NULL_TREE
)
5501 trueval
= build_int_cst (itype
, 1);
5503 gcc_checking_assert (useless_type_conversion_p (itype
,
5504 TREE_TYPE (trueval
)));
5506 = gimple_build_assign (vect_recog_temp_ssa_var (itype
, NULL
),
5507 COND_EXPR
, cond_expr
, trueval
,
5508 build_int_cst (itype
, 0));
5512 gimple_set_location (pattern_stmt
, loc
);
5513 append_pattern_def_seq (vinfo
, stmt_info
, pattern_stmt
,
5514 get_vectype_for_scalar_type (vinfo
, itype
));
5515 defs
.put (var
, gimple_assign_lhs (pattern_stmt
));
5518 /* Comparison function to qsort a vector of gimple stmts after UID. */
5521 sort_after_uid (const void *p1
, const void *p2
)
5523 const gimple
*stmt1
= *(const gimple
* const *)p1
;
5524 const gimple
*stmt2
= *(const gimple
* const *)p2
;
5525 return gimple_uid (stmt1
) - gimple_uid (stmt2
);
5528 /* Create pattern stmts for all stmts participating in the bool pattern
5529 specified by BOOL_STMT_SET and its root STMT_INFO with the desired type
5530 OUT_TYPE. Return the def of the pattern root. */
5533 adjust_bool_stmts (vec_info
*vinfo
, hash_set
<gimple
*> &bool_stmt_set
,
5534 tree out_type
, stmt_vec_info stmt_info
)
5536 /* Gather original stmts in the bool pattern in their order of appearance
5538 auto_vec
<gimple
*> bool_stmts (bool_stmt_set
.elements ());
5539 for (hash_set
<gimple
*>::iterator i
= bool_stmt_set
.begin ();
5540 i
!= bool_stmt_set
.end (); ++i
)
5541 bool_stmts
.quick_push (*i
);
5542 bool_stmts
.qsort (sort_after_uid
);
5544 /* Now process them in that order, producing pattern stmts. */
5545 hash_map
<tree
, tree
> defs
;
5546 for (unsigned i
= 0; i
< bool_stmts
.length (); ++i
)
5547 adjust_bool_pattern (vinfo
, gimple_assign_lhs (bool_stmts
[i
]),
5548 out_type
, stmt_info
, defs
);
5550 /* Pop the last pattern seq stmt and install it as pattern root for STMT. */
5551 gimple
*pattern_stmt
5552 = gimple_seq_last_stmt (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
5553 return gimple_assign_lhs (pattern_stmt
);
5556 /* Return the proper type for converting bool VAR into
5557 an integer value or NULL_TREE if no such type exists.
5558 The type is chosen so that the converted value has the
5559 same number of elements as VAR's vector type. */
5562 integer_type_for_mask (tree var
, vec_info
*vinfo
)
5564 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
5567 stmt_vec_info def_stmt_info
= vect_get_internal_def (vinfo
, var
);
5568 if (!def_stmt_info
|| !vect_use_mask_type_p (def_stmt_info
))
5571 return build_nonstandard_integer_type (def_stmt_info
->mask_precision
, 1);
5574 /* Function vect_recog_gcond_pattern
5576 Try to find pattern like following:
5580 where operator 'op' is not != and convert it to an adjusted boolean pattern
5585 and set the mask type on MASK.
5589 * STMT_VINFO: The stmt at the end from which the pattern
5590 search begins, i.e. cast of a bool to
5595 * TYPE_OUT: The type of the output of this pattern.
5597 * Return value: A new stmt that will be used to replace the pattern. */
5600 vect_recog_gcond_pattern (vec_info
*vinfo
,
5601 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5603 /* Currently we only support this for loop vectorization and when multiple
5605 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
5606 if (!loop_vinfo
|| !LOOP_VINFO_EARLY_BREAKS (loop_vinfo
))
5609 gimple
*last_stmt
= STMT_VINFO_STMT (stmt_vinfo
);
5611 if (!(cond
= dyn_cast
<gcond
*> (last_stmt
)))
5614 auto lhs
= gimple_cond_lhs (cond
);
5615 auto rhs
= gimple_cond_rhs (cond
);
5616 auto code
= gimple_cond_code (cond
);
5618 tree scalar_type
= TREE_TYPE (lhs
);
5619 if (VECTOR_TYPE_P (scalar_type
))
5624 && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type
))
5627 tree vecitype
= get_vectype_for_scalar_type (vinfo
, scalar_type
);
5628 if (vecitype
== NULL_TREE
)
5631 tree vectype
= truth_type_for (vecitype
);
5633 tree new_lhs
= vect_recog_temp_ssa_var (boolean_type_node
, NULL
);
5634 gimple
*new_stmt
= gimple_build_assign (new_lhs
, code
, lhs
, rhs
);
5635 append_pattern_def_seq (vinfo
, stmt_vinfo
, new_stmt
, vectype
, scalar_type
);
5637 gimple
*pattern_stmt
5638 = gimple_build_cond (NE_EXPR
, new_lhs
,
5639 build_int_cst (TREE_TYPE (new_lhs
), 0),
5640 NULL_TREE
, NULL_TREE
);
5641 *type_out
= vectype
;
5642 vect_pattern_detected ("vect_recog_gcond_pattern", last_stmt
);
5643 return pattern_stmt
;
5646 /* Function vect_recog_bool_pattern
5648 Try to find pattern like following:
5650 bool a_b, b_b, c_b, d_b, e_b;
5653 S1 a_b = x1 CMP1 y1;
5654 S2 b_b = x2 CMP2 y2;
5656 S4 d_b = x3 CMP3 y3;
5658 S6 f_T = (TYPE) e_b;
5660 where type 'TYPE' is an integral type. Or a similar pattern
5663 S6 f_Y = e_b ? r_Y : s_Y;
5665 as results from if-conversion of a complex condition.
5669 * STMT_VINFO: The stmt at the end from which the pattern
5670 search begins, i.e. cast of a bool to
5675 * TYPE_OUT: The type of the output of this pattern.
5677 * Return value: A new stmt that will be used to replace the pattern.
5679 Assuming size of TYPE is the same as size of all comparisons
5680 (otherwise some casts would be added where needed), the above
5681 sequence we create related pattern stmts:
5682 S1' a_T = x1 CMP1 y1 ? 1 : 0;
5683 S3' c_T = x2 CMP2 y2 ? a_T : 0;
5684 S4' d_T = x3 CMP3 y3 ? 1 : 0;
5685 S5' e_T = c_T | d_T;
5688 Instead of the above S3' we could emit:
5689 S2' b_T = x2 CMP2 y2 ? 1 : 0;
5690 S3' c_T = a_T | b_T;
5691 but the above is more efficient. */
5694 vect_recog_bool_pattern (vec_info
*vinfo
,
5695 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5697 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5698 enum tree_code rhs_code
;
5699 tree var
, lhs
, rhs
, vectype
;
5700 gimple
*pattern_stmt
;
5702 if (!is_gimple_assign (last_stmt
))
5705 var
= gimple_assign_rhs1 (last_stmt
);
5706 lhs
= gimple_assign_lhs (last_stmt
);
5707 rhs_code
= gimple_assign_rhs_code (last_stmt
);
5709 if (rhs_code
== VIEW_CONVERT_EXPR
)
5710 var
= TREE_OPERAND (var
, 0);
5712 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var
)))
5715 hash_set
<gimple
*> bool_stmts
;
5717 if (CONVERT_EXPR_CODE_P (rhs_code
)
5718 || rhs_code
== VIEW_CONVERT_EXPR
)
5720 if (! INTEGRAL_TYPE_P (TREE_TYPE (lhs
))
5721 || VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
5723 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5725 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
5727 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
5728 TREE_TYPE (lhs
), stmt_vinfo
);
5729 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5730 if (useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
5731 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
5734 = gimple_build_assign (lhs
, NOP_EXPR
, rhs
);
5738 tree type
= integer_type_for_mask (var
, vinfo
);
5739 tree cst0
, cst1
, tmp
;
5744 /* We may directly use cond with narrowed type to avoid
5745 multiple cond exprs with following result packing and
5746 perform single cond with packed mask instead. In case
5747 of widening we better make cond first and then extract
5749 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (lhs
)))
5750 type
= TREE_TYPE (lhs
);
5752 cst0
= build_int_cst (type
, 0);
5753 cst1
= build_int_cst (type
, 1);
5754 tmp
= vect_recog_temp_ssa_var (type
, NULL
);
5755 pattern_stmt
= gimple_build_assign (tmp
, COND_EXPR
, var
, cst1
, cst0
);
5757 if (!useless_type_conversion_p (type
, TREE_TYPE (lhs
)))
5759 tree new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5760 append_pattern_def_seq (vinfo
, stmt_vinfo
,
5761 pattern_stmt
, new_vectype
);
5763 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5764 pattern_stmt
= gimple_build_assign (lhs
, CONVERT_EXPR
, tmp
);
5768 *type_out
= vectype
;
5769 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5771 return pattern_stmt
;
5773 else if (rhs_code
== COND_EXPR
5774 && TREE_CODE (var
) == SSA_NAME
)
5776 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5777 if (vectype
== NULL_TREE
)
5780 /* Build a scalar type for the boolean result that when
5781 vectorized matches the vector type of the result in
5782 size and number of elements. */
5784 = vector_element_size (tree_to_poly_uint64 (TYPE_SIZE (vectype
)),
5785 TYPE_VECTOR_SUBPARTS (vectype
));
5788 = build_nonstandard_integer_type (prec
,
5789 TYPE_UNSIGNED (TREE_TYPE (var
)));
5790 if (get_vectype_for_scalar_type (vinfo
, type
) == NULL_TREE
)
5793 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
5794 var
= adjust_bool_stmts (vinfo
, bool_stmts
, type
, stmt_vinfo
);
5795 else if (integer_type_for_mask (var
, vinfo
))
5798 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5800 = gimple_build_assign (lhs
, COND_EXPR
,
5801 build2 (NE_EXPR
, boolean_type_node
,
5802 var
, build_int_cst (TREE_TYPE (var
), 0)),
5803 gimple_assign_rhs2 (last_stmt
),
5804 gimple_assign_rhs3 (last_stmt
));
5805 *type_out
= vectype
;
5806 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5808 return pattern_stmt
;
5810 else if (rhs_code
== SSA_NAME
5811 && STMT_VINFO_DATA_REF (stmt_vinfo
))
5813 stmt_vec_info pattern_stmt_info
;
5814 vectype
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5815 if (!vectype
|| !VECTOR_MODE_P (TYPE_MODE (vectype
)))
5818 if (check_bool_pattern (var
, vinfo
, bool_stmts
))
5819 rhs
= adjust_bool_stmts (vinfo
, bool_stmts
,
5820 TREE_TYPE (vectype
), stmt_vinfo
);
5823 tree type
= integer_type_for_mask (var
, vinfo
);
5824 tree cst0
, cst1
, new_vectype
;
5829 if (TYPE_MODE (type
) == TYPE_MODE (TREE_TYPE (vectype
)))
5830 type
= TREE_TYPE (vectype
);
5832 cst0
= build_int_cst (type
, 0);
5833 cst1
= build_int_cst (type
, 1);
5834 new_vectype
= get_vectype_for_scalar_type (vinfo
, type
);
5836 rhs
= vect_recog_temp_ssa_var (type
, NULL
);
5837 pattern_stmt
= gimple_build_assign (rhs
, COND_EXPR
, var
, cst1
, cst0
);
5838 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, new_vectype
);
5841 lhs
= build1 (VIEW_CONVERT_EXPR
, TREE_TYPE (vectype
), lhs
);
5842 if (!useless_type_conversion_p (TREE_TYPE (lhs
), TREE_TYPE (rhs
)))
5844 tree rhs2
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5845 gimple
*cast_stmt
= gimple_build_assign (rhs2
, NOP_EXPR
, rhs
);
5846 append_pattern_def_seq (vinfo
, stmt_vinfo
, cast_stmt
);
5849 pattern_stmt
= gimple_build_assign (lhs
, SSA_NAME
, rhs
);
5850 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
5851 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
5852 *type_out
= vectype
;
5853 vect_pattern_detected ("vect_recog_bool_pattern", last_stmt
);
5855 return pattern_stmt
;
5862 /* A helper for vect_recog_mask_conversion_pattern. Build
5863 conversion of MASK to a type suitable for masking VECTYPE.
5864 Built statement gets required vectype and is appended to
5865 a pattern sequence of STMT_VINFO.
5867 Return converted mask. */
5870 build_mask_conversion (vec_info
*vinfo
,
5871 tree mask
, tree vectype
, stmt_vec_info stmt_vinfo
)
5876 masktype
= truth_type_for (vectype
);
5877 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (masktype
), NULL
);
5878 stmt
= gimple_build_assign (tmp
, CONVERT_EXPR
, mask
);
5879 append_pattern_def_seq (vinfo
, stmt_vinfo
,
5880 stmt
, masktype
, TREE_TYPE (vectype
));
5886 /* Function vect_recog_mask_conversion_pattern
5888 Try to find statements which require boolean type
5889 converison. Additional conversion statements are
5890 added to handle such cases. For example:
5900 S4 c_1 = m_3 ? c_2 : c_3;
5902 Will be transformed into:
5906 S3'' m_2' = (_Bool[bitsize=32])m_2
5907 S3' m_3' = m_1 & m_2';
5908 S4'' m_3'' = (_Bool[bitsize=8])m_3'
5909 S4' c_1' = m_3'' ? c_2 : c_3; */
5912 vect_recog_mask_conversion_pattern (vec_info
*vinfo
,
5913 stmt_vec_info stmt_vinfo
, tree
*type_out
)
5915 gimple
*last_stmt
= stmt_vinfo
->stmt
;
5916 enum tree_code rhs_code
;
5917 tree lhs
= NULL_TREE
, rhs1
, rhs2
, tmp
, rhs1_type
, rhs2_type
;
5918 tree vectype1
, vectype2
;
5919 stmt_vec_info pattern_stmt_info
;
5920 tree rhs1_op0
= NULL_TREE
, rhs1_op1
= NULL_TREE
;
5921 tree rhs1_op0_type
= NULL_TREE
, rhs1_op1_type
= NULL_TREE
;
5923 /* Check for MASK_LOAD and MASK_STORE as well as COND_OP calls requiring mask
5925 if (is_gimple_call (last_stmt
)
5926 && gimple_call_internal_p (last_stmt
))
5928 gcall
*pattern_stmt
;
5930 internal_fn ifn
= gimple_call_internal_fn (last_stmt
);
5931 int mask_argno
= internal_fn_mask_index (ifn
);
5935 bool store_p
= internal_store_fn_p (ifn
);
5936 bool load_p
= internal_store_fn_p (ifn
);
5939 int rhs_index
= internal_fn_stored_value_index (ifn
);
5940 tree rhs
= gimple_call_arg (last_stmt
, rhs_index
);
5941 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (rhs
));
5945 lhs
= gimple_call_lhs (last_stmt
);
5948 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
5954 tree mask_arg
= gimple_call_arg (last_stmt
, mask_argno
);
5955 tree mask_arg_type
= integer_type_for_mask (mask_arg
, vinfo
);
5958 vectype2
= get_mask_type_for_scalar_type (vinfo
, mask_arg_type
);
5961 || known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
5962 TYPE_VECTOR_SUBPARTS (vectype2
)))
5965 else if (store_p
|| load_p
)
5968 tmp
= build_mask_conversion (vinfo
, mask_arg
, vectype1
, stmt_vinfo
);
5970 auto_vec
<tree
, 8> args
;
5971 unsigned int nargs
= gimple_call_num_args (last_stmt
);
5972 args
.safe_grow (nargs
, true);
5973 for (unsigned int i
= 0; i
< nargs
; ++i
)
5974 args
[i
] = ((int) i
== mask_argno
5976 : gimple_call_arg (last_stmt
, i
));
5977 pattern_stmt
= gimple_build_call_internal_vec (ifn
, args
);
5981 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
5982 gimple_call_set_lhs (pattern_stmt
, lhs
);
5985 if (load_p
|| store_p
)
5986 gimple_call_set_nothrow (pattern_stmt
, true);
5988 pattern_stmt_info
= vinfo
->add_stmt (pattern_stmt
);
5989 if (STMT_VINFO_DATA_REF (stmt_vinfo
))
5990 vinfo
->move_dr (pattern_stmt_info
, stmt_vinfo
);
5992 *type_out
= vectype1
;
5993 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
5995 return pattern_stmt
;
5998 if (!is_gimple_assign (last_stmt
))
6001 gimple
*pattern_stmt
;
6002 lhs
= gimple_assign_lhs (last_stmt
);
6003 rhs1
= gimple_assign_rhs1 (last_stmt
);
6004 rhs_code
= gimple_assign_rhs_code (last_stmt
);
6006 /* Check for cond expression requiring mask conversion. */
6007 if (rhs_code
== COND_EXPR
)
6009 vectype1
= get_vectype_for_scalar_type (vinfo
, TREE_TYPE (lhs
));
6011 if (TREE_CODE (rhs1
) == SSA_NAME
)
6013 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
6017 else if (COMPARISON_CLASS_P (rhs1
))
6019 /* Check whether we're comparing scalar booleans and (if so)
6020 whether a better mask type exists than the mask associated
6021 with boolean-sized elements. This avoids unnecessary packs
6022 and unpacks if the booleans are set from comparisons of
6023 wider types. E.g. in:
6025 int x1, x2, x3, x4, y1, y1;
6027 bool b1 = (x1 == x2);
6028 bool b2 = (x3 == x4);
6029 ... = b1 == b2 ? y1 : y2;
6031 it is better for b1 and b2 to use the mask type associated
6032 with int elements rather bool (byte) elements. */
6033 rhs1_op0
= TREE_OPERAND (rhs1
, 0);
6034 rhs1_op1
= TREE_OPERAND (rhs1
, 1);
6035 if (!rhs1_op0
|| !rhs1_op1
)
6037 rhs1_op0_type
= integer_type_for_mask (rhs1_op0
, vinfo
);
6038 rhs1_op1_type
= integer_type_for_mask (rhs1_op1
, vinfo
);
6041 rhs1_type
= TREE_TYPE (rhs1_op0
);
6042 else if (!rhs1_op1_type
)
6043 rhs1_type
= TREE_TYPE (rhs1_op1
);
6044 else if (TYPE_PRECISION (rhs1_op0_type
)
6045 != TYPE_PRECISION (rhs1_op1_type
))
6047 int tmp0
= (int) TYPE_PRECISION (rhs1_op0_type
)
6048 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
6049 int tmp1
= (int) TYPE_PRECISION (rhs1_op1_type
)
6050 - (int) TYPE_PRECISION (TREE_TYPE (lhs
));
6051 if ((tmp0
> 0 && tmp1
> 0) || (tmp0
< 0 && tmp1
< 0))
6053 if (abs (tmp0
) > abs (tmp1
))
6054 rhs1_type
= rhs1_op1_type
;
6056 rhs1_type
= rhs1_op0_type
;
6059 rhs1_type
= build_nonstandard_integer_type
6060 (TYPE_PRECISION (TREE_TYPE (lhs
)), 1);
6063 rhs1_type
= rhs1_op0_type
;
6068 vectype2
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
6070 if (!vectype1
|| !vectype2
)
6073 /* Continue if a conversion is needed. Also continue if we have
6074 a comparison whose vector type would normally be different from
6075 VECTYPE2 when considered in isolation. In that case we'll
6076 replace the comparison with an SSA name (so that we can record
6077 its vector type) and behave as though the comparison was an SSA
6078 name from the outset. */
6079 if (known_eq (TYPE_VECTOR_SUBPARTS (vectype1
),
6080 TYPE_VECTOR_SUBPARTS (vectype2
))
6085 /* If rhs1 is invariant and we can promote it leave the COND_EXPR
6086 in place, we can handle it in vectorizable_condition. This avoids
6087 unnecessary promotion stmts and increased vectorization factor. */
6088 if (COMPARISON_CLASS_P (rhs1
)
6089 && INTEGRAL_TYPE_P (rhs1_type
)
6090 && known_le (TYPE_VECTOR_SUBPARTS (vectype1
),
6091 TYPE_VECTOR_SUBPARTS (vectype2
)))
6093 enum vect_def_type dt
;
6094 if (vect_is_simple_use (TREE_OPERAND (rhs1
, 0), vinfo
, &dt
)
6095 && dt
== vect_external_def
6096 && vect_is_simple_use (TREE_OPERAND (rhs1
, 1), vinfo
, &dt
)
6097 && (dt
== vect_external_def
6098 || dt
== vect_constant_def
))
6100 tree wide_scalar_type
= build_nonstandard_integer_type
6101 (vector_element_bits (vectype1
), TYPE_UNSIGNED (rhs1_type
));
6102 tree vectype3
= get_vectype_for_scalar_type (vinfo
,
6104 if (expand_vec_cond_expr_p (vectype1
, vectype3
, TREE_CODE (rhs1
)))
6109 /* If rhs1 is a comparison we need to move it into a
6110 separate statement. */
6111 if (TREE_CODE (rhs1
) != SSA_NAME
)
6113 tmp
= vect_recog_temp_ssa_var (TREE_TYPE (rhs1
), NULL
);
6115 && TYPE_PRECISION (rhs1_op0_type
) != TYPE_PRECISION (rhs1_type
))
6116 rhs1_op0
= build_mask_conversion (vinfo
, rhs1_op0
,
6117 vectype2
, stmt_vinfo
);
6119 && TYPE_PRECISION (rhs1_op1_type
) != TYPE_PRECISION (rhs1_type
))
6120 rhs1_op1
= build_mask_conversion (vinfo
, rhs1_op1
,
6121 vectype2
, stmt_vinfo
);
6122 pattern_stmt
= gimple_build_assign (tmp
, TREE_CODE (rhs1
),
6123 rhs1_op0
, rhs1_op1
);
6125 append_pattern_def_seq (vinfo
, stmt_vinfo
, pattern_stmt
, vectype2
,
6129 if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1
),
6130 TYPE_VECTOR_SUBPARTS (vectype2
)))
6131 tmp
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
6135 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6136 pattern_stmt
= gimple_build_assign (lhs
, COND_EXPR
, tmp
,
6137 gimple_assign_rhs2 (last_stmt
),
6138 gimple_assign_rhs3 (last_stmt
));
6140 *type_out
= vectype1
;
6141 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
6143 return pattern_stmt
;
6146 /* Now check for binary boolean operations requiring conversion for
6148 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
6151 if (rhs_code
!= BIT_IOR_EXPR
6152 && rhs_code
!= BIT_XOR_EXPR
6153 && rhs_code
!= BIT_AND_EXPR
6154 && TREE_CODE_CLASS (rhs_code
) != tcc_comparison
)
6157 rhs2
= gimple_assign_rhs2 (last_stmt
);
6159 rhs1_type
= integer_type_for_mask (rhs1
, vinfo
);
6160 rhs2_type
= integer_type_for_mask (rhs2
, vinfo
);
6162 if (!rhs1_type
|| !rhs2_type
6163 || TYPE_PRECISION (rhs1_type
) == TYPE_PRECISION (rhs2_type
))
6166 if (TYPE_PRECISION (rhs1_type
) < TYPE_PRECISION (rhs2_type
))
6168 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs1_type
);
6171 rhs2
= build_mask_conversion (vinfo
, rhs2
, vectype1
, stmt_vinfo
);
6175 vectype1
= get_mask_type_for_scalar_type (vinfo
, rhs2_type
);
6178 rhs1
= build_mask_conversion (vinfo
, rhs1
, vectype1
, stmt_vinfo
);
6181 lhs
= vect_recog_temp_ssa_var (TREE_TYPE (lhs
), NULL
);
6182 pattern_stmt
= gimple_build_assign (lhs
, rhs_code
, rhs1
, rhs2
);
6184 *type_out
= vectype1
;
6185 vect_pattern_detected ("vect_recog_mask_conversion_pattern", last_stmt
);
6187 return pattern_stmt
;
6190 /* STMT_INFO is a load or store. If the load or store is conditional, return
6191 the boolean condition under which it occurs, otherwise return null. */
6194 vect_get_load_store_mask (stmt_vec_info stmt_info
)
6196 if (gassign
*def_assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6198 gcc_assert (gimple_assign_single_p (def_assign
));
6202 if (gcall
*def_call
= dyn_cast
<gcall
*> (stmt_info
->stmt
))
6204 internal_fn ifn
= gimple_call_internal_fn (def_call
);
6205 int mask_index
= internal_fn_mask_index (ifn
);
6206 return gimple_call_arg (def_call
, mask_index
);
6212 /* Return MASK if MASK is suitable for masking an operation on vectors
6213 of type VECTYPE, otherwise convert it into such a form and return
6214 the result. Associate any conversion statements with STMT_INFO's
6218 vect_convert_mask_for_vectype (tree mask
, tree vectype
,
6219 stmt_vec_info stmt_info
, vec_info
*vinfo
)
6221 tree mask_type
= integer_type_for_mask (mask
, vinfo
);
6224 tree mask_vectype
= get_mask_type_for_scalar_type (vinfo
, mask_type
);
6226 && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype
),
6227 TYPE_VECTOR_SUBPARTS (mask_vectype
)))
6228 mask
= build_mask_conversion (vinfo
, mask
, vectype
, stmt_info
);
6233 /* Return the equivalent of:
6235 fold_convert (TYPE, VALUE)
6237 with the expectation that the operation will be vectorized.
6238 If new statements are needed, add them as pattern statements
6242 vect_add_conversion_to_pattern (vec_info
*vinfo
,
6243 tree type
, tree value
, stmt_vec_info stmt_info
)
6245 if (useless_type_conversion_p (type
, TREE_TYPE (value
)))
6248 tree new_value
= vect_recog_temp_ssa_var (type
, NULL
);
6249 gassign
*conversion
= gimple_build_assign (new_value
, CONVERT_EXPR
, value
);
6250 append_pattern_def_seq (vinfo
, stmt_info
, conversion
,
6251 get_vectype_for_scalar_type (vinfo
, type
));
6255 /* Try to convert STMT_INFO into a call to a gather load or scatter store
6256 internal function. Return the final statement on success and set
6257 *TYPE_OUT to the vector type being loaded or stored.
6259 This function only handles gathers and scatters that were recognized
6260 as such from the outset (indicated by STMT_VINFO_GATHER_SCATTER_P). */
6263 vect_recog_gather_scatter_pattern (vec_info
*vinfo
,
6264 stmt_vec_info stmt_info
, tree
*type_out
)
6266 /* Currently we only support this for loop vectorization. */
6267 loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
);
6271 /* Make sure that we're looking at a gather load or scatter store. */
6272 data_reference
*dr
= STMT_VINFO_DATA_REF (stmt_info
);
6273 if (!dr
|| !STMT_VINFO_GATHER_SCATTER_P (stmt_info
))
6276 /* Get the boolean that controls whether the load or store happens.
6277 This is null if the operation is unconditional. */
6278 tree mask
= vect_get_load_store_mask (stmt_info
);
6280 /* Make sure that the target supports an appropriate internal
6281 function for the gather/scatter operation. */
6282 gather_scatter_info gs_info
;
6283 if (!vect_check_gather_scatter (stmt_info
, loop_vinfo
, &gs_info
)
6284 || gs_info
.ifn
== IFN_LAST
)
6287 /* Convert the mask to the right form. */
6288 tree gs_vectype
= get_vectype_for_scalar_type (loop_vinfo
,
6289 gs_info
.element_type
);
6291 mask
= vect_convert_mask_for_vectype (mask
, gs_vectype
, stmt_info
,
6293 else if (gs_info
.ifn
== IFN_MASK_SCATTER_STORE
6294 || gs_info
.ifn
== IFN_MASK_GATHER_LOAD
6295 || gs_info
.ifn
== IFN_MASK_LEN_SCATTER_STORE
6296 || gs_info
.ifn
== IFN_MASK_LEN_GATHER_LOAD
)
6297 mask
= build_int_cst (TREE_TYPE (truth_type_for (gs_vectype
)), -1);
6299 /* Get the invariant base and non-invariant offset, converting the
6300 latter to the same width as the vector elements. */
6301 tree base
= gs_info
.base
;
6302 tree offset_type
= TREE_TYPE (gs_info
.offset_vectype
);
6303 tree offset
= vect_add_conversion_to_pattern (vinfo
, offset_type
,
6304 gs_info
.offset
, stmt_info
);
6306 /* Build the new pattern statement. */
6307 tree scale
= size_int (gs_info
.scale
);
6308 gcall
*pattern_stmt
;
6309 if (DR_IS_READ (dr
))
6311 tree zero
= build_zero_cst (gs_info
.element_type
);
6313 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5, base
,
6314 offset
, scale
, zero
, mask
);
6316 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4, base
,
6317 offset
, scale
, zero
);
6318 tree load_lhs
= vect_recog_temp_ssa_var (gs_info
.element_type
, NULL
);
6319 gimple_call_set_lhs (pattern_stmt
, load_lhs
);
6323 tree rhs
= vect_get_store_rhs (stmt_info
);
6325 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 5,
6326 base
, offset
, scale
, rhs
,
6329 pattern_stmt
= gimple_build_call_internal (gs_info
.ifn
, 4,
6330 base
, offset
, scale
, rhs
);
6332 gimple_call_set_nothrow (pattern_stmt
, true);
6334 /* Copy across relevant vectorization info and associate DR with the
6335 new pattern statement instead of the original statement. */
6336 stmt_vec_info pattern_stmt_info
= loop_vinfo
->add_stmt (pattern_stmt
);
6337 loop_vinfo
->move_dr (pattern_stmt_info
, stmt_info
);
6339 tree vectype
= STMT_VINFO_VECTYPE (stmt_info
);
6340 *type_out
= vectype
;
6341 vect_pattern_detected ("gather/scatter pattern", stmt_info
->stmt
);
6343 return pattern_stmt
;
6346 /* Return true if TYPE is a non-boolean integer type. These are the types
6347 that we want to consider for narrowing. */
6350 vect_narrowable_type_p (tree type
)
6352 return INTEGRAL_TYPE_P (type
) && !VECT_SCALAR_BOOLEAN_TYPE_P (type
);
6355 /* Return true if the operation given by CODE can be truncated to N bits
6356 when only N bits of the output are needed. This is only true if bit N+1
6357 of the inputs has no effect on the low N bits of the result. */
6360 vect_truncatable_operation_p (tree_code code
)
6380 /* Record that STMT_INFO could be changed from operating on TYPE to
6381 operating on a type with the precision and sign given by PRECISION
6382 and SIGN respectively. PRECISION is an arbitrary bit precision;
6383 it might not be a whole number of bytes. */
6386 vect_set_operation_type (stmt_vec_info stmt_info
, tree type
,
6387 unsigned int precision
, signop sign
)
6389 /* Round the precision up to a whole number of bytes. */
6390 precision
= vect_element_precision (precision
);
6391 if (precision
< TYPE_PRECISION (type
)
6392 && (!stmt_info
->operation_precision
6393 || stmt_info
->operation_precision
> precision
))
6395 stmt_info
->operation_precision
= precision
;
6396 stmt_info
->operation_sign
= sign
;
6400 /* Record that STMT_INFO only requires MIN_INPUT_PRECISION from its
6401 non-boolean inputs, all of which have type TYPE. MIN_INPUT_PRECISION
6402 is an arbitrary bit precision; it might not be a whole number of bytes. */
6405 vect_set_min_input_precision (stmt_vec_info stmt_info
, tree type
,
6406 unsigned int min_input_precision
)
6408 /* This operation in isolation only requires the inputs to have
6409 MIN_INPUT_PRECISION of precision, However, that doesn't mean
6410 that MIN_INPUT_PRECISION is a natural precision for the chain
6411 as a whole. E.g. consider something like:
6413 unsigned short *x, *y;
6414 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6416 The right shift can be done on unsigned chars, and only requires the
6417 result of "*x & 0xf0" to be done on unsigned chars. But taking that
6418 approach would mean turning a natural chain of single-vector unsigned
6419 short operations into one that truncates "*x" and then extends
6420 "(*x & 0xf0) >> 4", with two vectors for each unsigned short
6421 operation and one vector for each unsigned char operation.
6422 This would be a significant pessimization.
6424 Instead only propagate the maximum of this precision and the precision
6425 required by the users of the result. This means that we don't pessimize
6426 the case above but continue to optimize things like:
6430 *y = ((*x & 0xf0) >> 4) | (*y << 4);
6432 Here we would truncate two vectors of *x to a single vector of
6433 unsigned chars and use single-vector unsigned char operations for
6434 everything else, rather than doing two unsigned short copies of
6435 "(*x & 0xf0) >> 4" and then truncating the result. */
6436 min_input_precision
= MAX (min_input_precision
,
6437 stmt_info
->min_output_precision
);
6439 if (min_input_precision
< TYPE_PRECISION (type
)
6440 && (!stmt_info
->min_input_precision
6441 || stmt_info
->min_input_precision
> min_input_precision
))
6442 stmt_info
->min_input_precision
= min_input_precision
;
6445 /* Subroutine of vect_determine_min_output_precision. Return true if
6446 we can calculate a reduced number of output bits for STMT_INFO,
6447 whose result is LHS. */
6450 vect_determine_min_output_precision_1 (vec_info
*vinfo
,
6451 stmt_vec_info stmt_info
, tree lhs
)
6453 /* Take the maximum precision required by users of the result. */
6454 unsigned int precision
= 0;
6455 imm_use_iterator iter
;
6457 FOR_EACH_IMM_USE_FAST (use
, iter
, lhs
)
6459 gimple
*use_stmt
= USE_STMT (use
);
6460 if (is_gimple_debug (use_stmt
))
6462 stmt_vec_info use_stmt_info
= vinfo
->lookup_stmt (use_stmt
);
6463 if (!use_stmt_info
|| !use_stmt_info
->min_input_precision
)
6465 /* The input precision recorded for COND_EXPRs applies only to the
6466 "then" and "else" values. */
6467 gassign
*assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
);
6469 && gimple_assign_rhs_code (assign
) == COND_EXPR
6470 && use
->use
!= gimple_assign_rhs2_ptr (assign
)
6471 && use
->use
!= gimple_assign_rhs3_ptr (assign
))
6473 precision
= MAX (precision
, use_stmt_info
->min_input_precision
);
6476 if (dump_enabled_p ())
6477 dump_printf_loc (MSG_NOTE
, vect_location
,
6478 "only the low %d bits of %T are significant\n",
6480 stmt_info
->min_output_precision
= precision
;
6484 /* Calculate min_output_precision for STMT_INFO. */
6487 vect_determine_min_output_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
6489 /* We're only interested in statements with a narrowable result. */
6490 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
6492 || TREE_CODE (lhs
) != SSA_NAME
6493 || !vect_narrowable_type_p (TREE_TYPE (lhs
)))
6496 if (!vect_determine_min_output_precision_1 (vinfo
, stmt_info
, lhs
))
6497 stmt_info
->min_output_precision
= TYPE_PRECISION (TREE_TYPE (lhs
));
6500 /* Use range information to decide whether STMT (described by STMT_INFO)
6501 could be done in a narrower type. This is effectively a forward
6502 propagation, since it uses context-independent information that applies
6503 to all users of an SSA name. */
6506 vect_determine_precisions_from_range (stmt_vec_info stmt_info
, gassign
*stmt
)
6508 tree lhs
= gimple_assign_lhs (stmt
);
6509 if (!lhs
|| TREE_CODE (lhs
) != SSA_NAME
)
6512 tree type
= TREE_TYPE (lhs
);
6513 if (!vect_narrowable_type_p (type
))
6516 /* First see whether we have any useful range information for the result. */
6517 unsigned int precision
= TYPE_PRECISION (type
);
6518 signop sign
= TYPE_SIGN (type
);
6519 wide_int min_value
, max_value
;
6520 if (!vect_get_range_info (lhs
, &min_value
, &max_value
))
6523 tree_code code
= gimple_assign_rhs_code (stmt
);
6524 unsigned int nops
= gimple_num_ops (stmt
);
6526 if (!vect_truncatable_operation_p (code
))
6528 /* Handle operations that can be computed in type T if all inputs
6529 and outputs can be represented in type T. Also handle left and
6530 right shifts, where (in addition) the maximum shift amount must
6531 be less than the number of bits in T. */
6543 case TRUNC_DIV_EXPR
:
6545 case FLOOR_DIV_EXPR
:
6546 case ROUND_DIV_EXPR
:
6547 case EXACT_DIV_EXPR
:
6548 /* Modulus is excluded because it is typically calculated by doing
6549 a division, for which minimum signed / -1 isn't representable in
6550 the original signed type. We could take the division range into
6551 account instead, if handling modulus ever becomes important. */
6558 for (unsigned int i
= 1; i
< nops
; ++i
)
6560 tree op
= gimple_op (stmt
, i
);
6561 wide_int op_min_value
, op_max_value
;
6562 if (TREE_CODE (op
) == INTEGER_CST
)
6564 unsigned int op_precision
= TYPE_PRECISION (TREE_TYPE (op
));
6565 op_min_value
= op_max_value
= wi::to_wide (op
, op_precision
);
6567 else if (TREE_CODE (op
) == SSA_NAME
)
6569 if (!vect_get_range_info (op
, &op_min_value
, &op_max_value
))
6575 if (is_shift
&& i
== 2)
6577 /* There needs to be one more bit than the maximum shift amount.
6579 If the maximum shift amount is already 1 less than PRECISION
6580 then we can't narrow the shift further. Dealing with that
6581 case first ensures that we can safely use an unsigned range
6584 op_min_value isn't relevant, since shifts by negative amounts
6586 if (wi::geu_p (op_max_value
, precision
- 1))
6588 unsigned int min_bits
= op_max_value
.to_uhwi () + 1;
6590 /* As explained below, we can convert a signed shift into an
6591 unsigned shift if the sign bit is always clear. At this
6592 point we've already processed the ranges of the output and
6594 auto op_sign
= sign
;
6595 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
6597 op_min_value
= wide_int::from (wi::min_value (min_bits
, op_sign
),
6598 precision
, op_sign
);
6599 op_max_value
= wide_int::from (wi::max_value (min_bits
, op_sign
),
6600 precision
, op_sign
);
6602 min_value
= wi::min (min_value
, op_min_value
, sign
);
6603 max_value
= wi::max (max_value
, op_max_value
, sign
);
6607 /* Try to switch signed types for unsigned types if we can.
6608 This is better for two reasons. First, unsigned ops tend
6609 to be cheaper than signed ops. Second, it means that we can
6613 int res = (int) c & 0xff00; // range [0x0000, 0xff00]
6618 unsigned short res_1 = (unsigned short) c & 0xff00;
6619 int res = (int) res_1;
6621 where the intermediate result res_1 has unsigned rather than
6623 if (sign
== SIGNED
&& !wi::neg_p (min_value
))
6626 /* See what precision is required for MIN_VALUE and MAX_VALUE. */
6627 unsigned int precision1
= wi::min_precision (min_value
, sign
);
6628 unsigned int precision2
= wi::min_precision (max_value
, sign
);
6629 unsigned int value_precision
= MAX (precision1
, precision2
);
6630 if (value_precision
>= precision
)
6633 if (dump_enabled_p ())
6634 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
6635 " without loss of precision: %G",
6636 sign
== SIGNED
? "signed" : "unsigned",
6637 value_precision
, (gimple
*) stmt
);
6639 vect_set_operation_type (stmt_info
, type
, value_precision
, sign
);
6640 vect_set_min_input_precision (stmt_info
, type
, value_precision
);
6643 /* Use information about the users of STMT's result to decide whether
6644 STMT (described by STMT_INFO) could be done in a narrower type.
6645 This is effectively a backward propagation. */
6648 vect_determine_precisions_from_users (stmt_vec_info stmt_info
, gassign
*stmt
)
6650 tree_code code
= gimple_assign_rhs_code (stmt
);
6651 unsigned int opno
= (code
== COND_EXPR
? 2 : 1);
6652 tree type
= TREE_TYPE (gimple_op (stmt
, opno
));
6653 if (!vect_narrowable_type_p (type
))
6656 unsigned int precision
= TYPE_PRECISION (type
);
6657 unsigned int operation_precision
, min_input_precision
;
6661 /* Only the bits that contribute to the output matter. Don't change
6662 the precision of the operation itself. */
6663 operation_precision
= precision
;
6664 min_input_precision
= stmt_info
->min_output_precision
;
6670 tree shift
= gimple_assign_rhs2 (stmt
);
6671 if (TREE_CODE (shift
) != INTEGER_CST
6672 || !wi::ltu_p (wi::to_widest (shift
), precision
))
6674 unsigned int const_shift
= TREE_INT_CST_LOW (shift
);
6675 if (code
== LSHIFT_EXPR
)
6677 /* Avoid creating an undefined shift.
6679 ??? We could instead use min_output_precision as-is and
6680 optimize out-of-range shifts to zero. However, only
6681 degenerate testcases shift away all their useful input data,
6682 and it isn't natural to drop input operations in the middle
6683 of vectorization. This sort of thing should really be
6684 handled before vectorization. */
6685 operation_precision
= MAX (stmt_info
->min_output_precision
,
6687 /* We need CONST_SHIFT fewer bits of the input. */
6688 min_input_precision
= (MAX (operation_precision
, const_shift
)
6693 /* We need CONST_SHIFT extra bits to do the operation. */
6694 operation_precision
= (stmt_info
->min_output_precision
6696 min_input_precision
= operation_precision
;
6702 if (vect_truncatable_operation_p (code
))
6704 /* Input bit N has no effect on output bits N-1 and lower. */
6705 operation_precision
= stmt_info
->min_output_precision
;
6706 min_input_precision
= operation_precision
;
6712 if (operation_precision
< precision
)
6714 if (dump_enabled_p ())
6715 dump_printf_loc (MSG_NOTE
, vect_location
, "can narrow to %s:%d"
6716 " without affecting users: %G",
6717 TYPE_UNSIGNED (type
) ? "unsigned" : "signed",
6718 operation_precision
, (gimple
*) stmt
);
6719 vect_set_operation_type (stmt_info
, type
, operation_precision
,
6722 vect_set_min_input_precision (stmt_info
, type
, min_input_precision
);
6725 /* Return true if the statement described by STMT_INFO sets a boolean
6726 SSA_NAME and if we know how to vectorize this kind of statement using
6727 vector mask types. */
6730 possible_vector_mask_operation_p (stmt_vec_info stmt_info
)
6732 tree lhs
= gimple_get_lhs (stmt_info
->stmt
);
6733 tree_code code
= ERROR_MARK
;
6734 gassign
*assign
= NULL
;
6737 if ((assign
= dyn_cast
<gassign
*> (stmt_info
->stmt
)))
6738 code
= gimple_assign_rhs_code (assign
);
6739 else if ((cond
= dyn_cast
<gcond
*> (stmt_info
->stmt
)))
6741 lhs
= gimple_cond_lhs (cond
);
6742 code
= gimple_cond_code (cond
);
6746 || TREE_CODE (lhs
) != SSA_NAME
6747 || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs
)))
6750 if (code
!= ERROR_MARK
)
6763 return TREE_CODE_CLASS (code
) == tcc_comparison
;
6766 else if (is_a
<gphi
*> (stmt_info
->stmt
))
6771 /* If STMT_INFO sets a boolean SSA_NAME, see whether we should use
6772 a vector mask type instead of a normal vector type. Record the
6773 result in STMT_INFO->mask_precision. */
6776 vect_determine_mask_precision (vec_info
*vinfo
, stmt_vec_info stmt_info
)
6778 if (!possible_vector_mask_operation_p (stmt_info
))
6781 /* If at least one boolean input uses a vector mask type,
6782 pick the mask type with the narrowest elements.
6784 ??? This is the traditional behavior. It should always produce
6785 the smallest number of operations, but isn't necessarily the
6786 optimal choice. For example, if we have:
6792 - the user of a wants it to have a mask type for 16-bit elements (M16)
6794 - c uses a mask type for 8-bit elements (M8)
6796 then picking M8 gives:
6798 - 1 M16->M8 pack for b
6800 - 2 M8->M16 unpacks for the user of a
6802 whereas picking M16 would have given:
6804 - 2 M8->M16 unpacks for c
6807 The number of operations are equal, but M16 would have given
6808 a shorter dependency chain and allowed more ILP. */
6809 unsigned int precision
= ~0U;
6810 gimple
*stmt
= STMT_VINFO_STMT (stmt_info
);
6812 /* If the statement compares two values that shouldn't use vector masks,
6813 try comparing the values as normal scalars instead. */
6814 tree_code code
= ERROR_MARK
;
6816 unsigned int nops
= -1;
6817 unsigned int ops_start
= 0;
6819 if (gassign
*assign
= dyn_cast
<gassign
*> (stmt
))
6821 code
= gimple_assign_rhs_code (assign
);
6822 op0_type
= TREE_TYPE (gimple_assign_rhs1 (assign
));
6823 nops
= gimple_num_ops (assign
);
6826 else if (gcond
*cond
= dyn_cast
<gcond
*> (stmt
))
6828 code
= gimple_cond_code (cond
);
6829 op0_type
= TREE_TYPE (gimple_cond_lhs (cond
));
6834 if (code
!= ERROR_MARK
)
6836 for (unsigned int i
= ops_start
; i
< nops
; ++i
)
6838 tree rhs
= gimple_op (stmt
, i
);
6839 if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs
)))
6842 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
6844 /* Don't let external or constant operands influence the choice.
6845 We can convert them to whichever vector type we pick. */
6848 if (def_stmt_info
->mask_precision
)
6850 if (precision
> def_stmt_info
->mask_precision
)
6851 precision
= def_stmt_info
->mask_precision
;
6855 if (precision
== ~0U
6856 && TREE_CODE_CLASS (code
) == tcc_comparison
)
6859 tree vectype
, mask_type
;
6860 if (is_a
<scalar_mode
> (TYPE_MODE (op0_type
), &mode
)
6861 && (vectype
= get_vectype_for_scalar_type (vinfo
, op0_type
))
6862 && (mask_type
= get_mask_type_for_scalar_type (vinfo
, op0_type
))
6863 && expand_vec_cmp_expr_p (vectype
, mask_type
, code
))
6864 precision
= GET_MODE_BITSIZE (mode
);
6869 gphi
*phi
= as_a
<gphi
*> (stmt_info
->stmt
);
6870 for (unsigned i
= 0; i
< gimple_phi_num_args (phi
); ++i
)
6872 tree rhs
= gimple_phi_arg_def (phi
, i
);
6874 stmt_vec_info def_stmt_info
= vinfo
->lookup_def (rhs
);
6876 /* Don't let external or constant operands influence the choice.
6877 We can convert them to whichever vector type we pick. */
6880 if (def_stmt_info
->mask_precision
)
6882 if (precision
> def_stmt_info
->mask_precision
)
6883 precision
= def_stmt_info
->mask_precision
;
6888 if (dump_enabled_p ())
6890 if (precision
== ~0U)
6891 dump_printf_loc (MSG_NOTE
, vect_location
,
6892 "using normal nonmask vectors for %G",
6895 dump_printf_loc (MSG_NOTE
, vect_location
,
6896 "using boolean precision %d for %G",
6897 precision
, stmt_info
->stmt
);
6900 stmt_info
->mask_precision
= precision
;
6903 /* Handle vect_determine_precisions for STMT_INFO, given that we
6904 have already done so for the users of its result. */
6907 vect_determine_stmt_precisions (vec_info
*vinfo
, stmt_vec_info stmt_info
)
6909 vect_determine_min_output_precision (vinfo
, stmt_info
);
6910 if (gassign
*stmt
= dyn_cast
<gassign
*> (stmt_info
->stmt
))
6912 vect_determine_precisions_from_range (stmt_info
, stmt
);
6913 vect_determine_precisions_from_users (stmt_info
, stmt
);
6917 /* Walk backwards through the vectorizable region to determine the
6918 values of these fields:
6920 - min_output_precision
6921 - min_input_precision
6922 - operation_precision
6923 - operation_sign. */
6926 vect_determine_precisions (vec_info
*vinfo
)
6928 DUMP_VECT_SCOPE ("vect_determine_precisions");
6930 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
6932 class loop
*loop
= LOOP_VINFO_LOOP (loop_vinfo
);
6933 basic_block
*bbs
= LOOP_VINFO_BBS (loop_vinfo
);
6934 unsigned int nbbs
= loop
->num_nodes
;
6936 for (unsigned int i
= 0; i
< nbbs
; i
++)
6938 basic_block bb
= bbs
[i
];
6939 for (auto gsi
= gsi_start_phis (bb
);
6940 !gsi_end_p (gsi
); gsi_next (&gsi
))
6942 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
6944 vect_determine_mask_precision (vinfo
, stmt_info
);
6946 for (auto si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
6947 if (!is_gimple_debug (gsi_stmt (si
)))
6948 vect_determine_mask_precision
6949 (vinfo
, vinfo
->lookup_stmt (gsi_stmt (si
)));
6951 for (unsigned int i
= 0; i
< nbbs
; i
++)
6953 basic_block bb
= bbs
[nbbs
- i
- 1];
6954 for (gimple_stmt_iterator si
= gsi_last_bb (bb
);
6955 !gsi_end_p (si
); gsi_prev (&si
))
6956 if (!is_gimple_debug (gsi_stmt (si
)))
6957 vect_determine_stmt_precisions
6958 (vinfo
, vinfo
->lookup_stmt (gsi_stmt (si
)));
6959 for (auto gsi
= gsi_start_phis (bb
);
6960 !gsi_end_p (gsi
); gsi_next (&gsi
))
6962 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
6964 vect_determine_stmt_precisions (vinfo
, stmt_info
);
6970 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
6971 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
6973 basic_block bb
= bb_vinfo
->bbs
[i
];
6974 for (auto gsi
= gsi_start_phis (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
6976 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
6977 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
6978 vect_determine_mask_precision (vinfo
, stmt_info
);
6980 for (auto gsi
= gsi_start_bb (bb
); !gsi_end_p (gsi
); gsi_next (&gsi
))
6982 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (gsi
));
6983 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
6984 vect_determine_mask_precision (vinfo
, stmt_info
);
6987 for (int i
= bb_vinfo
->bbs
.length () - 1; i
!= -1; --i
)
6989 for (gimple_stmt_iterator gsi
= gsi_last_bb (bb_vinfo
->bbs
[i
]);
6990 !gsi_end_p (gsi
); gsi_prev (&gsi
))
6992 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (gsi
));
6993 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
6994 vect_determine_stmt_precisions (vinfo
, stmt_info
);
6996 for (auto gsi
= gsi_start_phis (bb_vinfo
->bbs
[i
]);
6997 !gsi_end_p (gsi
); gsi_next (&gsi
))
6999 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi
.phi ());
7000 if (stmt_info
&& STMT_VINFO_VECTORIZABLE (stmt_info
))
7001 vect_determine_stmt_precisions (vinfo
, stmt_info
);
7007 typedef gimple
*(*vect_recog_func_ptr
) (vec_info
*, stmt_vec_info
, tree
*);
7009 struct vect_recog_func
7011 vect_recog_func_ptr fn
;
7015 /* Note that ordering matters - the first pattern matching on a stmt is
7016 taken which means usually the more complex one needs to preceed the
7017 less comples onex (widen_sum only after dot_prod or sad for example). */
7018 static vect_recog_func vect_vect_recog_func_ptrs
[] = {
7019 { vect_recog_bitfield_ref_pattern
, "bitfield_ref" },
7020 { vect_recog_bit_insert_pattern
, "bit_insert" },
7021 { vect_recog_abd_pattern
, "abd" },
7022 { vect_recog_over_widening_pattern
, "over_widening" },
7023 /* Must come after over_widening, which narrows the shift as much as
7024 possible beforehand. */
7025 { vect_recog_average_pattern
, "average" },
7026 { vect_recog_cond_expr_convert_pattern
, "cond_expr_convert" },
7027 { vect_recog_mulhs_pattern
, "mult_high" },
7028 { vect_recog_cast_forwprop_pattern
, "cast_forwprop" },
7029 { vect_recog_widen_mult_pattern
, "widen_mult" },
7030 { vect_recog_dot_prod_pattern
, "dot_prod" },
7031 { vect_recog_sad_pattern
, "sad" },
7032 { vect_recog_widen_sum_pattern
, "widen_sum" },
7033 { vect_recog_pow_pattern
, "pow" },
7034 { vect_recog_popcount_clz_ctz_ffs_pattern
, "popcount_clz_ctz_ffs" },
7035 { vect_recog_ctz_ffs_pattern
, "ctz_ffs" },
7036 { vect_recog_widen_shift_pattern
, "widen_shift" },
7037 { vect_recog_rotate_pattern
, "rotate" },
7038 { vect_recog_vector_vector_shift_pattern
, "vector_vector_shift" },
7039 { vect_recog_divmod_pattern
, "divmod" },
7040 { vect_recog_mult_pattern
, "mult" },
7041 { vect_recog_sat_add_pattern
, "sat_add" },
7042 { vect_recog_mixed_size_cond_pattern
, "mixed_size_cond" },
7043 { vect_recog_gcond_pattern
, "gcond" },
7044 { vect_recog_bool_pattern
, "bool" },
7045 /* This must come before mask conversion, and includes the parts
7046 of mask conversion that are needed for gather and scatter
7047 internal functions. */
7048 { vect_recog_gather_scatter_pattern
, "gather_scatter" },
7049 { vect_recog_mask_conversion_pattern
, "mask_conversion" },
7050 { vect_recog_widen_plus_pattern
, "widen_plus" },
7051 { vect_recog_widen_minus_pattern
, "widen_minus" },
7052 { vect_recog_widen_abd_pattern
, "widen_abd" },
7053 /* These must come after the double widening ones. */
7056 const unsigned int NUM_PATTERNS
= ARRAY_SIZE (vect_vect_recog_func_ptrs
);
7058 /* Mark statements that are involved in a pattern. */
7061 vect_mark_pattern_stmts (vec_info
*vinfo
,
7062 stmt_vec_info orig_stmt_info
, gimple
*pattern_stmt
,
7063 tree pattern_vectype
)
7065 stmt_vec_info orig_stmt_info_saved
= orig_stmt_info
;
7066 gimple
*def_seq
= STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
7068 gimple
*orig_pattern_stmt
= NULL
;
7069 if (is_pattern_stmt_p (orig_stmt_info
))
7071 /* We're replacing a statement in an existing pattern definition
7073 orig_pattern_stmt
= orig_stmt_info
->stmt
;
7074 if (dump_enabled_p ())
7075 dump_printf_loc (MSG_NOTE
, vect_location
,
7076 "replacing earlier pattern %G", orig_pattern_stmt
);
7078 /* To keep the book-keeping simple, just swap the lhs of the
7079 old and new statements, so that the old one has a valid but
7081 tree old_lhs
= gimple_get_lhs (orig_pattern_stmt
);
7082 gimple_set_lhs (orig_pattern_stmt
, gimple_get_lhs (pattern_stmt
));
7083 gimple_set_lhs (pattern_stmt
, old_lhs
);
7085 if (dump_enabled_p ())
7086 dump_printf_loc (MSG_NOTE
, vect_location
, "with %G", pattern_stmt
);
7088 /* Switch to the statement that ORIG replaces. */
7089 orig_stmt_info
= STMT_VINFO_RELATED_STMT (orig_stmt_info
);
7091 /* We shouldn't be replacing the main pattern statement. */
7092 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info
)->stmt
7093 != orig_pattern_stmt
);
7097 for (gimple_stmt_iterator si
= gsi_start (def_seq
);
7098 !gsi_end_p (si
); gsi_next (&si
))
7100 if (dump_enabled_p ())
7101 dump_printf_loc (MSG_NOTE
, vect_location
,
7102 "extra pattern stmt: %G", gsi_stmt (si
));
7103 stmt_vec_info pattern_stmt_info
7104 = vect_init_pattern_stmt (vinfo
, gsi_stmt (si
),
7105 orig_stmt_info
, pattern_vectype
);
7106 /* Stmts in the def sequence are not vectorizable cycle or
7107 induction defs, instead they should all be vect_internal_def
7108 feeding the main pattern stmt which retains this def type. */
7109 STMT_VINFO_DEF_TYPE (pattern_stmt_info
) = vect_internal_def
;
7112 if (orig_pattern_stmt
)
7114 vect_init_pattern_stmt (vinfo
, pattern_stmt
,
7115 orig_stmt_info
, pattern_vectype
);
7117 /* Insert all the new pattern statements before the original one. */
7118 gimple_seq
*orig_def_seq
= &STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info
);
7119 gimple_stmt_iterator gsi
= gsi_for_stmt (orig_pattern_stmt
,
7121 gsi_insert_seq_before_without_update (&gsi
, def_seq
, GSI_SAME_STMT
);
7122 gsi_insert_before_without_update (&gsi
, pattern_stmt
, GSI_SAME_STMT
);
7124 /* Remove the pattern statement that this new pattern replaces. */
7125 gsi_remove (&gsi
, false);
7128 vect_set_pattern_stmt (vinfo
,
7129 pattern_stmt
, orig_stmt_info
, pattern_vectype
);
7131 /* For any conditionals mark them as vect_condition_def. */
7132 if (is_a
<gcond
*> (pattern_stmt
))
7133 STMT_VINFO_DEF_TYPE (STMT_VINFO_RELATED_STMT (orig_stmt_info
)) = vect_condition_def
;
7135 /* Transfer reduction path info to the pattern. */
7136 if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved
) != -1)
7139 if (!gimple_extract_op (orig_stmt_info_saved
->stmt
, &op
))
7141 tree lookfor
= op
.ops
[STMT_VINFO_REDUC_IDX (orig_stmt_info
)];
7142 /* Search the pattern def sequence and the main pattern stmt. Note
7143 we may have inserted all into a containing pattern def sequence
7144 so the following is a bit awkward. */
7145 gimple_stmt_iterator si
;
7149 si
= gsi_start (def_seq
);
7161 if (gimple_extract_op (s
, &op
))
7162 for (unsigned i
= 0; i
< op
.num_ops
; ++i
)
7163 if (op
.ops
[i
] == lookfor
)
7165 STMT_VINFO_REDUC_IDX (vinfo
->lookup_stmt (s
)) = i
;
7166 lookfor
= gimple_get_lhs (s
);
7170 if (s
== pattern_stmt
)
7172 if (!found
&& dump_enabled_p ())
7173 dump_printf_loc (MSG_NOTE
, vect_location
,
7174 "failed to update reduction index.\n");
7182 if (s
== pattern_stmt
)
7183 /* Found the end inside a bigger pattern def seq. */
7192 /* Function vect_pattern_recog_1
7195 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
7196 computation pattern.
7197 STMT_INFO: A stmt from which the pattern search should start.
7199 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates
7200 a sequence of statements that has the same functionality and can be
7201 used to replace STMT_INFO. It returns the last statement in the sequence
7202 and adds any earlier statements to STMT_INFO's STMT_VINFO_PATTERN_DEF_SEQ.
7203 PATTERN_RECOG_FUNC also sets *TYPE_OUT to the vector type of the final
7204 statement, having first checked that the target supports the new operation
7207 This function also does some bookkeeping, as explained in the documentation
7208 for vect_recog_pattern. */
7211 vect_pattern_recog_1 (vec_info
*vinfo
,
7212 vect_recog_func
*recog_func
, stmt_vec_info stmt_info
)
7214 gimple
*pattern_stmt
;
7215 tree pattern_vectype
;
7217 /* If this statement has already been replaced with pattern statements,
7218 leave the original statement alone, since the first match wins.
7219 Instead try to match against the definition statements that feed
7220 the main pattern statement. */
7221 if (STMT_VINFO_IN_PATTERN_P (stmt_info
))
7223 gimple_stmt_iterator gsi
;
7224 for (gsi
= gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
7225 !gsi_end_p (gsi
); gsi_next (&gsi
))
7226 vect_pattern_recog_1 (vinfo
, recog_func
,
7227 vinfo
->lookup_stmt (gsi_stmt (gsi
)));
7231 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
));
7232 pattern_stmt
= recog_func
->fn (vinfo
, stmt_info
, &pattern_vectype
);
7235 /* Clear any half-formed pattern definition sequence. */
7236 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info
) = NULL
;
7240 /* Found a vectorizable pattern. */
7241 if (dump_enabled_p ())
7242 dump_printf_loc (MSG_NOTE
, vect_location
,
7243 "%s pattern recognized: %G",
7244 recog_func
->name
, pattern_stmt
);
7246 /* Mark the stmts that are involved in the pattern. */
7247 vect_mark_pattern_stmts (vinfo
, stmt_info
, pattern_stmt
, pattern_vectype
);
7251 /* Function vect_pattern_recog
7254 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
7257 Output - for each computation idiom that is detected we create a new stmt
7258 that provides the same functionality and that can be vectorized. We
7259 also record some information in the struct_stmt_info of the relevant
7260 stmts, as explained below:
7262 At the entry to this function we have the following stmts, with the
7263 following initial value in the STMT_VINFO fields:
7265 stmt in_pattern_p related_stmt vec_stmt
7266 S1: a_i = .... - - -
7267 S2: a_2 = ..use(a_i).. - - -
7268 S3: a_1 = ..use(a_2).. - - -
7269 S4: a_0 = ..use(a_1).. - - -
7270 S5: ... = ..use(a_0).. - - -
7272 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
7273 represented by a single stmt. We then:
7274 - create a new stmt S6 equivalent to the pattern (the stmt is not
7275 inserted into the code)
7276 - fill in the STMT_VINFO fields as follows:
7278 in_pattern_p related_stmt vec_stmt
7279 S1: a_i = .... - - -
7280 S2: a_2 = ..use(a_i).. - - -
7281 S3: a_1 = ..use(a_2).. - - -
7282 S4: a_0 = ..use(a_1).. true S6 -
7283 '---> S6: a_new = .... - S4 -
7284 S5: ... = ..use(a_0).. - - -
7286 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
7287 to each other through the RELATED_STMT field).
7289 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
7290 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
7291 remain irrelevant unless used by stmts other than S4.
7293 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
7294 (because they are marked as irrelevant). It will vectorize S6, and record
7295 a pointer to the new vector stmt VS6 from S6 (as usual).
7296 S4 will be skipped, and S5 will be vectorized as usual:
7298 in_pattern_p related_stmt vec_stmt
7299 S1: a_i = .... - - -
7300 S2: a_2 = ..use(a_i).. - - -
7301 S3: a_1 = ..use(a_2).. - - -
7302 > VS6: va_new = .... - - -
7303 S4: a_0 = ..use(a_1).. true S6 VS6
7304 '---> S6: a_new = .... - S4 VS6
7305 > VS5: ... = ..vuse(va_new).. - - -
7306 S5: ... = ..use(a_0).. - - -
7308 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
7309 elsewhere), and we'll end up with:
7312 VS5: ... = ..vuse(va_new)..
7314 In case of more than one pattern statements, e.g., widen-mult with
7318 S2 a_T = (TYPE) a_t;
7319 '--> S3: a_it = (interm_type) a_t;
7320 S4 prod_T = a_T * CONST;
7321 '--> S5: prod_T' = a_it w* CONST;
7323 there may be other users of a_T outside the pattern. In that case S2 will
7324 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
7325 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
7326 be recorded in S3. */
7329 vect_pattern_recog (vec_info
*vinfo
)
7334 gimple_stmt_iterator si
;
7337 vect_determine_precisions (vinfo
);
7339 DUMP_VECT_SCOPE ("vect_pattern_recog");
7341 if (loop_vec_info loop_vinfo
= dyn_cast
<loop_vec_info
> (vinfo
))
7343 loop
= LOOP_VINFO_LOOP (loop_vinfo
);
7344 bbs
= LOOP_VINFO_BBS (loop_vinfo
);
7345 nbbs
= loop
->num_nodes
;
7347 /* Scan through the loop stmts, applying the pattern recognition
7348 functions starting at each stmt visited: */
7349 for (i
= 0; i
< nbbs
; i
++)
7351 basic_block bb
= bbs
[i
];
7352 for (si
= gsi_start_bb (bb
); !gsi_end_p (si
); gsi_next (&si
))
7354 if (is_gimple_debug (gsi_stmt (si
)))
7356 stmt_vec_info stmt_info
= vinfo
->lookup_stmt (gsi_stmt (si
));
7357 /* Scan over all generic vect_recog_xxx_pattern functions. */
7358 for (j
= 0; j
< NUM_PATTERNS
; j
++)
7359 vect_pattern_recog_1 (vinfo
, &vect_vect_recog_func_ptrs
[j
],
7366 bb_vec_info bb_vinfo
= as_a
<bb_vec_info
> (vinfo
);
7367 for (unsigned i
= 0; i
< bb_vinfo
->bbs
.length (); ++i
)
7368 for (gimple_stmt_iterator gsi
= gsi_start_bb (bb_vinfo
->bbs
[i
]);
7369 !gsi_end_p (gsi
); gsi_next (&gsi
))
7371 stmt_vec_info stmt_info
= bb_vinfo
->lookup_stmt (gsi_stmt (gsi
));
7372 if (!stmt_info
|| !STMT_VINFO_VECTORIZABLE (stmt_info
))
7375 /* Scan over all generic vect_recog_xxx_pattern functions. */
7376 for (j
= 0; j
< NUM_PATTERNS
; j
++)
7377 vect_pattern_recog_1 (vinfo
,
7378 &vect_vect_recog_func_ptrs
[j
], stmt_info
);
7382 /* After this no more add_stmt calls are allowed. */
7383 vinfo
->stmt_vec_info_ro
= true;
7386 /* Build a GIMPLE_ASSIGN or GIMPLE_CALL with the tree_code,
7387 or internal_fn contained in ch, respectively. */
7389 vect_gimple_build (tree lhs
, code_helper ch
, tree op0
, tree op1
)
7391 gcc_assert (op0
!= NULL_TREE
);
7392 if (ch
.is_tree_code ())
7393 return gimple_build_assign (lhs
, (tree_code
) ch
, op0
, op1
);
7395 gcc_assert (ch
.is_internal_fn ());
7396 gimple
* stmt
= gimple_build_call_internal (as_internal_fn ((combined_fn
) ch
),
7397 op1
== NULL_TREE
? 1 : 2,
7399 gimple_call_set_lhs (stmt
, lhs
);