/* Analysis Utilities for Loop Vectorization.
   Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.
   Contributed by Dorit Nuzman <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tree.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "cfgloop.h"
#include "expr.h"
#include "optabs.h"
#include "tree-data-ref.h"
#include "tree-vectorizer.h"
#include "diagnostic-core.h"
/* Pattern recognition functions  */
static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
                                            tree *);
static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
                                             tree *);
static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
                                           tree *);
static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
                                                tree *);
static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
                                              tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
                                                      tree *, tree *);
static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
                                               tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
                                                  tree *, tree *);
static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
        vect_recog_widen_mult_pattern,
        vect_recog_widen_sum_pattern,
        vect_recog_dot_prod_pattern,
        vect_recog_pow_pattern,
        vect_recog_over_widening_pattern,
        vect_recog_widen_shift_pattern,
        vect_recog_vector_vector_shift_pattern,
        vect_recog_sdivmod_pow2_pattern,
        vect_recog_mixed_size_cond_pattern,
        vect_recog_bool_pattern};
static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
                                      stmt);
}

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
/* Function widened_name_p

   Check whether NAME, an ssa-name used in USE_STMT,
   is a result of a type-promotion, such that:
     DEF_STMT: NAME = NOP (name0)
   where the type of name0 (HALF_TYPE) is smaller than the type of NAME.
   If CHECK_SIGN is TRUE, check that either both types are signed or both are
   unsigned.  */

widened_name_p (tree name, gimple use_stmt, tree *half_type, gimple *def_stmt,
                bool check_sign)
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  enum vect_def_type dt;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!vect_is_simple_use (name, loop_vinfo, NULL, def_stmt, &def, &dt))

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)

  if (!is_gimple_assign (*def_stmt))

  if (gimple_assign_rhs_code (*def_stmt) != NOP_EXPR)

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *half_type = TREE_TYPE (oprnd0);
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*half_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*half_type)) && check_sign)
      || (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 2)))

  if (!vect_is_simple_use (oprnd0, loop_vinfo, NULL, &dummy_gimple, &dummy,
/* Helper to return a new temporary for pattern of TYPE for STMT.  If STMT
   is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var.  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  tree var = create_tmp_var (type, "patt");

  add_referenced_var (var);
  var = make_ssa_name (var, stmt);
  return var;
}
/* Function vect_recog_dot_prod_pattern

   Try to find the following pattern:

     sum_0 = phi <init, sum_1>
     S3  x_T = (TYPE1) x_t;
     S4  y_T = (TYPE1) y_t;
     [S6  prod = (TYPE2) prod;  #optional]
     S7  sum_1 = prod + sum_0;

   where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
   same size of 'TYPE1' or bigger.  This is a special case of a reduction
   computation.

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
   will be detected.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>

   Note: The dot-prod idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
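/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources).  A source loop of the kind this recognizer is meant
   to match:

     short x[N], y[N];
     int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i] * y[i];

   Here 'type' is short and 'TYPE1' is int: each product is computed in the
   widened type and accumulated into the int reduction variable sum, which
   can then be implemented with a dot-product operation.  */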
vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  tree oprnd00, oprnd01;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))

  type = gimple_expr_type (last_stmt);
  /* Look for the following pattern
          DDPROD = (TYPE2) DPROD;
          sum_1 = DDPROD + sum_0;
     in which
     - DX is double the size of X
     - DY is double the size of Y
     - DX, DY, DPROD all have the same type
     - sum is the same size of DPROD or bigger
     - sum has been recognized as a reduction variable.

     This is equivalent to:
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD w+ sum_0;  #widen summation
     or
       DPROD = X w* Y;          #widen mult
       sum_1 = DPROD + sum_0;   #summation
   */
  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
      /* Has been detected as widening-summation?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      type = gimple_expr_type (stmt);
      if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      half_type = TREE_TYPE (oprnd0);

      if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)

      oprnd0 = gimple_assign_rhs1 (last_stmt);
      oprnd1 = gimple_assign_rhs2 (last_stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), type)
          || !types_compatible_p (TREE_TYPE (oprnd1), type))

      if (widened_name_p (oprnd0, stmt, &half_type, &def_stmt, true))
          oprnd0 = gimple_assign_rhs1 (stmt);
  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a (widen_)mult_expr  */
  if (TREE_CODE (oprnd0) != SSA_NAME)

  prod_type = half_type;
  stmt = SSA_NAME_DEF_STMT (oprnd0);

  /* It could not be the dot_prod pattern if the stmt is outside the loop.  */
  if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))

  /* FORNOW.  Can continue analyzing the def-use chain when this stmt is in a
     phi inside the loop (in case we are analyzing an outer-loop).  */
  if (!is_gimple_assign (stmt))

  stmt_vinfo = vinfo_for_stmt (stmt);
  gcc_assert (stmt_vinfo);
  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)

  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
      /* Has been detected as a widening multiplication?  */

      stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
      if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)

      stmt_vinfo = vinfo_for_stmt (stmt);
      gcc_assert (stmt_vinfo);
      gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
      oprnd00 = gimple_assign_rhs1 (stmt);
      oprnd01 = gimple_assign_rhs2 (stmt);

      tree half_type0, half_type1;

      oprnd0 = gimple_assign_rhs1 (stmt);
      oprnd1 = gimple_assign_rhs2 (stmt);
      if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
          || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))

      if (!widened_name_p (oprnd0, stmt, &half_type0, &def_stmt, true))

      oprnd00 = gimple_assign_rhs1 (def_stmt);
      if (!widened_name_p (oprnd1, stmt, &half_type1, &def_stmt, true))

      oprnd01 = gimple_assign_rhs1 (def_stmt);
      if (!types_compatible_p (half_type0, half_type1))

      if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)

  half_type = TREE_TYPE (oprnd00);
  *type_in = half_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
                                                oprnd00, oprnd01, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
/* Handle widening operation by a constant.  At the moment we support MULT_EXPR
   and LSHIFT_EXPR.

   For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
   we check that CONST_OPRND is less than or equal to the size of HALF_TYPE.

   Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
   HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
   that satisfies the above restrictions, we can perform a widening operation
   from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
   with a_it = (interm_type) a_t;  */
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
                               tree const_oprnd, tree *oprnd,
                               VEC (gimple, heap) **stmts, tree type,
                               tree *half_type, gimple def_stmt)
  tree new_type, new_oprnd, tmp;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (code != MULT_EXPR && code != LSHIFT_EXPR)

  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
           && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
              != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);

  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4)
      || !gimple_bb (def_stmt)
      || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
      || !vinfo_for_stmt (def_stmt))

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
                                             TYPE_UNSIGNED (type));
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
          && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
          || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
          || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)

      VEC_safe_push (gimple, heap, *stmts, def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);

      /* Create a_T = (NEW_TYPE) a_t;  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      tmp = create_tmp_var (new_type, NULL);
      add_referenced_var (tmp);
      new_oprnd = make_ssa_name (tmp, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      VEC_safe_push (gimple, heap, *stmts, def_stmt);

  *half_type = new_type;
/* Function vect_recog_widen_mult_pattern

   Try to find the following pattern:

     TYPE a_T, b_T, prod_T;
     S5  prod_T = a_T * b_T;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned type a_t, b_t;
     unsigned TYPE u_prod_T;
     TYPE a_T, b_T, prod_T;
     S5  prod_T = a_T * b_T;
     S6  u_prod_T = (unsigned TYPE) prod_T;

   and multiplication by constants:

     S5  prod_T = a_T * CONST;

   A special case of multiplication by constants is when 'TYPE' is 4 times
   bigger than 'type', but CONST fits an intermediate type 2 times smaller
   than 'TYPE'.  In that case we create an additional pattern stmt for S3
   to create a variable of the intermediate type, and perform widen-mult
   on the intermediate type as well:

     TYPE a_T, prod_T, prod_T';
           '--> a_it = (interm_type) a_t;
     S5  prod_T = a_T * CONST;
           '--> prod_T' = a_it w* CONST;

   * STMTS: Contains a stmt from which the pattern search begins.  In the
   example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
   is detected.  In case of unsigned widen-mult, the original stmt (S5) is
   replaced with S6 in STMTS.  In case of multiplication by a constant
   of an intermediate type (the last case above), STMTS also contains S3
   (inserted before S5).

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_MULT <a_t, b_t>  */
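/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources).  A source loop that typically produces this pattern:

     short a[N], b[N];
     int c[N];
     for (i = 0; i < N; i++)
       c[i] = a[i] * b[i];

   The front end promotes a[i] and b[i] to int before the multiplication,
   producing exactly the promotions that feed S5 above.  */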
vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
                               tree *type_in, tree *type_out)
  gimple last_stmt = VEC_pop (gimple, *stmts);
  gimple def_stmt0, def_stmt1;
  tree type, half_type0, half_type1;
  tree vectype, vectype_out = NULL_TREE;
  enum tree_code dummy_code;
  VEC (tree, heap) *dummy_vec;

  if (!is_gimple_assign (last_stmt))

  type = gimple_expr_type (last_stmt);

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))

  /* Check argument 0.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))
  /* Check argument 1.  */
  op1_ok = widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1, false);

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  oprnd1 = gimple_assign_rhs1 (def_stmt1);

  if (TREE_CODE (oprnd1) == INTEGER_CST
      && TREE_CODE (half_type0) == INTEGER_TYPE
      && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
                                        &oprnd0, stmts, type,
                                        &half_type0, def_stmt0))
    half_type1 = half_type0;

  /* Handle unsigned case.  Look for
     S6  u_prod_T = (unsigned TYPE) prod_T;
     Use unsigned TYPE as the type for WIDEN_MULT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      gimple use_stmt = NULL;

      if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
          if (is_gimple_debug (USE_STMT (use_p)))
          use_stmt = USE_STMT (use_p);

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)

      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      if (!INTEGRAL_TYPE_P (use_type)
          || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
          || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))

      last_stmt = use_stmt;

  if (!types_compatible_p (half_type0, half_type1))

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");

  /* Check target support  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
      || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int, &dummy_vec))

  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
/* Function vect_recog_pow_pattern

   Try to find the following pattern:

     x = POW (y, N);

   with POW being one of pow, powf, powi, powif and N being
   either 2 or 0.5.

   * LAST_STMT: A stmt from which the pattern search begins.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        x * x
   or
        sqrt (x)   */
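/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources):

     double x[N], y[N], z[N];
     for (i = 0; i < N; i++)
       {
         y[i] = pow (x[i], 2.0);    <-- recognized as x[i] * x[i]
         z[i] = pow (x[i], 0.5);    <-- recognized as sqrt (x[i])
       }
*/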
vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree fn, base, exp = NULL;

  if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)

  fn = gimple_call_fndecl (last_stmt);
  if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)

  switch (DECL_FUNCTION_CODE (fn))
      base = gimple_call_arg (last_stmt, 0);
      exp = gimple_call_arg (last_stmt, 1);
      if (TREE_CODE (exp) != REAL_CST
          && TREE_CODE (exp) != INTEGER_CST)

  /* We now have a pow or powi builtin function call with a constant
     exponent.  */

  *type_out = NULL_TREE;

  /* Catch squaring.  */
  if ((host_integerp (exp, 0)
       && tree_low_cst (exp, 0) == 2)
      || (TREE_CODE (exp) == REAL_CST
          && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
      *type_in = TREE_TYPE (base);

      var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
      stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);

  /* Catch square root.  */
  if (TREE_CODE (exp) == REAL_CST
      && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
      tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
      *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
          gimple stmt = gimple_build_call (newfn, 1, base);
          if (vectorizable_function (stmt, *type_in, *type_in)
              var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
              gimple_call_set_lhs (stmt, var);
/* Function vect_recog_widen_sum_pattern

   Try to find the following pattern:

     TYPE x_T, sum = init;
     sum_0 = phi <init, sum_1>
     S3  sum_1 = x_T + sum_0;

   where type 'TYPE' is at least double the size of type 'type', i.e. we're
   summing elements of type 'type' into an accumulator of type 'TYPE'.  This is
   a special case of a reduction computation.

   * LAST_STMT: A stmt from which the pattern search begins.  In the example,
   when this function is called with S3, the pattern {S2,S3} will be detected.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_SUM <x_t, sum_0>

   Note: The widening-sum idiom is a widening reduction pattern that is
         vectorized without preserving all the intermediate results.  It
         produces only N/2 (widened) results (by summing up pairs of
         intermediate results) rather than all N results.  Therefore, we
         cannot allow this pattern when we want to get all the results and in
         the correct order (as is the case when this computation is in an
         inner-loop nested in an outer-loop that is being vectorized).  */
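/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources):

     unsigned short x[N];
     unsigned int sum = 0;
     for (i = 0; i < N; i++)
       sum += x[i];

   Each element is widened from 'type' (unsigned short) to 'TYPE'
   (unsigned int) before being added to the accumulator.  */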
vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
  gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  tree type, half_type;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  if (!is_gimple_assign (last_stmt))

  type = gimple_expr_type (last_stmt);

  /* Look for the following pattern
          DX = (TYPE) X;
          sum_1 = DX + sum_0;
     In which DX is at least double the size of X, and sum_1 has been
     recognized as a reduction variable.  */

  /* Starting from LAST_STMT, follow the defs of its uses in search
     of the above pattern.  */

  if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)

  if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (!types_compatible_p (TREE_TYPE (oprnd0), type)
      || !types_compatible_p (TREE_TYPE (oprnd1), type))

  /* So far so good.  Since last_stmt was detected as a (summation) reduction,
     we know that oprnd1 is the reduction variable (defined by a loop-header
     phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
     Left to check that oprnd0 is defined by a cast from type 'type' to type
     'TYPE'.  */

  if (!widened_name_p (oprnd0, last_stmt, &half_type, &stmt, true))

  oprnd0 = gimple_assign_rhs1 (stmt);
  *type_in = half_type;

  /* Pattern detected.  Create a stmt to be used to replace the pattern: */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,

  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  /* We don't allow changing the order of the computation in the inner-loop
     when doing outer-loop vectorization.  */
  gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
/* Return TRUE if the operation in STMT can be performed on a smaller type.

   STMT - a statement to check.
   DEF - we support operations with two operands, one of which is constant.
         The other operand can be defined by a demotion operation, or by a
         previous statement in a sequence of over-promoted operations.  In the
         latter case DEF is used to replace that operand.  (It is defined by a
         pattern statement we created for the previous statement in the
         sequence.)

   NEW_TYPE - Output: a smaller type that we are trying to use.  Input: if not
         NULL, it's the type of DEF.
   STMTS - additional pattern statements.  If a pattern statement (type
         conversion) is created in this function, its original statement is
         added to STMTS.

   OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
         operands to use in the new pattern statement for STMT (will be created
         in vect_recog_over_widening_pattern ()).
   NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
         statements for STMT: the first one is a type promotion and the second
         one is the operation itself.  We return the type promotion statement
         in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
         the second pattern statement.  */
vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
                                  tree *op0, tree *op1, gimple *new_def_stmt,
                                  VEC (gimple, heap) **stmts)
  tree const_oprnd, oprnd;
  tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
  gimple def_stmt, new_stmt;
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt));
  struct loop *loop = LOOP_VINFO_LOOP (loop_info);

  *new_def_stmt = NULL;

  if (!is_gimple_assign (stmt))

  code = gimple_assign_rhs_code (stmt);
  if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
      && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)

  oprnd = gimple_assign_rhs1 (stmt);
  const_oprnd = gimple_assign_rhs2 (stmt);
  type = gimple_expr_type (stmt);

  if (TREE_CODE (oprnd) != SSA_NAME
      || TREE_CODE (const_oprnd) != INTEGER_CST)

  /* If we are in the middle of a sequence, we use DEF from a previous
     statement.  Otherwise, OPRND has to be a result of type promotion.  */
      half_type = *new_type;

      if (!widened_name_p (oprnd, stmt, &half_type, &def_stmt, false)
          || !gimple_bb (def_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
          || !vinfo_for_stmt (def_stmt))

  /* Can we perform the operation on a smaller type?  */
      if (!int_fits_type_p (const_oprnd, half_type))
          /* HALF_TYPE is not enough.  Try a bigger type if possible.  */
          if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))

          interm_type = build_nonstandard_integer_type (
                        TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
          if (!int_fits_type_p (const_oprnd, interm_type))

      /* Try intermediate type - HALF_TYPE is not enough for sure.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))

      /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
         (e.g., if the original value was char, the shift amount is at most 8
         if we want to use short).  */
      if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)

      interm_type = build_nonstandard_integer_type (
                    TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))

      if (vect_supportable_shift (code, half_type))

      /* Try intermediate type - HALF_TYPE is not supported.  */
      if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))

      interm_type = build_nonstandard_integer_type (
                    TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));

      if (!vect_supportable_shift (code, interm_type))

  /* There are four possible cases:
     1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
        the first statement in the sequence)
        a. The original, HALF_TYPE, is not enough - we replace the promotion
           from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
        b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
           statement.
     2. OPRND is defined by a pattern statement we created.
        a. Its type is not sufficient for the operation, we create a new stmt:
           a type conversion for OPRND from HALF_TYPE to INTERM_TYPE.  We store
           this statement in NEW_DEF_STMT, and it is later put in
           STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
        b. OPRND is good to use in the new statement.  */

          /* Replace the original type conversion HALF_TYPE->TYPE with
             HALF_TYPE->INTERM_TYPE.  */
          if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
              new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
              /* Check if the already created pattern stmt is what we need.  */
              if (!is_gimple_assign (new_stmt)
                  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
                  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)

              VEC_safe_push (gimple, heap, *stmts, def_stmt);
              oprnd = gimple_assign_lhs (new_stmt);

              /* Create NEW_OPRND = (INTERM_TYPE) OPRND.  */
              oprnd = gimple_assign_rhs1 (def_stmt);
              tmp = create_tmp_reg (interm_type, NULL);
              add_referenced_var (tmp);
              new_oprnd = make_ssa_name (tmp, NULL);
              new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
              STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
              VEC_safe_push (gimple, heap, *stmts, def_stmt);

          /* Retrieve the operand before the type promotion.  */
          oprnd = gimple_assign_rhs1 (def_stmt);

          /* Create a type conversion HALF_TYPE->INTERM_TYPE.  */
          tmp = create_tmp_reg (interm_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
          *new_def_stmt = new_stmt;

      /* Otherwise, OPRND is already set.  */

      *new_type = interm_type;
      *new_type = half_type;

  *op1 = fold_convert (*new_type, const_oprnd);
/* Try to find a statement or a sequence of statements that can be performed
   on a smaller type:

     TYPE x_T, res0_T, res1_T;
     S2  x_T = (TYPE) x_t;
     S3  res0_T = op (x_T, C0);
     S4  res1_T = op (res0_T, C1);
     S5  ... = () res1_T;  - type demotion

   where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
   constants.
   Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
   be 'type' or some intermediate type.  For now, we expect S5 to be a type
   demotion operation.  We also check that S3 and S4 have only one use.  */
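/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources).  A loop in which the operations are over-widened
   by the front end:

     unsigned char a[N], b[N];
     for (i = 0; i < N; i++)
       b[i] = (a[i] >> 2) & 0x3f;

   a[i] is promoted to int, shifted and masked as int, and the result is
   demoted back to unsigned char; the shift and the bit-and can instead be
   performed directly on a narrower type.  */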
vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                  tree *type_in, tree *type_out)
  gimple stmt = VEC_pop (gimple, *stmts);
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
  imm_use_iterator imm_iter;
  use_operand_p use_p;
  tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
  struct loop *loop = (gimple_bb (stmt))->loop_father;

      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,

      /* STMT can be performed on a smaller type.  Check its uses.  */
      lhs = gimple_assign_lhs (stmt);

      FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
          if (is_gimple_debug (USE_STMT (use_p)))
          use_stmt = USE_STMT (use_p);

      if (nuses != 1 || !is_gimple_assign (use_stmt)
          || !gimple_bb (use_stmt)
          || !flow_bb_inside_loop_p (loop, gimple_bb (use_stmt)))

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
         in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */

        VEC_safe_push (gimple, heap, *stmts, prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
        = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (vect_print_dump_info (REPORT_DETAILS))
          fprintf (vect_dump, "created pattern stmt: ");
          print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

      type = gimple_expr_type (stmt);

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type promotion or signedness change.  Check that USE_TYPE
         is not bigger than the original type.  */
      if (!INTEGRAL_TYPE_P (use_type)
          || TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type)
          || TYPE_PRECISION (type) < TYPE_PRECISION (use_type))

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
          /* Create NEW_TYPE->USE_TYPE conversion.  */
          tmp = create_tmp_reg (use_type, NULL);
          add_referenced_var (tmp);
          new_oprnd = make_ssa_name (tmp, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
            VEC_safe_push (gimple, heap, *stmts, prev_stmt);

            STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
              = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_out = NULL_TREE;

      VEC_safe_push (gimple, heap, *stmts, use_stmt);

    /* TODO: support general case, create a conversion to the correct type.  */

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
      fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  return pattern_stmt;
/* Detect widening shift pattern:

     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;

   where type 'TYPE' is at least double the size of type 'type'.

   Also detect unsigned cases:

     unsigned TYPE u_res_T;
     S2  a_T = (TYPE) a_t;
     S3  res_T = a_T << CONST;
     S4  u_res_T = (unsigned TYPE) res_T;

   And a case when 'TYPE' is 4 times bigger than 'type'.  In that case we
   create an additional pattern stmt for S2 to create a variable of an
   intermediate type, and perform widen-shift on the intermediate type:

     TYPE a_T, res_T, res_T';
     S2  a_T = (TYPE) a_t;
           '--> a_it = (interm_type) a_t;
     S3  res_T = a_T << CONST;
           '--> res_T' = a_it <<* CONST;

   * STMTS: Contains a stmt from which the pattern search begins.
   In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
   in STMTS.  When an intermediate type is used and a pattern statement is
   created for S2, we also put S2 here (before S3).

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the sequence of
   stmts that constitute the pattern.  In this case it will be:
        WIDEN_LSHIFT_EXPR <a_t, CONST>.  */
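/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources):

     short a[N];
     int b[N];
     for (i = 0; i < N; i++)
       b[i] = a[i] << 4;

   a[i] is first promoted from short to int and then shifted; the pattern
   replaces the promotion and the shift with a single widening shift.  */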
vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
                                tree *type_in, tree *type_out)
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt, orig_stmt = NULL;
  tree vectype, vectype_out = NULL_TREE;
  enum tree_code dummy_code;
  VEC (tree, heap) *dummy_vec;
  gimple use_stmt = NULL;
  bool over_widen = false;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))

  orig_stmt = last_stmt;
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
      /* This statement was also detected as over-widening operation (it can't
         be any other pattern, because only over-widening detects shifts).
         LAST_STMT is the final type demotion statement, but its related
         statement is shift.  We analyze the related statement to catch cases:

           S1 a_T = (TYPE) a_t;
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;

         (size of type * 2 <= size of itype
          and size of itype * 2 <= size of TYPE)

         code after over-widening pattern detection:

           S1 a_T = (TYPE) a_t;
             --> a_it = (itype) a_t;
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;  <--- LAST_STMT
             --> res = a_it << CONST;

           S1 a_T = (TYPE) a_t;
             --> a_it = (itype) a_t; - redundant
           S2 res_T = a_T << CONST;
           S3 res = (itype)res_T;
             --> res = a_t w<< CONST;

         i.e., we replace the three statements with res = a_t w<< CONST.  */
      last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0, false))

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)

  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check if this is a widening operation.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
                                      type, &half_type0, def_stmt0))

  /* Handle unsigned case.  Look for
     S4  u_res_T = (unsigned TYPE) res_T;
     Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR.  */
  if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
      tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
      imm_use_iterator imm_iter;
      use_operand_p use_p;

          /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
             We check here that TYPE is the correct type for the operation,
             i.e., it's the type of the original result.  */
          tree orig_type = gimple_expr_type (orig_stmt);
          if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))

          FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
              if (is_gimple_debug (USE_STMT (use_p)))
              use_stmt = USE_STMT (use_p);

          if (nuses != 1 || !is_gimple_assign (use_stmt)
              || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))

          use_lhs = gimple_assign_lhs (use_stmt);
          use_type = TREE_TYPE (use_lhs);

          if (!INTEGRAL_TYPE_P (use_type)
              || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
              || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
                                          vectype_out, vectype,
                                          &dummy, &dummy, &dummy_code,
                                          &dummy_code, &dummy_int,

  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

    last_stmt = use_stmt;
    last_stmt = orig_stmt;

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
/* Detect a vector by vector shift pattern that wouldn't be otherwise
   vectorized:

     S3  res_T = b_T op a_t;

   where type 'TYPE' is a type with different size than 'type',
   and op is <<, >> or rotate.

     TYPE b_T, c_T, res_T;
     S1  a_t = (type) c_T;
     S3  res_T = b_T op a_t;

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the shift/rotate stmt.  The original stmt (S3) is replaced
   with a shift/rotate which has same type on both operands, in the
   second case just b_T op c_T, in the first case with added cast
   from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the shift/rotate
   S3 stmt.  */
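/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources):

     long long a[N];
     unsigned char n[N];
     for (i = 0; i < N; i++)
       a[i] = a[i] << n[i];

   The shift amount has a different (narrower) type than the shifted value,
   so without this pattern the statement would not be vectorized as a
   vector-by-vector shift.  */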
vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
                                        tree *type_in, tree *type_out)
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  enum vect_def_type dt;

  if (!is_gimple_assign (last_stmt))

  rhs_code = gimple_assign_rhs_code (last_stmt);

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))

  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
         != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
         != TYPE_PRECISION (TREE_TYPE (oprnd0)))

  if (!vect_is_simple_use (oprnd1, loop_vinfo, NULL, &def_stmt, &def, &dt))

  if (dt != vect_internal_def)

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)

  if (gimple_assign_cast_p (def_stmt))
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
          && TYPE_PRECISION (TREE_TYPE (rhs1))
             == TYPE_PRECISION (TREE_TYPE (oprnd0)))

  if (def == NULL_TREE)
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
      new_pattern_def_seq (stmt_vinfo, def_stmt);

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);
  return pattern_stmt;
/* Detect a signed division by power of two constant that wouldn't be
   otherwise vectorized:

     S1  a_t = b_t / N;

   where type 'type' is a signed integral type and N is a constant positive
   power of two.

   Similarly handle signed modulo by power of two constant:

     S4  a_t = b_t % N;

   * STMTS: Contains a stmt from which the pattern search begins,
   i.e. the division stmt.  S1 is replaced by:
     S3  y_t = b_t < 0 ? N - 1 : 0;
     S1' a_t = x_t >> log2 (N);

   S4 is replaced by (where *_T temporaries have unsigned type):
     S9  y_T = b_t < 0 ? -1U : 0U;
     S8  z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
     S7  z_t = (type) z_T;
     S5  x_t = w_t & (N - 1);
     S4' a_t = x_t - z_t;

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the division
   S1 or modulo S4 stmt.  */
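/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources):

     int a[N], b[N], c[N];
     for (i = 0; i < N; i++)
       {
         b[i] = a[i] / 8;
         c[i] = a[i] % 8;
       }

   Signed division and modulo by a power of two are rewritten into the
   shift/mask sequences described above so they can be vectorized even when
   the target has no vector division instruction.  */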
vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
                                 tree *type_in, tree *type_out)
  gimple last_stmt = VEC_pop (gimple, *stmts);
  tree oprnd0, oprnd1, vectype, itype, cond;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt))

  rhs_code = gimple_assign_rhs_code (last_stmt);
    case TRUNC_DIV_EXPR:
    case TRUNC_MOD_EXPR:

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))

  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  itype = TREE_TYPE (oprnd0);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != INTEGER_CST
      || TREE_CODE (itype) != INTEGER_TYPE
      || TYPE_UNSIGNED (itype)
      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
      || !integer_pow2p (oprnd1)
      || tree_int_cst_sgn (oprnd1) != 1)

  vectype = get_vectype_for_scalar_type (itype);
  if (vectype == NULL_TREE)

  /* If the target can handle vectorized division or modulo natively,
     don't attempt to optimize this.  */
  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
      enum machine_mode vec_mode = TYPE_MODE (vectype);
      int icode = (int) optab_handler (optab, vec_mode);
      if (icode != CODE_FOR_nothing
          || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)

  /* Pattern detected.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");

  cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
  if (rhs_code == TRUNC_DIV_EXPR)
      tree var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                         fold_build2 (MINUS_EXPR, itype,
                                                      build_int_cst (itype,
                                         build_int_cst (itype, 0));
      new_pattern_def_seq (stmt_vinfo, def_stmt);
      var = vect_recog_temp_ssa_var (itype, NULL);
      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
                                        gimple_assign_lhs (def_stmt));
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (RSHIFT_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        build_int_cst (itype,
                                                       tree_log2 (oprnd1)));

      STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
      if (compare_tree_int (oprnd1, 2) == 0)
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
                                             build_int_cst (itype, 1),
                                             build_int_cst (itype, 0));
          append_pattern_def_seq (stmt_vinfo, def_stmt);

          tree utype
            = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
          tree vecutype = get_vectype_for_scalar_type (utype);
            = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
                                    - tree_log2 (oprnd1));
          tree var = vect_recog_temp_ssa_var (utype, NULL);
          stmt_vec_info def_stmt_vinfo;

          def_stmt
            = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
                                             build_int_cst (utype, -1),
                                             build_int_cst (utype, 0));
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          var = vect_recog_temp_ssa_var (utype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
                                            gimple_assign_lhs (def_stmt),
          def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
          set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
          STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
          append_pattern_def_seq (stmt_vinfo, def_stmt);
          signmask = vect_recog_temp_ssa_var (itype, NULL);
          def_stmt
            = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
          append_pattern_def_seq (stmt_vinfo, def_stmt);

      def_stmt
        = gimple_build_assign_with_ops (PLUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
      append_pattern_def_seq (stmt_vinfo, def_stmt);
      def_stmt
        = gimple_build_assign_with_ops (BIT_AND_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),
                                        fold_build2 (MINUS_EXPR, itype,
                                                     build_int_cst (itype,
      append_pattern_def_seq (stmt_vinfo, def_stmt);

      pattern_stmt
        = gimple_build_assign_with_ops (MINUS_EXPR,
                                        vect_recog_temp_ssa_var (itype, NULL),
                                        gimple_assign_lhs (def_stmt),

  if (vect_print_dump_info (REPORT_DETAILS))
    print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);

  VEC_safe_push (gimple, heap, *stmts, last_stmt);

  *type_out = vectype;
  return pattern_stmt;
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are constants and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type'.

   * LAST_STMT: A stmt from which the pattern search begins.

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
   Additionally a def_stmt is added.

        a_it = x_t CMP y_t ? b_it : c_it;
        a_T = (TYPE) a_it;  */
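/* An illustrative example (added for clarity; hypothetical names, not from
   the original sources):

     short x[N], y[N];
     int out[N];
     for (i = 0; i < N; i++)
       out[i] = x[i] < y[i] ? 17 : 42;

   The comparison is done on short operands while the selected constants are
   stored as int, so the COND_EXPR is first performed in a type with the
   width of the comparison and then widened to int.  */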
vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
  gimple last_stmt = VEC_index (gimple, *stmts, 0);
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);

  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (TREE_CODE (then_clause) != INTEGER_CST
      || TREE_CODE (else_clause) != INTEGER_CST)

  if (!COMPARISON_CLASS_P (cond_expr))

  comp_vectype
    = get_vectype_for_scalar_type (TREE_TYPE (TREE_OPERAND (cond_expr, 0)));
  if (comp_vectype == NULL_TREE)

  type = gimple_expr_type (last_stmt);
  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)

  if (expand_vec_cond_expr_p (vectype, comp_vectype))

  itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
                                          TYPE_UNSIGNED (type));
  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)

  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))

  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
      if (!int_fits_type_p (then_clause, itype)
          || !int_fits_type_p (else_clause, itype))

  def_stmt
    = gimple_build_assign_with_ops3 (COND_EXPR,
                                     vect_recog_temp_ssa_var (itype, NULL),
                                     unshare_expr (cond_expr),
                                     fold_convert (itype, then_clause),
                                     fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  return pattern_stmt;
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  */

check_bool_pattern (tree var, loop_vec_info loop_vinfo)
  enum vect_def_type dt;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))

  if (dt != vect_internal_def)

  if (!is_gimple_assign (def_stmt))

  if (!has_single_use (def))

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
      return check_bool_pattern (rhs1, loop_vinfo);

      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
           || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
          && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
      return check_bool_pattern (rhs1, loop_vinfo);

      return check_bool_pattern (rhs1, loop_vinfo);

      if (!check_bool_pattern (rhs1, loop_vinfo))
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);

      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
          tree vecitype, comp_vectype;

          comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
          if (comp_vectype == NULL_TREE)

          if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
              enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
              tree itype
                = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
              vecitype = get_vectype_for_scalar_type (itype);
              if (vecitype == NULL_TREE)

            vecitype = comp_vectype;
          return expand_vec_cond_expr_p (vecitype, comp_vectype);
/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
   to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT.  */

adjust_bool_pattern_cast (tree type, tree var)
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
  gimple cast_stmt, pattern_stmt;

  gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
  new_pattern_def_seq (stmt_vinfo, pattern_stmt);
  cast_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
                                    vect_recog_temp_ssa_var (type, NULL),
                                    gimple_assign_lhs (pattern_stmt),
  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
  return gimple_assign_lhs (cast_stmt);
2013 /* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2014 recursively. VAR is an SSA_NAME that should be transformed from bool
2015 to a wider integer type, OUT_TYPE is the desired final integer type of
2016 the whole pattern, TRUEVAL should be NULL unless optimizing
2017 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2018 in the then_clause, STMTS is where statements with added pattern stmts
2019 should be pushed to. */
2022 adjust_bool_pattern (tree var
, tree out_type
, tree trueval
,
2023 VEC (gimple
, heap
) **stmts
)
2025 gimple stmt
= SSA_NAME_DEF_STMT (var
);
2026 enum tree_code rhs_code
, def_rhs_code
;
2027 tree itype
, cond_expr
, rhs1
, rhs2
, irhs1
, irhs2
;
2029 gimple pattern_stmt
, def_stmt
;
2031 rhs1
= gimple_assign_rhs1 (stmt
);
2032 rhs2
= gimple_assign_rhs2 (stmt
);
2033 rhs_code
= gimple_assign_rhs_code (stmt
);
2034 loc
= gimple_location (stmt
);
2039 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2040 itype
= TREE_TYPE (irhs1
);
2042 = gimple_build_assign_with_ops (SSA_NAME
,
2043 vect_recog_temp_ssa_var (itype
, NULL
),
2048 irhs1
= adjust_bool_pattern (rhs1
, out_type
, NULL_TREE
, stmts
);
2049 itype
= TREE_TYPE (irhs1
);
2051 = gimple_build_assign_with_ops (BIT_XOR_EXPR
,
2052 vect_recog_temp_ssa_var (itype
, NULL
),
2053 irhs1
, build_int_cst (itype
, 1));
2057 /* Try to optimize x = y & (a < b ? 1 : 0); into
2058 x = (a < b ? y : 0);
2064 S1 a_b = x1 CMP1 y1;
2065 S2 b_b = x2 CMP2 y2;
2067 S4 d_T = (TYPE) c_b;
2069 we would normally emit:
2071 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2072 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2073 S3' c_T = a_T & b_T;
2076 but we can save one stmt by using the
2077 result of one of the COND_EXPRs in the other COND_EXPR and leave
2078 BIT_AND_EXPR stmt out:
2080 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2081 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2084 At least when VEC_COND_EXPR is implemented using masks
2085 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2086 computes the comparison masks and ands it, in one case with
2087 all ones vector, in the other case with a vector register.
2088 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2089 often more expensive. */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = VEC_pop (gimple, *stmts);
	      gcc_assert (tstmt == def_stmt);
	      VEC_quick_push (gimple, *stmts, stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	{
	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops3 (COND_EXPR,
					 vect_recog_temp_ssa_var (itype, NULL),
					 cond_expr, trueval,
					 build_int_cst (itype, 0));
      break;
    }

  VEC_safe_push (gimple, heap, *stmts, stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
/* Function vect_recog_bool_pattern

   Try to find a pattern like the following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming the size of TYPE is the same as the size of all comparisons
	(otherwise some casts would be added where needed), for the above
	sequence we create the related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T & b_T;
	but the above is more efficient.  */
static gimple
vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = VEC_pop (gimple, *stmts);
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      VEC_safe_push (gimple, heap, *stmts, last_stmt);
      return pattern_stmt;
    }
  else
    return NULL;
}
/* Mark statements that are involved in a pattern.  */

static inline void
vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
			 tree pattern_vectype)
{
  stmt_vec_info pattern_stmt_info, def_stmt_info;
  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
  gimple def_stmt;

  pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
  if (pattern_stmt_info == NULL)
    {
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo, NULL);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
    }
  gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));

  STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
  STMT_VINFO_DEF_TYPE (pattern_stmt_info)
    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
  STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
  STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
  STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
  STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
    = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
  if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
    {
      gimple_stmt_iterator si;
      for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
	   !gsi_end_p (si); gsi_next (&si))
	{
	  def_stmt = gsi_stmt (si);
	  def_stmt_info = vinfo_for_stmt (def_stmt);
	  if (def_stmt_info == NULL)
	    {
	      def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
	      set_vinfo_for_stmt (def_stmt, def_stmt_info);
	    }
	  gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
	  STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
	  STMT_VINFO_DEF_TYPE (def_stmt_info)
	    = STMT_VINFO_DEF_TYPE (orig_stmt_info);
	  if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
	    STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
	}
    }
}
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
	computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
   the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_pattern_recog.  */
static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      VEC (gimple, heap) **stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  VEC_truncate (gimple, *stmts_to_replace, 0);
  VEC_quick_push (gimple, *stmts_to_replace, stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  stmt = VEC_last (gimple, *stmts_to_replace);
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
	 recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support.  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "pattern recognized: ");
      print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
    }

  /* Mark the stmts that are involved in the pattern.  */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
    if (next == stmt)
      VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  */
  for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
	      && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "additional pattern stmt: ");
	  print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
/* Function vect_pattern_recog

   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
	computation idioms.

   Output - for each computation idiom that is detected we create a new stmt
	that provides the same functionality and that can be vectorized.  We
	also record some information in the struct_stmt_info of the relevant
	stmts, as explained below:

   At the entry to this function we have the following stmts, with the
   following initial value in the STMT_VINFO fields:

	 stmt                     in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
   represented by a single stmt.  We then:
   - create a new stmt S6 equivalent to the pattern (the stmt is not
     inserted into the code)
   - fill in the STMT_VINFO fields as follows:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              -
	 '---> S6: a_new = ....         -       S4              -
	 S5: ... = ..use(a_0)..         -       -               -

   (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
   to each other through the RELATED_STMT field).

   S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
   of S4 because it will replace all its uses.  Stmts {S1,S2,S3} will
   remain irrelevant unless used by stmts other than S4.

   If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
   (because they are marked as irrelevant).  It will vectorize S6, and record
   a pointer to the new vector stmt VS6 from S6 (as usual).
   S4 will be skipped, and S5 will be vectorized as usual:

				  in_pattern_p  related_stmt    vec_stmt
	 S1: a_i = ....                 -       -               -
	 S2: a_2 = ..use(a_i)..         -       -               -
	 S3: a_1 = ..use(a_2)..         -       -               -
       > VS6: va_new = ....             -       -               -
	 S4: a_0 = ..use(a_1)..         true    S6              VS6
	 '---> S6: a_new = ....         -       S4              VS6
       > VS5: ... = ..vuse(va_new)..    -       -               -
	 S5: ... = ..use(a_0)..         -       -               -

   DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
   elsewhere), and we'll end up with:

	VS6: va_new = ....
	VS5: ... = ..vuse(va_new)..

   In case of more than one pattern statement, e.g., widen-mult with
   intermediate type:

     S1  a_t = ;
     S2  a_T = (TYPE) a_t;
	   '--> S3: a_it = (interm_type) a_t;
     S4  prod_T = a_T * CONST;
	   '--> S5: prod_T' = a_it w* CONST;

   there may be other users of a_T outside the pattern.  In that case S2 will
   be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
   and vectorized.  The vector stmt VS2 will be recorded in S2, and VS3 will
   be recorded in S3.  */
void
vect_pattern_recog (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
  unsigned int nbbs = loop->num_nodes;
  gimple_stmt_iterator si;
  unsigned int i, j;
  vect_recog_func_ptr vect_recog_func;
  VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_pattern_recog ===");

  /* Scan through the loop stmts, applying the pattern recognition
     functions starting at each stmt visited:  */
  for (i = 0; i < nbbs; i++)
    {
      basic_block bb = bbs[i];
      for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
	{
	  /* Scan over all generic vect_recog_xxx_pattern functions.  */
	  for (j = 0; j < NUM_PATTERNS; j++)
	    {
	      vect_recog_func = vect_vect_recog_func_ptrs[j];
	      vect_pattern_recog_1 (vect_recog_func, si,
				    &stmts_to_replace);
	    }
	}
    }

  VEC_free (gimple, heap, stmts_to_replace);
}