]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
tree-vect-patterns.c (vect_same_loop_or_bb_p): New function.
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
2f326699 2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
4fb489e7 3 Free Software Foundation, Inc.
20f06221
DN
4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
9dcd6f09 10Software Foundation; either version 3, or (at your option) any later
20f06221
DN
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
9dcd6f09
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
20f06221
DN
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "ggc.h"
27#include "tree.h"
20f06221
DN
28#include "target.h"
29#include "basic-block.h"
cf835838 30#include "gimple-pretty-print.h"
20f06221
DN
31#include "tree-flow.h"
32#include "tree-dump.h"
20f06221
DN
33#include "cfgloop.h"
34#include "expr.h"
35#include "optabs.h"
36#include "params.h"
37#include "tree-data-ref.h"
38#include "tree-vectorizer.h"
39#include "recog.h"
718f9c0f 40#include "diagnostic-core.h"
20f06221 41
20f06221 42/* Pattern recognition functions */
51312233
IR
43static gimple vect_recog_widen_sum_pattern (VEC (gimple, heap) **, tree *,
44 tree *);
45static gimple vect_recog_widen_mult_pattern (VEC (gimple, heap) **, tree *,
46 tree *);
47static gimple vect_recog_dot_prod_pattern (VEC (gimple, heap) **, tree *,
48 tree *);
49static gimple vect_recog_pow_pattern (VEC (gimple, heap) **, tree *, tree *);
1107f3ae
IR
50static gimple vect_recog_over_widening_pattern (VEC (gimple, heap) **, tree *,
51 tree *);
36ba4aae
IR
52static gimple vect_recog_widen_shift_pattern (VEC (gimple, heap) **,
53 tree *, tree *);
732a0ad3
JJ
54static gimple vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **,
55 tree *, tree *);
363477c0
JJ
56static gimple vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **,
57 tree *, tree *);
69d2aade
JJ
58static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
59 tree *, tree *);
71c92d17 60static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
20f06221
DN
61static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
62 vect_recog_widen_mult_pattern,
63 vect_recog_widen_sum_pattern,
0b2229b0 64 vect_recog_dot_prod_pattern,
1107f3ae 65 vect_recog_pow_pattern,
69d2aade 66 vect_recog_over_widening_pattern,
36ba4aae 67 vect_recog_widen_shift_pattern,
732a0ad3 68 vect_recog_vector_vector_shift_pattern,
363477c0 69 vect_recog_sdivmod_pow2_pattern,
71c92d17
JJ
70 vect_recog_mixed_size_cond_pattern,
71 vect_recog_bool_pattern};
20f06221 72
083481d8
JJ
73static inline void
74append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
75{
a1a6c5b2
JJ
76 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
77 stmt);
083481d8
JJ
78}
79
80static inline void
81new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
82{
83 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
84 append_pattern_def_seq (stmt_info, stmt);
85}
86
f71cf56a
UW
87/* Check whether STMT2 is in the same loop or basic block as STMT1.
88 Which of the two applies depends on whether we're currently doing
89 loop-based or basic-block-based vectorization, as determined by
90 the vinfo_for_stmt for STMT1 (which must be defined).
91
92 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
93 to be defined as well. */
94
95static bool
96vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
97{
98 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
99 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
100 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
101
102 if (!gimple_bb (stmt2))
103 return false;
104
105 if (loop_vinfo)
106 {
107 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
108 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
109 return false;
110 }
111 else
112 {
113 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
114 || gimple_code (stmt2) == GIMPLE_PHI)
115 return false;
116 }
117
118 gcc_assert (vinfo_for_stmt (stmt2));
119 return true;
120}
121
bc4fb355
IR
122/* Check whether NAME, an ssa-name used in USE_STMT,
123 is a result of a type promotion or demotion, such that:
20f06221 124 DEF_STMT: NAME = NOP (name0)
bc4fb355 125 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
383d9c83
IR
126 If CHECK_SIGN is TRUE, check that either both types are signed or both are
127 unsigned. */
20f06221
DN
128
129static bool
bc4fb355
IR
130type_conversion_p (tree name, gimple use_stmt, bool check_sign,
131 tree *orig_type, gimple *def_stmt, bool *promotion)
20f06221
DN
132{
133 tree dummy;
726a989a 134 gimple dummy_gimple;
20f06221
DN
135 loop_vec_info loop_vinfo;
136 stmt_vec_info stmt_vinfo;
20f06221
DN
137 tree type = TREE_TYPE (name);
138 tree oprnd0;
139 enum vect_def_type dt;
140 tree def;
f5709183 141 bb_vec_info bb_vinfo;
20f06221
DN
142
143 stmt_vinfo = vinfo_for_stmt (use_stmt);
144 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183
IR
145 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
146 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
147 &def, &dt))
20f06221
DN
148 return false;
149
8644a673
IR
150 if (dt != vect_internal_def
151 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
152 return false;
153
bc4fb355 154 if (!*def_stmt)
20f06221
DN
155 return false;
156
726a989a 157 if (!is_gimple_assign (*def_stmt))
20f06221
DN
158 return false;
159
bc4fb355 160 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
20f06221
DN
161 return false;
162
726a989a 163 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221 164
bc4fb355
IR
165 *orig_type = TREE_TYPE (oprnd0);
166 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
167 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
168 return false;
169
170 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
171 *promotion = true;
172 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
173 *promotion = false;
174 else
20f06221
DN
175 return false;
176
24ee1384 177 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
f5709183 178 bb_vinfo, &dummy_gimple, &dummy, &dt))
20f06221
DN
179 return false;
180
20f06221
DN
181 return true;
182}
183
726a989a
RB
184/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
185 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
186
187static tree
188vect_recog_temp_ssa_var (tree type, gimple stmt)
189{
190 tree var = create_tmp_var (type, "patt");
191
192 add_referenced_var (var);
193 var = make_ssa_name (var, stmt);
194 return var;
195}
20f06221
DN
196
197/* Function vect_recog_dot_prod_pattern
198
199 Try to find the following pattern:
200
201 type x_t, y_t;
202 TYPE1 prod;
203 TYPE2 sum = init;
204 loop:
205 sum_0 = phi <init, sum_1>
206 S1 x_t = ...
207 S2 y_t = ...
208 S3 x_T = (TYPE1) x_t;
209 S4 y_T = (TYPE1) y_t;
210 S5 prod = x_T * y_T;
211 [S6 prod = (TYPE2) prod; #optional]
212 S7 sum_1 = prod + sum_0;
213
b8698a0f
L
214 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
215 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 216 computation.
b8698a0f 217
20f06221
DN
218 Input:
219
51312233
IR
220 * STMTS: Contains a stmt from which the pattern search begins. In the
221 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
222 will be detected.
20f06221
DN
223
224 Output:
225
226 * TYPE_IN: The type of the input arguments to the pattern.
227
228 * TYPE_OUT: The type of the output of this pattern.
229
230 * Return value: A new stmt that will be used to replace the sequence of
231 stmts that constitute the pattern. In this case it will be:
232 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
233
234 Note: The dot-prod idiom is a widening reduction pattern that is
235 vectorized without preserving all the intermediate results. It
236 produces only N/2 (widened) results (by summing up pairs of
237 intermediate results) rather than all N results. Therefore, we
238 cannot allow this pattern when we want to get all the results and in
239 the correct order (as is the case when this computation is in an
240 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 241
726a989a 242static gimple
51312233
IR
243vect_recog_dot_prod_pattern (VEC (gimple, heap) **stmts, tree *type_in,
244 tree *type_out)
20f06221 245{
51312233 246 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
20f06221
DN
247 tree oprnd0, oprnd1;
248 tree oprnd00, oprnd01;
51312233 249 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 250 tree type, half_type;
726a989a 251 gimple pattern_stmt;
20f06221 252 tree prod_type;
d29de1bf 253 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 254 struct loop *loop;
f471fe72 255 tree var;
bc4fb355 256 bool promotion;
20f06221 257
f5709183
IR
258 if (!loop_info)
259 return NULL;
260
261 loop = LOOP_VINFO_LOOP (loop_info);
262
51312233 263 if (!is_gimple_assign (last_stmt))
20f06221
DN
264 return NULL;
265
51312233 266 type = gimple_expr_type (last_stmt);
20f06221 267
b8698a0f 268 /* Look for the following pattern
20f06221
DN
269 DX = (TYPE1) X;
270 DY = (TYPE1) Y;
b8698a0f 271 DPROD = DX * DY;
20f06221
DN
272 DDPROD = (TYPE2) DPROD;
273 sum_1 = DDPROD + sum_0;
b8698a0f 274 In which
20f06221
DN
275 - DX is double the size of X
276 - DY is double the size of Y
277 - DX, DY, DPROD all have the same type
278 - sum is the same size of DPROD or bigger
279 - sum has been recognized as a reduction variable.
280
281 This is equivalent to:
282 DPROD = X w* Y; #widen mult
283 sum_1 = DPROD w+ sum_0; #widen summation
284 or
285 DPROD = X w* Y; #widen mult
286 sum_1 = DPROD + sum_0; #summation
287 */
288
289 /* Starting from LAST_STMT, follow the defs of its uses in search
290 of the above pattern. */
291
51312233 292 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
293 return NULL;
294
295 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
296 {
297 /* Has been detected as widening-summation? */
298
299 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
300 type = gimple_expr_type (stmt);
301 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 302 return NULL;
726a989a
RB
303 oprnd0 = gimple_assign_rhs1 (stmt);
304 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
305 half_type = TREE_TYPE (oprnd0);
306 }
307 else
308 {
726a989a 309 gimple def_stmt;
20f06221
DN
310
311 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
312 return NULL;
51312233
IR
313 oprnd0 = gimple_assign_rhs1 (last_stmt);
314 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
315 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
316 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 317 return NULL;
51312233 318 stmt = last_stmt;
20f06221 319
bc4fb355
IR
320 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
321 &promotion)
322 && promotion)
20f06221
DN
323 {
324 stmt = def_stmt;
726a989a 325 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
326 }
327 else
328 half_type = type;
329 }
330
51312233 331 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
332 we know that oprnd1 is the reduction variable (defined by a loop-header
333 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
334 Left to check that oprnd0 is defined by a (widen_)mult_expr */
ba02d3bc
RG
335 if (TREE_CODE (oprnd0) != SSA_NAME)
336 return NULL;
20f06221
DN
337
338 prod_type = half_type;
339 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
340
341 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 342 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
343 return NULL;
344
b8698a0f 345 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 346 inside the loop (in case we are analyzing an outer-loop). */
726a989a 347 if (!is_gimple_assign (stmt))
b8698a0f 348 return NULL;
20f06221
DN
349 stmt_vinfo = vinfo_for_stmt (stmt);
350 gcc_assert (stmt_vinfo);
8644a673 351 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 352 return NULL;
726a989a 353 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
354 return NULL;
355 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
356 {
357 /* Has been detected as a widening multiplication? */
358
359 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 360 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
361 return NULL;
362 stmt_vinfo = vinfo_for_stmt (stmt);
363 gcc_assert (stmt_vinfo);
8644a673 364 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
365 oprnd00 = gimple_assign_rhs1 (stmt);
366 oprnd01 = gimple_assign_rhs2 (stmt);
20f06221
DN
367 }
368 else
369 {
370 tree half_type0, half_type1;
726a989a 371 gimple def_stmt;
20f06221
DN
372 tree oprnd0, oprnd1;
373
726a989a
RB
374 oprnd0 = gimple_assign_rhs1 (stmt);
375 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
376 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
377 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 378 return NULL;
bc4fb355
IR
379 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
380 &promotion)
381 || !promotion)
20f06221 382 return NULL;
726a989a 383 oprnd00 = gimple_assign_rhs1 (def_stmt);
bc4fb355
IR
384 if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt,
385 &promotion)
386 || !promotion)
20f06221 387 return NULL;
726a989a 388 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 389 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
390 return NULL;
391 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
392 return NULL;
393 }
394
395 half_type = TREE_TYPE (oprnd00);
396 *type_in = half_type;
397 *type_out = type;
b8698a0f 398
20f06221 399 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 400 var = vect_recog_temp_ssa_var (type, NULL);
f471fe72
RG
401 pattern_stmt = gimple_build_assign_with_ops3 (DOT_PROD_EXPR, var,
402 oprnd00, oprnd01, oprnd1);
b8698a0f 403
20f06221
DN
404 if (vect_print_dump_info (REPORT_DETAILS))
405 {
406 fprintf (vect_dump, "vect_recog_dot_prod_pattern: detected: ");
726a989a 407 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 408 }
d29de1bf
DN
409
410 /* We don't allow changing the order of the computation in the inner-loop
411 when doing outer-loop vectorization. */
51312233 412 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
d29de1bf 413
726a989a 414 return pattern_stmt;
20f06221 415}
b8698a0f 416
51312233 417
36ba4aae
IR
418/* Handle widening operation by a constant. At the moment we support MULT_EXPR
419 and LSHIFT_EXPR.
420
421 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
422 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
51312233
IR
423
424 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
36ba4aae
IR
425 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
426 that satisfies the above restrictions, we can perform a widening opeartion
427 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
428 with a_it = (interm_type) a_t; */
51312233
IR
429
430static bool
36ba4aae
IR
431vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
432 tree const_oprnd, tree *oprnd,
433 VEC (gimple, heap) **stmts, tree type,
434 tree *half_type, gimple def_stmt)
51312233
IR
435{
436 tree new_type, new_oprnd, tmp;
437 gimple new_stmt;
438
36ba4aae
IR
439 if (code != MULT_EXPR && code != LSHIFT_EXPR)
440 return false;
441
442 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
443 || (code == LSHIFT_EXPR
444 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
445 != 1))
446 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
51312233
IR
447 {
448 /* CONST_OPRND is a constant of HALF_TYPE. */
449 *oprnd = gimple_assign_rhs1 (def_stmt);
450 return true;
451 }
452
f71cf56a
UW
453 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
454 return false;
455
456 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
51312233
IR
457 return false;
458
36ba4aae 459 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
51312233
IR
460 a type 2 times bigger than HALF_TYPE. */
461 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
462 TYPE_UNSIGNED (type));
36ba4aae
IR
463 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
464 || (code == LSHIFT_EXPR
465 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
51312233
IR
466 return false;
467
36ba4aae 468 /* Use NEW_TYPE for widening operation. */
51312233
IR
469 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
470 {
471 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
472 /* Check if the already created pattern stmt is what we need. */
473 if (!is_gimple_assign (new_stmt)
474 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
475 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
476 return false;
477
3a0a3578 478 VEC_safe_push (gimple, heap, *stmts, def_stmt);
51312233
IR
479 *oprnd = gimple_assign_lhs (new_stmt);
480 }
481 else
482 {
483 /* Create a_T = (NEW_TYPE) a_t; */
484 *oprnd = gimple_assign_rhs1 (def_stmt);
485 tmp = create_tmp_var (new_type, NULL);
486 add_referenced_var (tmp);
487 new_oprnd = make_ssa_name (tmp, NULL);
488 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
489 NULL_TREE);
51312233
IR
490 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
491 VEC_safe_push (gimple, heap, *stmts, def_stmt);
492 *oprnd = new_oprnd;
493 }
494
495 *half_type = new_type;
496 return true;
497}
498
499
20f06221
DN
500/* Function vect_recog_widen_mult_pattern
501
502 Try to find the following pattern:
503
504 type a_t, b_t;
505 TYPE a_T, b_T, prod_T;
506
507 S1 a_t = ;
508 S2 b_t = ;
509 S3 a_T = (TYPE) a_t;
510 S4 b_T = (TYPE) b_t;
511 S5 prod_T = a_T * b_T;
512
513 where type 'TYPE' is at least double the size of type 'type'.
514
d47657bd 515 Also detect unsigned cases:
383d9c83
IR
516
517 unsigned type a_t, b_t;
518 unsigned TYPE u_prod_T;
519 TYPE a_T, b_T, prod_T;
520
521 S1 a_t = ;
522 S2 b_t = ;
523 S3 a_T = (TYPE) a_t;
524 S4 b_T = (TYPE) b_t;
525 S5 prod_T = a_T * b_T;
526 S6 u_prod_T = (unsigned TYPE) prod_T;
527
528 and multiplication by constants:
529
530 type a_t;
531 TYPE a_T, prod_T;
532
533 S1 a_t = ;
534 S3 a_T = (TYPE) a_t;
535 S5 prod_T = a_T * CONST;
536
51312233
IR
537 A special case of multiplication by constants is when 'TYPE' is 4 times
538 bigger than 'type', but CONST fits an intermediate type 2 times smaller
539 than 'TYPE'. In that case we create an additional pattern stmt for S3
540 to create a variable of the intermediate type, and perform widen-mult
541 on the intermediate type as well:
542
543 type a_t;
544 interm_type a_it;
545 TYPE a_T, prod_T, prod_T';
546
547 S1 a_t = ;
548 S3 a_T = (TYPE) a_t;
549 '--> a_it = (interm_type) a_t;
550 S5 prod_T = a_T * CONST;
551 '--> prod_T' = a_it w* CONST;
20f06221 552
51312233
IR
553 Input/Output:
554
555 * STMTS: Contains a stmt from which the pattern search begins. In the
556 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
557 is detected. In case of unsigned widen-mult, the original stmt (S5) is
558 replaced with S6 in STMTS. In case of multiplication by a constant
559 of an intermediate type (the last case above), STMTS also contains S3
560 (inserted before S5).
20f06221
DN
561
562 Output:
563
564 * TYPE_IN: The type of the input arguments to the pattern.
565
383d9c83 566 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
567
568 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 569 stmts that constitute the pattern. In this case it will be:
20f06221
DN
570 WIDEN_MULT <a_t, b_t>
571*/
572
726a989a 573static gimple
51312233
IR
574vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
575 tree *type_in, tree *type_out)
20f06221 576{
51312233 577 gimple last_stmt = VEC_pop (gimple, *stmts);
726a989a 578 gimple def_stmt0, def_stmt1;
89d67cca
DN
579 tree oprnd0, oprnd1;
580 tree type, half_type0, half_type1;
726a989a 581 gimple pattern_stmt;
383d9c83 582 tree vectype, vectype_out = NULL_TREE;
89d67cca 583 tree dummy;
726a989a 584 tree var;
89d67cca 585 enum tree_code dummy_code;
5d593372
IR
586 int dummy_int;
587 VEC (tree, heap) *dummy_vec;
36ba4aae 588 bool op1_ok;
bc4fb355 589 bool promotion;
89d67cca 590
51312233 591 if (!is_gimple_assign (last_stmt))
89d67cca
DN
592 return NULL;
593
51312233 594 type = gimple_expr_type (last_stmt);
89d67cca
DN
595
596 /* Starting from LAST_STMT, follow the defs of its uses in search
597 of the above pattern. */
598
51312233 599 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
89d67cca
DN
600 return NULL;
601
51312233
IR
602 oprnd0 = gimple_assign_rhs1 (last_stmt);
603 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
604 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
605 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
606 return NULL;
607
383d9c83 608 /* Check argument 0. */
bc4fb355
IR
609 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
610 &promotion)
611 || !promotion)
612 return NULL;
383d9c83 613 /* Check argument 1. */
bc4fb355
IR
614 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
615 &def_stmt1, &promotion);
89d67cca 616
bc4fb355 617 if (op1_ok && promotion)
383d9c83
IR
618 {
619 oprnd0 = gimple_assign_rhs1 (def_stmt0);
620 oprnd1 = gimple_assign_rhs1 (def_stmt1);
621 }
36ba4aae 622 else
383d9c83 623 {
51312233 624 if (TREE_CODE (oprnd1) == INTEGER_CST
383d9c83 625 && TREE_CODE (half_type0) == INTEGER_TYPE
36ba4aae
IR
626 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
627 &oprnd0, stmts, type,
628 &half_type0, def_stmt0))
51312233 629 half_type1 = half_type0;
383d9c83
IR
630 else
631 return NULL;
632 }
633
634 /* Handle unsigned case. Look for
635 S6 u_prod_T = (unsigned TYPE) prod_T;
636 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
637 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
638 {
51312233 639 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
383d9c83
IR
640 imm_use_iterator imm_iter;
641 use_operand_p use_p;
642 int nuses = 0;
643 gimple use_stmt = NULL;
644 tree use_type;
645
646 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
647 return NULL;
648
649 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
650 {
4fb489e7
JJ
651 if (is_gimple_debug (USE_STMT (use_p)))
652 continue;
383d9c83
IR
653 use_stmt = USE_STMT (use_p);
654 nuses++;
655 }
656
657 if (nuses != 1 || !is_gimple_assign (use_stmt)
658 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
659 return NULL;
660
f71cf56a 661 if (!vect_same_loop_or_bb_p (last_stmt, use_stmt))
c0d942d2
UW
662 return NULL;
663
383d9c83
IR
664 use_lhs = gimple_assign_lhs (use_stmt);
665 use_type = TREE_TYPE (use_lhs);
666 if (!INTEGRAL_TYPE_P (use_type)
667 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
668 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
669 return NULL;
670
671 type = use_type;
51312233 672 last_stmt = use_stmt;
383d9c83 673 }
89d67cca 674
9600efe1 675 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
676 return NULL;
677
678 /* Pattern detected. */
679 if (vect_print_dump_info (REPORT_DETAILS))
680 fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
681
682 /* Check target support */
683 vectype = get_vectype_for_scalar_type (half_type0);
b690cc0f 684 vectype_out = get_vectype_for_scalar_type (type);
03d3e953 685 if (!vectype
d163c4f7 686 || !vectype_out
51312233 687 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
b690cc0f 688 vectype_out, vectype,
726a989a 689 &dummy, &dummy, &dummy_code,
5d593372 690 &dummy_code, &dummy_int, &dummy_vec))
89d67cca
DN
691 return NULL;
692
693 *type_in = vectype;
b690cc0f 694 *type_out = vectype_out;
89d67cca
DN
695
696 /* Pattern supported. Create a stmt to be used to replace the pattern: */
726a989a
RB
697 var = vect_recog_temp_ssa_var (type, NULL);
698 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
699 oprnd1);
726a989a 700
89d67cca 701 if (vect_print_dump_info (REPORT_DETAILS))
726a989a
RB
702 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
703
51312233 704 VEC_safe_push (gimple, heap, *stmts, last_stmt);
726a989a 705 return pattern_stmt;
20f06221
DN
706}
707
708
0b2229b0
RG
709/* Function vect_recog_pow_pattern
710
711 Try to find the following pattern:
712
713 x = POW (y, N);
714
715 with POW being one of pow, powf, powi, powif and N being
716 either 2 or 0.5.
717
718 Input:
719
720 * LAST_STMT: A stmt from which the pattern search begins.
721
722 Output:
723
724 * TYPE_IN: The type of the input arguments to the pattern.
725
726 * TYPE_OUT: The type of the output of this pattern.
727
728 * Return value: A new stmt that will be used to replace the sequence of
729 stmts that constitute the pattern. In this case it will be:
726a989a 730 x = x * x
0b2229b0 731 or
726a989a 732 x = sqrt (x)
0b2229b0
RG
733*/
734
726a989a 735static gimple
51312233
IR
736vect_recog_pow_pattern (VEC (gimple, heap) **stmts, tree *type_in,
737 tree *type_out)
0b2229b0 738{
51312233 739 gimple last_stmt = VEC_index (gimple, *stmts, 0);
726a989a
RB
740 tree fn, base, exp = NULL;
741 gimple stmt;
742 tree var;
0b2229b0 743
51312233 744 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
0b2229b0
RG
745 return NULL;
746
51312233 747 fn = gimple_call_fndecl (last_stmt);
52bd463c
RG
748 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
749 return NULL;
750
0b2229b0
RG
751 switch (DECL_FUNCTION_CODE (fn))
752 {
753 case BUILT_IN_POWIF:
754 case BUILT_IN_POWI:
755 case BUILT_IN_POWF:
756 case BUILT_IN_POW:
51312233
IR
757 base = gimple_call_arg (last_stmt, 0);
758 exp = gimple_call_arg (last_stmt, 1);
0b2229b0
RG
759 if (TREE_CODE (exp) != REAL_CST
760 && TREE_CODE (exp) != INTEGER_CST)
726a989a 761 return NULL;
0b2229b0
RG
762 break;
763
726a989a
RB
764 default:
765 return NULL;
0b2229b0
RG
766 }
767
768 /* We now have a pow or powi builtin function call with a constant
769 exponent. */
770
0b2229b0
RG
771 *type_out = NULL_TREE;
772
773 /* Catch squaring. */
774 if ((host_integerp (exp, 0)
775 && tree_low_cst (exp, 0) == 2)
776 || (TREE_CODE (exp) == REAL_CST
777 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
c6b1b49b
RG
778 {
779 *type_in = TREE_TYPE (base);
726a989a
RB
780
781 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
782 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
726a989a 783 return stmt;
c6b1b49b 784 }
0b2229b0
RG
785
786 /* Catch square root. */
787 if (TREE_CODE (exp) == REAL_CST
788 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
789 {
790 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
c6b1b49b
RG
791 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
792 if (*type_in)
793 {
726a989a
RB
794 gimple stmt = gimple_build_call (newfn, 1, base);
795 if (vectorizable_function (stmt, *type_in, *type_in)
796 != NULL_TREE)
797 {
798 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
b8698a0f 799 gimple_call_set_lhs (stmt, var);
726a989a
RB
800 return stmt;
801 }
c6b1b49b 802 }
0b2229b0
RG
803 }
804
726a989a 805 return NULL;
0b2229b0
RG
806}
807
808
20f06221
DN
809/* Function vect_recog_widen_sum_pattern
810
811 Try to find the following pattern:
812
b8698a0f 813 type x_t;
20f06221
DN
814 TYPE x_T, sum = init;
815 loop:
816 sum_0 = phi <init, sum_1>
817 S1 x_t = *p;
818 S2 x_T = (TYPE) x_t;
819 S3 sum_1 = x_T + sum_0;
820
b8698a0f 821 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 822 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 823 a special case of a reduction computation.
20f06221
DN
824
825 Input:
826
827 * LAST_STMT: A stmt from which the pattern search begins. In the example,
828 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 829
20f06221 830 Output:
b8698a0f 831
20f06221
DN
832 * TYPE_IN: The type of the input arguments to the pattern.
833
834 * TYPE_OUT: The type of the output of this pattern.
835
836 * Return value: A new stmt that will be used to replace the sequence of
837 stmts that constitute the pattern. In this case it will be:
838 WIDEN_SUM <x_t, sum_0>
d29de1bf 839
b8698a0f 840 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 841 vectorized without preserving all the intermediate results. It
b8698a0f
L
842 produces only N/2 (widened) results (by summing up pairs of
843 intermediate results) rather than all N results. Therefore, we
844 cannot allow this pattern when we want to get all the results and in
845 the correct order (as is the case when this computation is in an
d29de1bf 846 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 847
726a989a 848static gimple
51312233
IR
849vect_recog_widen_sum_pattern (VEC (gimple, heap) **stmts, tree *type_in,
850 tree *type_out)
20f06221 851{
51312233 852 gimple stmt, last_stmt = VEC_index (gimple, *stmts, 0);
20f06221 853 tree oprnd0, oprnd1;
51312233 854 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 855 tree type, half_type;
726a989a 856 gimple pattern_stmt;
d29de1bf 857 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 858 struct loop *loop;
726a989a 859 tree var;
bc4fb355 860 bool promotion;
20f06221 861
f5709183
IR
862 if (!loop_info)
863 return NULL;
864
865 loop = LOOP_VINFO_LOOP (loop_info);
866
51312233 867 if (!is_gimple_assign (last_stmt))
20f06221
DN
868 return NULL;
869
51312233 870 type = gimple_expr_type (last_stmt);
20f06221
DN
871
872 /* Look for the following pattern
873 DX = (TYPE) X;
874 sum_1 = DX + sum_0;
875 In which DX is at least double the size of X, and sum_1 has been
876 recognized as a reduction variable.
877 */
878
879 /* Starting from LAST_STMT, follow the defs of its uses in search
880 of the above pattern. */
881
51312233 882 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
883 return NULL;
884
885 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
886 return NULL;
887
51312233
IR
888 oprnd0 = gimple_assign_rhs1 (last_stmt);
889 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
890 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
891 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
892 return NULL;
893
51312233 894 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
895 we know that oprnd1 is the reduction variable (defined by a loop-header
896 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
897 Left to check that oprnd0 is defined by a cast from type 'type' to type
898 'TYPE'. */
899
bc4fb355
IR
900 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
901 &promotion)
902 || !promotion)
903 return NULL;
20f06221 904
726a989a 905 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
906 *type_in = half_type;
907 *type_out = type;
908
909 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a
RB
910 var = vect_recog_temp_ssa_var (type, NULL);
911 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
912 oprnd0, oprnd1);
726a989a 913
20f06221
DN
914 if (vect_print_dump_info (REPORT_DETAILS))
915 {
916 fprintf (vect_dump, "vect_recog_widen_sum_pattern: detected: ");
726a989a 917 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 918 }
d29de1bf
DN
919
920 /* We don't allow changing the order of the computation in the inner-loop
921 when doing outer-loop vectorization. */
51312233 922 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
d29de1bf 923
726a989a 924 return pattern_stmt;
20f06221
DN
925}
926
927
1107f3ae
IR
928/* Return TRUE if the operation in STMT can be performed on a smaller type.
929
930 Input:
931 STMT - a statement to check.
932 DEF - we support operations with two operands, one of which is constant.
933 The other operand can be defined by a demotion operation, or by a
934 previous statement in a sequence of over-promoted operations. In the
935 later case DEF is used to replace that operand. (It is defined by a
936 pattern statement we created for the previous statement in the
937 sequence).
938
939 Input/output:
940 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
941 NULL, it's the type of DEF.
942 STMTS - additional pattern statements. If a pattern statement (type
943 conversion) is created in this function, its original statement is
944 added to STMTS.
945
946 Output:
947 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
948 operands to use in the new pattern statement for STMT (will be created
949 in vect_recog_over_widening_pattern ()).
950 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
951 statements for STMT: the first one is a type promotion and the second
952 one is the operation itself. We return the type promotion statement
363477c0 953 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1107f3ae
IR
954 the second pattern statement. */
955
956static bool
957vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
958 tree *op0, tree *op1, gimple *new_def_stmt,
959 VEC (gimple, heap) **stmts)
960{
961 enum tree_code code;
962 tree const_oprnd, oprnd;
963 tree interm_type = NULL_TREE, half_type, tmp, new_oprnd, type;
964 gimple def_stmt, new_stmt;
965 bool first = false;
bc4fb355 966 bool promotion;
f5709183 967
d6e1acf6
JJ
968 *op0 = NULL_TREE;
969 *op1 = NULL_TREE;
1107f3ae
IR
970 *new_def_stmt = NULL;
971
972 if (!is_gimple_assign (stmt))
973 return false;
974
975 code = gimple_assign_rhs_code (stmt);
976 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
977 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
978 return false;
979
980 oprnd = gimple_assign_rhs1 (stmt);
981 const_oprnd = gimple_assign_rhs2 (stmt);
982 type = gimple_expr_type (stmt);
983
984 if (TREE_CODE (oprnd) != SSA_NAME
985 || TREE_CODE (const_oprnd) != INTEGER_CST)
986 return false;
987
988 /* If we are in the middle of a sequence, we use DEF from a previous
989 statement. Otherwise, OPRND has to be a result of type promotion. */
990 if (*new_type)
991 {
992 half_type = *new_type;
993 oprnd = def;
994 }
995 else
996 {
997 first = true;
bc4fb355 998 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
f71cf56a
UW
999 &promotion)
1000 || !promotion
1001 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1107f3ae
IR
1002 return false;
1003 }
1004
1005 /* Can we perform the operation on a smaller type? */
1006 switch (code)
1007 {
1008 case BIT_IOR_EXPR:
1009 case BIT_XOR_EXPR:
1010 case BIT_AND_EXPR:
1011 if (!int_fits_type_p (const_oprnd, half_type))
1012 {
1013 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1014 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1015 return false;
1016
1017 interm_type = build_nonstandard_integer_type (
1018 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1019 if (!int_fits_type_p (const_oprnd, interm_type))
1020 return false;
1021 }
1022
1023 break;
1024
1025 case LSHIFT_EXPR:
1026 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1027 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1028 return false;
1029
1030 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1031 (e.g., if the original value was char, the shift amount is at most 8
1032 if we want to use short). */
1033 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1034 return false;
1035
1036 interm_type = build_nonstandard_integer_type (
1037 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1038
1039 if (!vect_supportable_shift (code, interm_type))
1040 return false;
1041
1042 break;
1043
1044 case RSHIFT_EXPR:
1045 if (vect_supportable_shift (code, half_type))
1046 break;
1047
1048 /* Try intermediate type - HALF_TYPE is not supported. */
1049 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1050 return false;
1051
1052 interm_type = build_nonstandard_integer_type (
1053 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1054
1055 if (!vect_supportable_shift (code, interm_type))
1056 return false;
1057
1058 break;
1059
1060 default:
1061 gcc_unreachable ();
1062 }
1063
1064 /* There are four possible cases:
1065 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1066 the first statement in the sequence)
1067 a. The original, HALF_TYPE, is not enough - we replace the promotion
1068 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1069 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1070 promotion.
1071 2. OPRND is defined by a pattern statement we created.
1072 a. Its type is not sufficient for the operation, we create a new stmt:
1073 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1074 this statement in NEW_DEF_STMT, and it is later put in
363477c0 1075 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1107f3ae
IR
1076 b. OPRND is good to use in the new statement. */
1077 if (first)
1078 {
1079 if (interm_type)
1080 {
1081 /* Replace the original type conversion HALF_TYPE->TYPE with
1082 HALF_TYPE->INTERM_TYPE. */
1083 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1084 {
1085 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1086 /* Check if the already created pattern stmt is what we need. */
1087 if (!is_gimple_assign (new_stmt)
1088 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1089 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1090 return false;
1091
aede1227 1092 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1107f3ae
IR
1093 oprnd = gimple_assign_lhs (new_stmt);
1094 }
1095 else
1096 {
1097 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1098 oprnd = gimple_assign_rhs1 (def_stmt);
1099 tmp = create_tmp_reg (interm_type, NULL);
1100 add_referenced_var (tmp);
1101 new_oprnd = make_ssa_name (tmp, NULL);
1102 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1103 oprnd, NULL_TREE);
1107f3ae
IR
1104 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
1105 VEC_safe_push (gimple, heap, *stmts, def_stmt);
1106 oprnd = new_oprnd;
1107 }
1108 }
1109 else
1110 {
1111 /* Retrieve the operand before the type promotion. */
1112 oprnd = gimple_assign_rhs1 (def_stmt);
1113 }
1114 }
1115 else
1116 {
1117 if (interm_type)
1118 {
1119 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
1120 tmp = create_tmp_reg (interm_type, NULL);
1121 add_referenced_var (tmp);
1122 new_oprnd = make_ssa_name (tmp, NULL);
1123 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1124 oprnd, NULL_TREE);
1107f3ae
IR
1125 oprnd = new_oprnd;
1126 *new_def_stmt = new_stmt;
1127 }
1128
1129 /* Otherwise, OPRND is already set. */
1130 }
1131
1132 if (interm_type)
1133 *new_type = interm_type;
1134 else
1135 *new_type = half_type;
1136
1137 *op0 = oprnd;
1138 *op1 = fold_convert (*new_type, const_oprnd);
1139
1140 return true;
1141}
1142
1143
1144/* Try to find a statement or a sequence of statements that can be performed
1145 on a smaller type:
1146
1147 type x_t;
1148 TYPE x_T, res0_T, res1_T;
1149 loop:
1150 S1 x_t = *p;
1151 S2 x_T = (TYPE) x_t;
1152 S3 res0_T = op (x_T, C0);
1153 S4 res1_T = op (res0_T, C1);
1154 S5 ... = () res1_T; - type demotion
1155
1156 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1157 constants.
1158 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1159 be 'type' or some intermediate type. For now, we expect S5 to be a type
71c92d17 1160 demotion operation. We also check that S3 and S4 have only one use. */
1107f3ae 1161
1107f3ae
IR
1162static gimple
1163vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
1164 tree *type_in, tree *type_out)
1165{
1166 gimple stmt = VEC_pop (gimple, *stmts);
1167 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
1168 tree op0, op1, vectype = NULL_TREE, lhs, use_lhs, use_type;
1169 imm_use_iterator imm_iter;
1170 use_operand_p use_p;
1171 int nuses = 0;
1172 tree var = NULL_TREE, new_type = NULL_TREE, tmp, new_oprnd;
1173 bool first;
b2a1a74d 1174 tree type = NULL;
1107f3ae
IR
1175
1176 first = true;
1177 while (1)
1178 {
1179 if (!vinfo_for_stmt (stmt)
1180 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1181 return NULL;
1182
1183 new_def_stmt = NULL;
1184 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1185 &op0, &op1, &new_def_stmt,
1186 stmts))
1187 {
1188 if (first)
1189 return NULL;
1190 else
1191 break;
1192 }
1193
1194 /* STMT can be performed on a smaller type. Check its uses. */
1195 lhs = gimple_assign_lhs (stmt);
1196 nuses = 0;
1197 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1198 {
1199 if (is_gimple_debug (USE_STMT (use_p)))
1200 continue;
1201 use_stmt = USE_STMT (use_p);
1202 nuses++;
1203 }
1204
1205 if (nuses != 1 || !is_gimple_assign (use_stmt)
f71cf56a 1206 || !vect_same_loop_or_bb_p (stmt, use_stmt))
1107f3ae
IR
1207 return NULL;
1208
1209 /* Create pattern statement for STMT. */
1210 vectype = get_vectype_for_scalar_type (new_type);
1211 if (!vectype)
1212 return NULL;
1213
1214 /* We want to collect all the statements for which we create pattern
1215 statetments, except for the case when the last statement in the
1216 sequence doesn't have a corresponding pattern statement. In such
1217 case we associate the last pattern statement with the last statement
36ba4aae 1218 in the sequence. Therefore, we only add the original statement to
1107f3ae
IR
1219 the list if we know that it is not the last. */
1220 if (prev_stmt)
1221 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1222
1223 var = vect_recog_temp_ssa_var (new_type, NULL);
62371b92
JJ
1224 pattern_stmt
1225 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1226 op0, op1);
1107f3ae 1227 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
083481d8 1228 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1107f3ae
IR
1229
1230 if (vect_print_dump_info (REPORT_DETAILS))
1231 {
1232 fprintf (vect_dump, "created pattern stmt: ");
1233 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1234 }
1235
b2a1a74d 1236 type = gimple_expr_type (stmt);
1107f3ae
IR
1237 prev_stmt = stmt;
1238 stmt = use_stmt;
1239
1240 first = false;
1241 }
1242
1243 /* We got a sequence. We expect it to end with a type demotion operation.
1244 Otherwise, we quit (for now). There are three possible cases: the
1245 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1246 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1247 NEW_TYPE differs (we create a new conversion statement). */
1248 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1249 {
1250 use_lhs = gimple_assign_lhs (use_stmt);
1251 use_type = TREE_TYPE (use_lhs);
82db3d43 1252 /* Support only type demotion or signedess change. */
1107f3ae 1253 if (!INTEGRAL_TYPE_P (use_type)
82db3d43 1254 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1107f3ae
IR
1255 return NULL;
1256
82db3d43
IR
1257 /* Check that NEW_TYPE is not bigger than the conversion result. */
1258 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1259 return NULL;
1260
1107f3ae
IR
1261 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1262 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1263 {
1264 /* Create NEW_TYPE->USE_TYPE conversion. */
1265 tmp = create_tmp_reg (use_type, NULL);
1266 add_referenced_var (tmp);
1267 new_oprnd = make_ssa_name (tmp, NULL);
1268 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1269 var, NULL_TREE);
1107f3ae
IR
1270 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1271
1272 *type_in = get_vectype_for_scalar_type (new_type);
1273 *type_out = get_vectype_for_scalar_type (use_type);
1274
1275 /* We created a pattern statement for the last statement in the
1276 sequence, so we don't need to associate it with the pattern
1277 statement created for PREV_STMT. Therefore, we add PREV_STMT
1278 to the list in order to mark it later in vect_pattern_recog_1. */
1279 if (prev_stmt)
1280 VEC_safe_push (gimple, heap, *stmts, prev_stmt);
1281 }
1282 else
1283 {
1284 if (prev_stmt)
363477c0
JJ
1285 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1286 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1107f3ae
IR
1287
1288 *type_in = vectype;
1289 *type_out = NULL_TREE;
1290 }
1291
1292 VEC_safe_push (gimple, heap, *stmts, use_stmt);
1293 }
1294 else
1295 /* TODO: support general case, create a conversion to the correct type. */
1296 return NULL;
1297
1298 /* Pattern detected. */
1299 if (vect_print_dump_info (REPORT_DETAILS))
1300 {
1301 fprintf (vect_dump, "vect_recog_over_widening_pattern: detected: ");
1302 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1303 }
1304
1305 return pattern_stmt;
1306}
1307
36ba4aae
IR
1308/* Detect widening shift pattern:
1309
1310 type a_t;
1311 TYPE a_T, res_T;
1312
1313 S1 a_t = ;
1314 S2 a_T = (TYPE) a_t;
1315 S3 res_T = a_T << CONST;
1316
1317 where type 'TYPE' is at least double the size of type 'type'.
1318
5bfdb7d8 1319 Also detect unsigned cases:
36ba4aae
IR
1320
1321 unsigned type a_t;
1322 unsigned TYPE u_res_T;
1323 TYPE a_T, res_T;
1324
1325 S1 a_t = ;
1326 S2 a_T = (TYPE) a_t;
1327 S3 res_T = a_T << CONST;
1328 S4 u_res_T = (unsigned TYPE) res_T;
1329
1330 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1331 create an additional pattern stmt for S2 to create a variable of an
1332 intermediate type, and perform widen-shift on the intermediate type:
1333
1334 type a_t;
1335 interm_type a_it;
1336 TYPE a_T, res_T, res_T';
1337
1338 S1 a_t = ;
1339 S2 a_T = (TYPE) a_t;
1340 '--> a_it = (interm_type) a_t;
1341 S3 res_T = a_T << CONST;
1342 '--> res_T' = a_it <<* CONST;
1343
1344 Input/Output:
1345
1346 * STMTS: Contains a stmt from which the pattern search begins.
1347 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1348 in STMTS. When an intermediate type is used and a pattern statement is
1349 created for S2, we also put S2 here (before S3).
1350
1351 Output:
1352
1353 * TYPE_IN: The type of the input arguments to the pattern.
1354
1355 * TYPE_OUT: The type of the output of this pattern.
1356
1357 * Return value: A new stmt that will be used to replace the sequence of
1358 stmts that constitute the pattern. In this case it will be:
1359 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1360
1361static gimple
1362vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
1363 tree *type_in, tree *type_out)
1364{
1365 gimple last_stmt = VEC_pop (gimple, *stmts);
1366 gimple def_stmt0;
1367 tree oprnd0, oprnd1;
1368 tree type, half_type0;
1369 gimple pattern_stmt, orig_stmt = NULL;
1370 tree vectype, vectype_out = NULL_TREE;
1371 tree dummy;
1372 tree var;
1373 enum tree_code dummy_code;
1374 int dummy_int;
1375 VEC (tree, heap) * dummy_vec;
1376 gimple use_stmt = NULL;
1377 bool over_widen = false;
bc4fb355 1378 bool promotion;
36ba4aae
IR
1379
1380 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1381 return NULL;
1382
1383 orig_stmt = last_stmt;
1384 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
1385 {
1386 /* This statement was also detected as over-widening operation (it can't
1387 be any other pattern, because only over-widening detects shifts).
1388 LAST_STMT is the final type demotion statement, but its related
1389 statement is shift. We analyze the related statement to catch cases:
1390
1391 orig code:
1392 type a_t;
1393 itype res;
1394 TYPE a_T, res_T;
1395
1396 S1 a_T = (TYPE) a_t;
1397 S2 res_T = a_T << CONST;
1398 S3 res = (itype)res_T;
1399
1400 (size of type * 2 <= size of itype
1401 and size of itype * 2 <= size of TYPE)
1402
1403 code after over-widening pattern detection:
1404
1405 S1 a_T = (TYPE) a_t;
1406 --> a_it = (itype) a_t;
1407 S2 res_T = a_T << CONST;
1408 S3 res = (itype)res_T; <--- LAST_STMT
1409 --> res = a_it << CONST;
1410
1411 after widen_shift:
1412
1413 S1 a_T = (TYPE) a_t;
1414 --> a_it = (itype) a_t; - redundant
1415 S2 res_T = a_T << CONST;
1416 S3 res = (itype)res_T;
1417 --> res = a_t w<< CONST;
1418
1419 i.e., we replace the three statements with res = a_t w<< CONST. */
1420 last_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (last_stmt));
1421 over_widen = true;
1422 }
1423
1424 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1425 return NULL;
1426
1427 oprnd0 = gimple_assign_rhs1 (last_stmt);
1428 oprnd1 = gimple_assign_rhs2 (last_stmt);
1429 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1430 return NULL;
1431
1432 /* Check operand 0: it has to be defined by a type promotion. */
bc4fb355
IR
1433 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1434 &promotion)
1435 || !promotion)
1436 return NULL;
36ba4aae
IR
1437
1438 /* Check operand 1: has to be positive. We check that it fits the type
1439 in vect_handle_widen_op_by_const (). */
1440 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1441 return NULL;
1442
1443 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1444 type = gimple_expr_type (last_stmt);
1445
1446 /* Check if this a widening operation. */
1447 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1448 &oprnd0, stmts,
1449 type, &half_type0, def_stmt0))
1450 return NULL;
1451
1452 /* Handle unsigned case. Look for
1453 S4 u_res_T = (unsigned TYPE) res_T;
1454 Use unsigned TYPE as the type for WIDEN_LSHIFT_EXPR. */
1455 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
1456 {
1457 tree lhs = gimple_assign_lhs (last_stmt), use_lhs;
1458 imm_use_iterator imm_iter;
1459 use_operand_p use_p;
1460 int nuses = 0;
1461 tree use_type;
1462
1463 if (over_widen)
1464 {
1465 /* In case of over-widening pattern, S4 should be ORIG_STMT itself.
1466 We check here that TYPE is the correct type for the operation,
1467 i.e., it's the type of the original result. */
1468 tree orig_type = gimple_expr_type (orig_stmt);
1469 if ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (orig_type))
1470 || (TYPE_PRECISION (type) != TYPE_PRECISION (orig_type)))
1471 return NULL;
1472 }
1473 else
1474 {
1475 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs)
1476 {
1477 if (is_gimple_debug (USE_STMT (use_p)))
1478 continue;
1479 use_stmt = USE_STMT (use_p);
1480 nuses++;
1481 }
1482
1483 if (nuses != 1 || !is_gimple_assign (use_stmt)
1484 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1485 return NULL;
1486
f71cf56a
UW
1487 if (!vect_same_loop_or_bb_p (last_stmt, use_stmt))
1488 return NULL;
1489
36ba4aae
IR
1490 use_lhs = gimple_assign_lhs (use_stmt);
1491 use_type = TREE_TYPE (use_lhs);
1492
1493 if (!INTEGRAL_TYPE_P (use_type)
1494 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
1495 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
1496 return NULL;
1497
1498 type = use_type;
1499 }
1500 }
1501
1502 /* Pattern detected. */
1503 if (vect_print_dump_info (REPORT_DETAILS))
1504 fprintf (vect_dump, "vect_recog_widen_shift_pattern: detected: ");
1505
1506 /* Check target support. */
1507 vectype = get_vectype_for_scalar_type (half_type0);
1508 vectype_out = get_vectype_for_scalar_type (type);
1509
1510 if (!vectype
1511 || !vectype_out
1512 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1513 vectype_out, vectype,
1514 &dummy, &dummy, &dummy_code,
1515 &dummy_code, &dummy_int,
1516 &dummy_vec))
1517 return NULL;
1518
1519 *type_in = vectype;
1520 *type_out = vectype_out;
1521
1522 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1523 var = vect_recog_temp_ssa_var (type, NULL);
1524 pattern_stmt =
1525 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1526
1527 if (vect_print_dump_info (REPORT_DETAILS))
1528 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1529
1530 if (use_stmt)
1531 last_stmt = use_stmt;
1532 else
1533 last_stmt = orig_stmt;
1534
1535 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1536 return pattern_stmt;
1537}
1107f3ae 1538
732a0ad3
JJ
1539/* Detect a vector by vector shift pattern that wouldn't be otherwise
1540 vectorized:
1541
1542 type a_t;
1543 TYPE b_T, res_T;
1544
1545 S1 a_t = ;
1546 S2 b_T = ;
1547 S3 res_T = b_T op a_t;
1548
1549 where type 'TYPE' is a type with different size than 'type',
1550 and op is <<, >> or rotate.
1551
1552 Also detect cases:
1553
1554 type a_t;
1555 TYPE b_T, c_T, res_T;
1556
1557 S0 c_T = ;
1558 S1 a_t = (type) c_T;
1559 S2 b_T = ;
1560 S3 res_T = b_T op a_t;
1561
1562 Input/Output:
1563
1564 * STMTS: Contains a stmt from which the pattern search begins,
1565 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1566 with a shift/rotate which has same type on both operands, in the
1567 second case just b_T op c_T, in the first case with added cast
363477c0 1568 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
1569
1570 Output:
1571
1572 * TYPE_IN: The type of the input arguments to the pattern.
1573
1574 * TYPE_OUT: The type of the output of this pattern.
1575
1576 * Return value: A new stmt that will be used to replace the shift/rotate
1577 S3 stmt. */
1578
1579static gimple
1580vect_recog_vector_vector_shift_pattern (VEC (gimple, heap) **stmts,
1581 tree *type_in, tree *type_out)
1582{
1583 gimple last_stmt = VEC_pop (gimple, *stmts);
1584 tree oprnd0, oprnd1, lhs, var;
1585 gimple pattern_stmt, def_stmt;
1586 enum tree_code rhs_code;
1587 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1588 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 1589 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
732a0ad3
JJ
1590 enum vect_def_type dt;
1591 tree def;
1592
1593 if (!is_gimple_assign (last_stmt))
1594 return NULL;
1595
1596 rhs_code = gimple_assign_rhs_code (last_stmt);
1597 switch (rhs_code)
1598 {
1599 case LSHIFT_EXPR:
1600 case RSHIFT_EXPR:
1601 case LROTATE_EXPR:
1602 case RROTATE_EXPR:
1603 break;
1604 default:
1605 return NULL;
1606 }
1607
1608 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1609 return NULL;
1610
1611 lhs = gimple_assign_lhs (last_stmt);
1612 oprnd0 = gimple_assign_rhs1 (last_stmt);
1613 oprnd1 = gimple_assign_rhs2 (last_stmt);
1614 if (TREE_CODE (oprnd0) != SSA_NAME
1615 || TREE_CODE (oprnd1) != SSA_NAME
1616 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1617 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1618 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1619 || TYPE_PRECISION (TREE_TYPE (lhs))
1620 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1621 return NULL;
1622
f5709183 1623 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 1624 &def, &dt))
732a0ad3
JJ
1625 return NULL;
1626
1627 if (dt != vect_internal_def)
1628 return NULL;
1629
1630 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1631 *type_out = *type_in;
1632 if (*type_in == NULL_TREE)
1633 return NULL;
1634
1635 def = NULL_TREE;
1636 if (gimple_assign_cast_p (def_stmt))
1637 {
1638 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1639 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1640 && TYPE_PRECISION (TREE_TYPE (rhs1))
1641 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1642 def = rhs1;
1643 }
1644
1645 if (def == NULL_TREE)
1646 {
1647 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1648 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1649 NULL_TREE);
083481d8 1650 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
1651 }
1652
1653 /* Pattern detected. */
1654 if (vect_print_dump_info (REPORT_DETAILS))
1655 fprintf (vect_dump, "vect_recog_vector_vector_shift_pattern: detected: ");
1656
1657 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1658 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1659 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1660
1661 if (vect_print_dump_info (REPORT_DETAILS))
1662 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1663
1664 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1665 return pattern_stmt;
1666}
1667
363477c0
JJ
1668/* Detect a signed division by power of two constant that wouldn't be
1669 otherwise vectorized:
1670
1671 type a_t, b_t;
1672
1673 S1 a_t = b_t / N;
1674
1675 where type 'type' is a signed integral type and N is a constant positive
1676 power of two.
1677
1678 Similarly handle signed modulo by power of two constant:
1679
1680 S4 a_t = b_t % N;
1681
1682 Input/Output:
1683
1684 * STMTS: Contains a stmt from which the pattern search begins,
1685 i.e. the division stmt. S1 is replaced by:
1686 S3 y_t = b_t < 0 ? N - 1 : 0;
1687 S2 x_t = b_t + y_t;
1688 S1' a_t = x_t >> log2 (N);
1689
1690 S4 is replaced by (where *_T temporaries have unsigned type):
1691 S9 y_T = b_t < 0 ? -1U : 0U;
1692 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1693 S7 z_t = (type) z_T;
1694 S6 w_t = b_t + z_t;
1695 S5 x_t = w_t & (N - 1);
1696 S4' a_t = x_t - z_t;
1697
1698 Output:
1699
1700 * TYPE_IN: The type of the input arguments to the pattern.
1701
1702 * TYPE_OUT: The type of the output of this pattern.
1703
1704 * Return value: A new stmt that will be used to replace the division
1705 S1 or modulo S4 stmt. */
1706
1707static gimple
1708vect_recog_sdivmod_pow2_pattern (VEC (gimple, heap) **stmts,
1709 tree *type_in, tree *type_out)
1710{
1711 gimple last_stmt = VEC_pop (gimple, *stmts);
1712 tree oprnd0, oprnd1, vectype, itype, cond;
1713 gimple pattern_stmt, def_stmt;
1714 enum tree_code rhs_code;
1715 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1716 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1717 optab optab;
1718
1719 if (!is_gimple_assign (last_stmt))
1720 return NULL;
1721
1722 rhs_code = gimple_assign_rhs_code (last_stmt);
1723 switch (rhs_code)
1724 {
1725 case TRUNC_DIV_EXPR:
1726 case TRUNC_MOD_EXPR:
1727 break;
1728 default:
1729 return NULL;
1730 }
1731
1732 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1733 return NULL;
1734
1735 oprnd0 = gimple_assign_rhs1 (last_stmt);
1736 oprnd1 = gimple_assign_rhs2 (last_stmt);
1737 itype = TREE_TYPE (oprnd0);
1738 if (TREE_CODE (oprnd0) != SSA_NAME
1739 || TREE_CODE (oprnd1) != INTEGER_CST
1740 || TREE_CODE (itype) != INTEGER_TYPE
1741 || TYPE_UNSIGNED (itype)
1742 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype))
1743 || !integer_pow2p (oprnd1)
1744 || tree_int_cst_sgn (oprnd1) != 1)
1745 return NULL;
1746
1747 vectype = get_vectype_for_scalar_type (itype);
1748 if (vectype == NULL_TREE)
1749 return NULL;
1750
1751 /* If the target can handle vectorized division or modulo natively,
1752 don't attempt to optimize this. */
1753 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
1754 if (optab != NULL)
1755 {
1756 enum machine_mode vec_mode = TYPE_MODE (vectype);
1757 int icode = (int) optab_handler (optab, vec_mode);
1758 if (icode != CODE_FOR_nothing
1759 || GET_MODE_SIZE (vec_mode) == UNITS_PER_WORD)
1760 return NULL;
1761 }
1762
1763 /* Pattern detected. */
1764 if (vect_print_dump_info (REPORT_DETAILS))
1765 fprintf (vect_dump, "vect_recog_sdivmod_pow2_pattern: detected: ");
1766
1767 cond = build2 (LT_EXPR, boolean_type_node, oprnd0, build_int_cst (itype, 0));
1768 if (rhs_code == TRUNC_DIV_EXPR)
1769 {
1770 tree var = vect_recog_temp_ssa_var (itype, NULL);
1771 def_stmt
1772 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1773 fold_build2 (MINUS_EXPR, itype,
1774 oprnd1,
1775 build_int_cst (itype,
1776 1)),
1777 build_int_cst (itype, 0));
083481d8 1778 new_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1779 var = vect_recog_temp_ssa_var (itype, NULL);
1780 def_stmt
1781 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1782 gimple_assign_lhs (def_stmt));
083481d8 1783 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1784
1785 pattern_stmt
1786 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1787 vect_recog_temp_ssa_var (itype, NULL),
1788 var,
1789 build_int_cst (itype,
1790 tree_log2 (oprnd1)));
1791 }
1792 else
1793 {
1794 tree signmask;
1795 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1796 if (compare_tree_int (oprnd1, 2) == 0)
1797 {
1798 signmask = vect_recog_temp_ssa_var (itype, NULL);
1799 def_stmt
1800 = gimple_build_assign_with_ops3 (COND_EXPR, signmask, cond,
1801 build_int_cst (itype, 1),
1802 build_int_cst (itype, 0));
083481d8 1803 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1804 }
1805 else
1806 {
1807 tree utype
1808 = build_nonstandard_integer_type (TYPE_PRECISION (itype), 1);
1809 tree vecutype = get_vectype_for_scalar_type (utype);
1810 tree shift
1811 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1812 - tree_log2 (oprnd1));
1813 tree var = vect_recog_temp_ssa_var (utype, NULL);
1814 stmt_vec_info def_stmt_vinfo;
1815
1816 def_stmt
1817 = gimple_build_assign_with_ops3 (COND_EXPR, var, cond,
1818 build_int_cst (utype, -1),
1819 build_int_cst (utype, 0));
1820 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1821 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1822 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
083481d8 1823 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1824 var = vect_recog_temp_ssa_var (utype, NULL);
1825 def_stmt
1826 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1827 gimple_assign_lhs (def_stmt),
1828 shift);
1829 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, NULL);
1830 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1831 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
083481d8 1832 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1833 signmask = vect_recog_temp_ssa_var (itype, NULL);
1834 def_stmt
1835 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1836 NULL_TREE);
083481d8 1837 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1838 }
1839 def_stmt
1840 = gimple_build_assign_with_ops (PLUS_EXPR,
1841 vect_recog_temp_ssa_var (itype, NULL),
1842 oprnd0, signmask);
083481d8 1843 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1844 def_stmt
1845 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1846 vect_recog_temp_ssa_var (itype, NULL),
1847 gimple_assign_lhs (def_stmt),
1848 fold_build2 (MINUS_EXPR, itype,
1849 oprnd1,
1850 build_int_cst (itype,
1851 1)));
083481d8 1852 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0
JJ
1853
1854 pattern_stmt
1855 = gimple_build_assign_with_ops (MINUS_EXPR,
1856 vect_recog_temp_ssa_var (itype, NULL),
1857 gimple_assign_lhs (def_stmt),
1858 signmask);
1859 }
1860
1861 if (vect_print_dump_info (REPORT_DETAILS))
1862 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
1863
1864 VEC_safe_push (gimple, heap, *stmts, last_stmt);
1865
1866 *type_in = vectype;
1867 *type_out = vectype;
1868 return pattern_stmt;
1869}
1870
69d2aade
JJ
1871/* Function vect_recog_mixed_size_cond_pattern
1872
1873 Try to find the following pattern:
1874
1875 type x_t, y_t;
1876 TYPE a_T, b_T, c_T;
1877 loop:
1878 S1 a_T = x_t CMP y_t ? b_T : c_T;
1879
1880 where type 'TYPE' is an integral type which has different size
bc4fb355 1881 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 1882 than 'type', the constants need to fit into an integer type
bc4fb355 1883 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
1884
1885 Input:
1886
1887 * LAST_STMT: A stmt from which the pattern search begins.
1888
1889 Output:
1890
1891 * TYPE_IN: The type of the input arguments to the pattern.
1892
1893 * TYPE_OUT: The type of the output of this pattern.
1894
1895 * Return value: A new stmt that will be used to replace the pattern.
1896 Additionally a def_stmt is added.
1897
1898 a_it = x_t CMP y_t ? b_it : c_it;
1899 a_T = (TYPE) a_it; */
1900
1901static gimple
1902vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **stmts, tree *type_in,
1903 tree *type_out)
1904{
1905 gimple last_stmt = VEC_index (gimple, *stmts, 0);
1906 tree cond_expr, then_clause, else_clause;
1907 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
bc4fb355 1908 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
69d2aade
JJ
1909 enum machine_mode cmpmode;
1910 gimple pattern_stmt, def_stmt;
1911 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 1912 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
bc4fb355
IR
1913 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
1914 gimple def_stmt0 = NULL, def_stmt1 = NULL;
1915 bool promotion;
1916 tree comp_scalar_type;
69d2aade
JJ
1917
1918 if (!is_gimple_assign (last_stmt)
1919 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
1920 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
1921 return NULL;
1922
1923 cond_expr = gimple_assign_rhs1 (last_stmt);
1924 then_clause = gimple_assign_rhs2 (last_stmt);
1925 else_clause = gimple_assign_rhs3 (last_stmt);
1926
87aab9b2
JJ
1927 if (!COMPARISON_CLASS_P (cond_expr))
1928 return NULL;
1929
bc4fb355
IR
1930 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
1931 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 1932 if (comp_vectype == NULL_TREE)
69d2aade
JJ
1933 return NULL;
1934
1935 type = gimple_expr_type (last_stmt);
bc4fb355
IR
1936 if (types_compatible_p (type, comp_scalar_type)
1937 || ((TREE_CODE (then_clause) != INTEGER_CST
1938 || TREE_CODE (else_clause) != INTEGER_CST)
1939 && !INTEGRAL_TYPE_P (comp_scalar_type))
1940 || !INTEGRAL_TYPE_P (type))
1941 return NULL;
1942
1943 if ((TREE_CODE (then_clause) != INTEGER_CST
1944 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
1945 &def_stmt0, &promotion))
1946 || (TREE_CODE (else_clause) != INTEGER_CST
1947 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
1948 &def_stmt1, &promotion)))
1949 return NULL;
1950
1951 if (orig_type0 && orig_type1
1952 && !types_compatible_p (orig_type0, orig_type1))
1953 return NULL;
1954
1955 if (orig_type0)
1956 {
1957 if (!types_compatible_p (orig_type0, comp_scalar_type))
1958 return NULL;
1959 then_clause = gimple_assign_rhs1 (def_stmt0);
1960 itype = orig_type0;
1961 }
1962
1963 if (orig_type1)
1964 {
1965 if (!types_compatible_p (orig_type1, comp_scalar_type))
1966 return NULL;
1967 else_clause = gimple_assign_rhs1 (def_stmt1);
1968 itype = orig_type1;
1969 }
1970
69d2aade
JJ
1971 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
1972
1973 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
1974 return NULL;
1975
1976 vectype = get_vectype_for_scalar_type (type);
1977 if (vectype == NULL_TREE)
1978 return NULL;
1979
1980 if (expand_vec_cond_expr_p (vectype, comp_vectype))
1981 return NULL;
1982
bc4fb355
IR
1983 if (itype == NULL_TREE)
1984 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
1985 TYPE_UNSIGNED (type));
1986
69d2aade
JJ
1987 if (itype == NULL_TREE
1988 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
1989 return NULL;
1990
1991 vecitype = get_vectype_for_scalar_type (itype);
1992 if (vecitype == NULL_TREE)
1993 return NULL;
1994
1995 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
1996 return NULL;
1997
1998 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
1999 {
bc4fb355
IR
2000 if ((TREE_CODE (then_clause) == INTEGER_CST
2001 && !int_fits_type_p (then_clause, itype))
2002 || (TREE_CODE (else_clause) == INTEGER_CST
2003 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
2004 return NULL;
2005 }
2006
2007 def_stmt
2008 = gimple_build_assign_with_ops3 (COND_EXPR,
2009 vect_recog_temp_ssa_var (itype, NULL),
2010 unshare_expr (cond_expr),
2011 fold_convert (itype, then_clause),
2012 fold_convert (itype, else_clause));
2013 pattern_stmt
2014 = gimple_build_assign_with_ops (NOP_EXPR,
2015 vect_recog_temp_ssa_var (type, NULL),
2016 gimple_assign_lhs (def_stmt), NULL_TREE);
2017
083481d8 2018 new_pattern_def_seq (stmt_vinfo, def_stmt);
f5709183 2019 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
69d2aade
JJ
2020 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2021 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2022 *type_in = vecitype;
2023 *type_out = vectype;
2024
f5709183
IR
2025 if (vect_print_dump_info (REPORT_DETAILS))
2026 fprintf (vect_dump, "vect_recog_mixed_size_cond_pattern: detected: ");
2027
69d2aade
JJ
2028 return pattern_stmt;
2029}
2030
2031
71c92d17
JJ
2032/* Helper function of vect_recog_bool_pattern. Called recursively, return
2033 true if bool VAR can be optimized that way. */
2034
2035static bool
f5709183 2036check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
71c92d17
JJ
2037{
2038 gimple def_stmt;
2039 enum vect_def_type dt;
2040 tree def, rhs1;
2041 enum tree_code rhs_code;
2042
f5709183
IR
2043 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2044 &dt))
71c92d17
JJ
2045 return false;
2046
2047 if (dt != vect_internal_def)
2048 return false;
2049
2050 if (!is_gimple_assign (def_stmt))
2051 return false;
2052
2053 if (!has_single_use (def))
2054 return false;
2055
2056 rhs1 = gimple_assign_rhs1 (def_stmt);
2057 rhs_code = gimple_assign_rhs_code (def_stmt);
2058 switch (rhs_code)
2059 {
2060 case SSA_NAME:
f5709183 2061 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2062
2063 CASE_CONVERT:
2064 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2065 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2066 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2067 return false;
f5709183 2068 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2069
2070 case BIT_NOT_EXPR:
f5709183 2071 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2072
2073 case BIT_AND_EXPR:
2074 case BIT_IOR_EXPR:
2075 case BIT_XOR_EXPR:
f5709183 2076 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
71c92d17 2077 return false;
f5709183
IR
2078 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2079 bb_vinfo);
71c92d17
JJ
2080
2081 default:
2082 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2083 {
2084 tree vecitype, comp_vectype;
2085
2f326699
JJ
2086 /* If the comparison can throw, then is_gimple_condexpr will be
2087 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2088 if (stmt_could_throw_p (def_stmt))
2089 return false;
2090
71c92d17
JJ
2091 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2092 if (comp_vectype == NULL_TREE)
2093 return false;
2094
2095 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2096 {
2097 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2098 tree itype
ab0ef706 2099 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2100 vecitype = get_vectype_for_scalar_type (itype);
2101 if (vecitype == NULL_TREE)
2102 return false;
2103 }
2104 else
2105 vecitype = comp_vectype;
2106 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2107 }
2108 return false;
2109 }
2110}
2111
2112
2113/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2114 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
363477c0 2115 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
71c92d17
JJ
2116
2117static tree
2118adjust_bool_pattern_cast (tree type, tree var)
2119{
2120 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2121 gimple cast_stmt, pattern_stmt;
2122
363477c0 2123 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
71c92d17 2124 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
083481d8 2125 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
71c92d17
JJ
2126 cast_stmt
2127 = gimple_build_assign_with_ops (NOP_EXPR,
2128 vect_recog_temp_ssa_var (type, NULL),
2129 gimple_assign_lhs (pattern_stmt),
2130 NULL_TREE);
2131 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2132 return gimple_assign_lhs (cast_stmt);
2133}
2134
2135
2136/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2137 recursively. VAR is an SSA_NAME that should be transformed from bool
2138 to a wider integer type, OUT_TYPE is the desired final integer type of
2139 the whole pattern, TRUEVAL should be NULL unless optimizing
2140 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2141 in the then_clause, STMTS is where statements with added pattern stmts
2142 should be pushed to. */
2143
2144static tree
2145adjust_bool_pattern (tree var, tree out_type, tree trueval,
2146 VEC (gimple, heap) **stmts)
2147{
2148 gimple stmt = SSA_NAME_DEF_STMT (var);
2149 enum tree_code rhs_code, def_rhs_code;
2150 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2151 location_t loc;
2152 gimple pattern_stmt, def_stmt;
2153
2154 rhs1 = gimple_assign_rhs1 (stmt);
2155 rhs2 = gimple_assign_rhs2 (stmt);
2156 rhs_code = gimple_assign_rhs_code (stmt);
2157 loc = gimple_location (stmt);
2158 switch (rhs_code)
2159 {
2160 case SSA_NAME:
2161 CASE_CONVERT:
2162 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2163 itype = TREE_TYPE (irhs1);
2164 pattern_stmt
2165 = gimple_build_assign_with_ops (SSA_NAME,
2166 vect_recog_temp_ssa_var (itype, NULL),
2167 irhs1, NULL_TREE);
2168 break;
2169
2170 case BIT_NOT_EXPR:
2171 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2172 itype = TREE_TYPE (irhs1);
2173 pattern_stmt
2174 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2175 vect_recog_temp_ssa_var (itype, NULL),
2176 irhs1, build_int_cst (itype, 1));
2177 break;
2178
2179 case BIT_AND_EXPR:
2180 /* Try to optimize x = y & (a < b ? 1 : 0); into
2181 x = (a < b ? y : 0);
2182
2183 E.g. for:
2184 bool a_b, b_b, c_b;
2185 TYPE d_T;
2186
2187 S1 a_b = x1 CMP1 y1;
2188 S2 b_b = x2 CMP2 y2;
2189 S3 c_b = a_b & b_b;
2190 S4 d_T = (TYPE) c_b;
2191
2192 we would normally emit:
2193
2194 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2195 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2196 S3' c_T = a_T & b_T;
2197 S4' d_T = c_T;
2198
2199 but we can save one stmt by using the
2200 result of one of the COND_EXPRs in the other COND_EXPR and leave
2201 BIT_AND_EXPR stmt out:
2202
2203 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2204 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2205 S4' f_T = c_T;
2206
2207 At least when VEC_COND_EXPR is implemented using masks
2208 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2209 computes the comparison masks and ands it, in one case with
2210 all ones vector, in the other case with a vector register.
2211 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2212 often more expensive. */
2213 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2214 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2215 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2216 {
2217 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2218 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2219 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2220 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2221 {
2222 gimple tstmt;
2223 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2224 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
2225 tstmt = VEC_pop (gimple, *stmts);
2226 gcc_assert (tstmt == def_stmt);
2227 VEC_quick_push (gimple, *stmts, stmt);
2228 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2229 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2230 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2231 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2232 return irhs2;
2233 }
2234 else
2235 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2236 goto and_ior_xor;
2237 }
2238 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2239 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2240 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2241 {
2242 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2243 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2244 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2245 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2246 {
2247 gimple tstmt;
2248 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2249 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
2250 tstmt = VEC_pop (gimple, *stmts);
2251 gcc_assert (tstmt == def_stmt);
2252 VEC_quick_push (gimple, *stmts, stmt);
2253 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2254 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2255 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2256 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2257 return irhs1;
2258 }
2259 else
2260 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2261 goto and_ior_xor;
2262 }
2263 /* FALLTHRU */
2264 case BIT_IOR_EXPR:
2265 case BIT_XOR_EXPR:
2266 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2267 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2268 and_ior_xor:
2269 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2270 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2271 {
2272 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2273 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2274 int out_prec = TYPE_PRECISION (out_type);
2275 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2276 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2277 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2278 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2279 else
2280 {
2281 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2282 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2283 }
2284 }
2285 itype = TREE_TYPE (irhs1);
2286 pattern_stmt
2287 = gimple_build_assign_with_ops (rhs_code,
2288 vect_recog_temp_ssa_var (itype, NULL),
2289 irhs1, irhs2);
2290 break;
2291
2292 default:
2293 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2294 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2
JJ
2295 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2296 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2297 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17
JJ
2298 {
2299 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2300 itype
ab0ef706 2301 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2302 }
2303 else
2304 itype = TREE_TYPE (rhs1);
2305 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2306 if (trueval == NULL_TREE)
2307 trueval = build_int_cst (itype, 1);
2308 else
2309 gcc_checking_assert (useless_type_conversion_p (itype,
2310 TREE_TYPE (trueval)));
2311 pattern_stmt
2312 = gimple_build_assign_with_ops3 (COND_EXPR,
2313 vect_recog_temp_ssa_var (itype, NULL),
2314 cond_expr, trueval,
2315 build_int_cst (itype, 0));
2316 break;
2317 }
2318
2319 VEC_safe_push (gimple, heap, *stmts, stmt);
2320 gimple_set_location (pattern_stmt, loc);
2321 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2322 return gimple_assign_lhs (pattern_stmt);
2323}
2324
2325
2326/* Function vect_recog_bool_pattern
2327
2328 Try to find pattern like following:
2329
2330 bool a_b, b_b, c_b, d_b, e_b;
2331 TYPE f_T;
2332 loop:
2333 S1 a_b = x1 CMP1 y1;
2334 S2 b_b = x2 CMP2 y2;
2335 S3 c_b = a_b & b_b;
2336 S4 d_b = x3 CMP3 y3;
2337 S5 e_b = c_b | d_b;
2338 S6 f_T = (TYPE) e_b;
2339
2340 where type 'TYPE' is an integral type.
2341
2342 Input:
2343
2344 * LAST_STMT: A stmt at the end from which the pattern
2345 search begins, i.e. cast of a bool to
2346 an integer type.
2347
2348 Output:
2349
2350 * TYPE_IN: The type of the input arguments to the pattern.
2351
2352 * TYPE_OUT: The type of the output of this pattern.
2353
2354 * Return value: A new stmt that will be used to replace the pattern.
2355
2356 Assuming size of TYPE is the same as size of all comparisons
2357 (otherwise some casts would be added where needed), the above
2358 sequence we create related pattern stmts:
2359 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2360 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2361 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2362 S5' e_T = c_T | d_T;
2363 S6' f_T = e_T;
2364
2365 Instead of the above S3' we could emit:
2366 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2367 S3' c_T = a_T | b_T;
2368 but the above is more efficient. */
2369
2370static gimple
2371vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
2372 tree *type_out)
2373{
2374 gimple last_stmt = VEC_pop (gimple, *stmts);
2375 enum tree_code rhs_code;
2376 tree var, lhs, rhs, vectype;
2377 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2378 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 2379 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
71c92d17
JJ
2380 gimple pattern_stmt;
2381
2382 if (!is_gimple_assign (last_stmt))
2383 return NULL;
2384
2385 var = gimple_assign_rhs1 (last_stmt);
2386 lhs = gimple_assign_lhs (last_stmt);
2387
2388 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2389 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2390 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2391 return NULL;
2392
2393 rhs_code = gimple_assign_rhs_code (last_stmt);
2394 if (CONVERT_EXPR_CODE_P (rhs_code))
2395 {
78048b1c
JJ
2396 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2397 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17
JJ
2398 return NULL;
2399 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2400 if (vectype == NULL_TREE)
2401 return NULL;
2402
f5709183 2403 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
71c92d17
JJ
2404 return NULL;
2405
2406 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2407 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2408 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2409 pattern_stmt
2410 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2411 else
2412 pattern_stmt
2413 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2414 *type_out = vectype;
2415 *type_in = vectype;
2416 VEC_safe_push (gimple, heap, *stmts, last_stmt);
f5709183
IR
2417 if (vect_print_dump_info (REPORT_DETAILS))
2418 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
2419
71c92d17
JJ
2420 return pattern_stmt;
2421 }
ab0ef706
JJ
2422 else if (rhs_code == SSA_NAME
2423 && STMT_VINFO_DATA_REF (stmt_vinfo))
2424 {
2425 stmt_vec_info pattern_stmt_info;
2426 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2427 gcc_assert (vectype != NULL_TREE);
78336739
JJ
2428 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2429 return NULL;
f5709183 2430 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
ab0ef706
JJ
2431 return NULL;
2432
2433 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2434 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2435 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2436 {
2437 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2438 gimple cast_stmt
2439 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
083481d8 2440 new_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
2441 rhs = rhs2;
2442 }
2443 pattern_stmt
2444 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
f5709183
IR
2445 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2446 bb_vinfo);
ab0ef706
JJ
2447 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2448 STMT_VINFO_DATA_REF (pattern_stmt_info)
2449 = STMT_VINFO_DATA_REF (stmt_vinfo);
2450 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
2451 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
2452 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
2453 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
2454 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
2455 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
2456 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
2457 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
78048b1c 2458 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
ab0ef706
JJ
2459 *type_out = vectype;
2460 *type_in = vectype;
2461 VEC_safe_push (gimple, heap, *stmts, last_stmt);
f5709183
IR
2462 if (vect_print_dump_info (REPORT_DETAILS))
2463 fprintf (vect_dump, "vect_recog_bool_pattern: detected: ");
ab0ef706
JJ
2464 return pattern_stmt;
2465 }
71c92d17
JJ
2466 else
2467 return NULL;
2468}
2469
2470
1107f3ae
IR
2471/* Mark statements that are involved in a pattern. */
2472
2473static inline void
2474vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2475 tree pattern_vectype)
2476{
2477 stmt_vec_info pattern_stmt_info, def_stmt_info;
2478 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2479 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
f5709183 2480 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
1107f3ae
IR
2481 gimple def_stmt;
2482
1107f3ae 2483 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
ab0ef706
JJ
2484 if (pattern_stmt_info == NULL)
2485 {
f5709183
IR
2486 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2487 bb_vinfo);
ab0ef706
JJ
2488 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2489 }
2490 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1107f3ae
IR
2491
2492 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2493 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
ab0ef706 2494 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1107f3ae
IR
2495 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2496 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2497 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
363477c0
JJ
2498 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2499 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2500 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
1107f3ae 2501 {
363477c0
JJ
2502 gimple_stmt_iterator si;
2503 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2504 !gsi_end_p (si); gsi_next (&si))
69d2aade 2505 {
363477c0
JJ
2506 def_stmt = gsi_stmt (si);
2507 def_stmt_info = vinfo_for_stmt (def_stmt);
2508 if (def_stmt_info == NULL)
2509 {
f5709183
IR
2510 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2511 bb_vinfo);
363477c0
JJ
2512 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2513 }
2514 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2515 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2516 STMT_VINFO_DEF_TYPE (def_stmt_info)
2517 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2518 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2519 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
69d2aade 2520 }
1107f3ae
IR
2521 }
2522}
2523
b8698a0f 2524/* Function vect_pattern_recog_1
20f06221
DN
2525
2526 Input:
2527 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
2528 computation pattern.
2529 STMT: A stmt from which the pattern search should start.
2530
2531 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
2532 expression that computes the same functionality and can be used to
2533 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
2534
2535 Output:
b8698a0f
L
2536 This function checks if the expression returned by PATTERN_RECOG_FUNC is
2537 supported in vector form by the target. We use 'TYPE_IN' to obtain the
2538 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
2539 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
2540 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
2541 to the available target pattern.
2542
b8698a0f 2543 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
2544 for vect_recog_pattern. */
2545
2546static void
92aea285
JJ
2547vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
2548 gimple_stmt_iterator si,
2549 VEC (gimple, heap) **stmts_to_replace)
20f06221 2550{
726a989a 2551 gimple stmt = gsi_stmt (si), pattern_stmt;
383d9c83 2552 stmt_vec_info stmt_info;
383d9c83 2553 loop_vec_info loop_vinfo;
20f06221
DN
2554 tree pattern_vectype;
2555 tree type_in, type_out;
20f06221 2556 enum tree_code code;
b5aeb3bb
IR
2557 int i;
2558 gimple next;
20f06221 2559
d1fc143d
JJ
2560 VEC_truncate (gimple, *stmts_to_replace, 0);
2561 VEC_quick_push (gimple, *stmts_to_replace, stmt);
2562 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
726a989a 2563 if (!pattern_stmt)
b8698a0f
L
2564 return;
2565
d1fc143d 2566 stmt = VEC_last (gimple, *stmts_to_replace);
383d9c83
IR
2567 stmt_info = vinfo_for_stmt (stmt);
2568 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
2569
b8698a0f
L
2570 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
2571 {
2572 /* No need to check target support (already checked by the pattern
2573 recognition function). */
b690cc0f 2574 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
2575 }
2576 else
2577 {
32e8bb8e 2578 enum machine_mode vec_mode;
20f06221
DN
2579 enum insn_code icode;
2580 optab optab;
2581
2582 /* Check target support */
b690cc0f
RG
2583 type_in = get_vectype_for_scalar_type (type_in);
2584 if (!type_in)
2585 return;
2586 if (type_out)
2587 type_out = get_vectype_for_scalar_type (type_out);
2588 else
2589 type_out = type_in;
15bbc165
AO
2590 if (!type_out)
2591 return;
b690cc0f 2592 pattern_vectype = type_out;
03d3e953 2593
726a989a
RB
2594 if (is_gimple_assign (pattern_stmt))
2595 code = gimple_assign_rhs_code (pattern_stmt);
2596 else
2597 {
2598 gcc_assert (is_gimple_call (pattern_stmt));
2599 code = CALL_EXPR;
2600 }
2601
b690cc0f
RG
2602 optab = optab_for_tree_code (code, type_in, optab_default);
2603 vec_mode = TYPE_MODE (type_in);
20f06221 2604 if (!optab
947131ba 2605 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 2606 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
20f06221
DN
2607 return;
2608 }
2609
2610 /* Found a vectorizable pattern. */
2611 if (vect_print_dump_info (REPORT_DETAILS))
2612 {
b8698a0f 2613 fprintf (vect_dump, "pattern recognized: ");
726a989a 2614 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
20f06221 2615 }
b8698a0f 2616
726a989a 2617 /* Mark the stmts that are involved in the pattern. */
1107f3ae 2618 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 2619
b5aeb3bb
IR
2620 /* Patterns cannot be vectorized using SLP, because they change the order of
2621 computation. */
f5709183
IR
2622 if (loop_vinfo)
2623 FOR_EACH_VEC_ELT (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
2624 if (next == stmt)
2625 VEC_ordered_remove (gimple, LOOP_VINFO_REDUCTIONS (loop_vinfo), i);
51312233 2626
1107f3ae
IR
2627 /* It is possible that additional pattern stmts are created and inserted in
2628 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
2629 relevant statements. */
d1fc143d
JJ
2630 for (i = 0; VEC_iterate (gimple, *stmts_to_replace, i, stmt)
2631 && (unsigned) i < (VEC_length (gimple, *stmts_to_replace) - 1);
51312233
IR
2632 i++)
2633 {
2634 stmt_info = vinfo_for_stmt (stmt);
2635 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
2636 if (vect_print_dump_info (REPORT_DETAILS))
2637 {
2638 fprintf (vect_dump, "additional pattern stmt: ");
2639 print_gimple_stmt (vect_dump, pattern_stmt, 0, TDF_SLIM);
2640 }
2641
1107f3ae 2642 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
51312233 2643 }
20f06221
DN
2644}
2645
2646
2647/* Function vect_pattern_recog
2648
2649 Input:
2650 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
2651 computation idioms.
2652
9d5e7640
IR
2653 Output - for each computation idiom that is detected we create a new stmt
2654 that provides the same functionality and that can be vectorized. We
20f06221
DN
2655 also record some information in the struct_stmt_info of the relevant
2656 stmts, as explained below:
2657
2658 At the entry to this function we have the following stmts, with the
2659 following initial value in the STMT_VINFO fields:
2660
2661 stmt in_pattern_p related_stmt vec_stmt
2662 S1: a_i = .... - - -
2663 S2: a_2 = ..use(a_i).. - - -
2664 S3: a_1 = ..use(a_2).. - - -
2665 S4: a_0 = ..use(a_1).. - - -
2666 S5: ... = ..use(a_0).. - - -
2667
2668 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
2669 represented by a single stmt. We then:
2670 - create a new stmt S6 equivalent to the pattern (the stmt is not
2671 inserted into the code)
20f06221
DN
2672 - fill in the STMT_VINFO fields as follows:
2673
2674 in_pattern_p related_stmt vec_stmt
b8698a0f 2675 S1: a_i = .... - - -
20f06221
DN
2676 S2: a_2 = ..use(a_i).. - - -
2677 S3: a_1 = ..use(a_2).. - - -
20f06221 2678 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 2679 '---> S6: a_new = .... - S4 -
20f06221
DN
2680 S5: ... = ..use(a_0).. - - -
2681
2682 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 2683 to each other through the RELATED_STMT field).
20f06221
DN
2684
2685 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2686 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2687 remain irrelevant unless used by stmts other than S4.
2688
2689 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 2690 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37
IR
2691 a pointer to the new vector stmt VS6 from S6 (as usual).
2692 S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
2693
2694 in_pattern_p related_stmt vec_stmt
2695 S1: a_i = .... - - -
2696 S2: a_2 = ..use(a_i).. - - -
2697 S3: a_1 = ..use(a_2).. - - -
2698 > VS6: va_new = .... - - -
20f06221 2699 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 2700 '---> S6: a_new = .... - S4 VS6
20f06221
DN
2701 > VS5: ... = ..vuse(va_new).. - - -
2702 S5: ... = ..use(a_0).. - - -
2703
9d5e7640 2704 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
2705 elsewhere), and we'll end up with:
2706
b8698a0f 2707 VS6: va_new = ....
83197f37
IR
2708 VS5: ... = ..vuse(va_new)..
2709
2710 In case of more than one pattern statements, e.g., widen-mult with
2711 intermediate type:
2712
2713 S1 a_t = ;
2714 S2 a_T = (TYPE) a_t;
2715 '--> S3: a_it = (interm_type) a_t;
2716 S4 prod_T = a_T * CONST;
2717 '--> S5: prod_T' = a_it w* CONST;
2718
2719 there may be other users of a_T outside the pattern. In that case S2 will
2720 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2721 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2722 be recorded in S3. */
20f06221
DN
2723
2724void
f5709183 2725vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
20f06221 2726{
f5709183
IR
2727 struct loop *loop;
2728 basic_block *bbs, bb;
2729 unsigned int nbbs;
726a989a 2730 gimple_stmt_iterator si;
20f06221 2731 unsigned int i, j;
92aea285 2732 vect_recog_func_ptr vect_recog_func;
d1fc143d 2733 VEC (gimple, heap) *stmts_to_replace = VEC_alloc (gimple, heap, 1);
f5709183 2734 gimple stmt;
20f06221
DN
2735
2736 if (vect_print_dump_info (REPORT_DETAILS))
2737 fprintf (vect_dump, "=== vect_pattern_recog ===");
2738
f5709183
IR
2739 if (loop_vinfo)
2740 {
2741 loop = LOOP_VINFO_LOOP (loop_vinfo);
2742 bbs = LOOP_VINFO_BBS (loop_vinfo);
2743 nbbs = loop->num_nodes;
2744 }
2745 else
2746 {
2747 bb = BB_VINFO_BB (bb_vinfo);
2748 nbbs = 1;
2749 bbs = XNEW (basic_block);
2750 bbs[0] = bb;
2751 }
2752
20f06221
DN
2753 /* Scan through the loop stmts, applying the pattern recognition
2754 functions starting at each stmt visited: */
2755 for (i = 0; i < nbbs; i++)
2756 {
2757 basic_block bb = bbs[i];
726a989a 2758 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
20f06221 2759 {
f5709183
IR
2760 if (bb_vinfo && (stmt = gsi_stmt (si))
2761 && vinfo_for_stmt (stmt)
2762 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2763 continue;
2764
20f06221
DN
2765 /* Scan over all generic vect_recog_xxx_pattern functions. */
2766 for (j = 0; j < NUM_PATTERNS; j++)
2767 {
92aea285
JJ
2768 vect_recog_func = vect_vect_recog_func_ptrs[j];
2769 vect_pattern_recog_1 (vect_recog_func, si,
d1fc143d 2770 &stmts_to_replace);
20f06221
DN
2771 }
2772 }
2773 }
d1fc143d
JJ
2774
2775 VEC_free (gimple, heap, stmts_to_replace);
f5709183
IR
2776 if (bb_vinfo)
2777 free (bbs);
20f06221 2778}