]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
Daily bump.
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
23a5b65a 2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
20f06221
DN
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
20f06221
DN
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20f06221
DN
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
20f06221 25#include "tree.h"
d8a2d370 26#include "stor-layout.h"
20f06221
DN
27#include "target.h"
28#include "basic-block.h"
cf835838 29#include "gimple-pretty-print.h"
2fb9a547
AM
30#include "tree-ssa-alias.h"
31#include "internal-fn.h"
32#include "tree-eh.h"
33#include "gimple-expr.h"
34#include "is-a.h"
18f429e2 35#include "gimple.h"
45b0be94 36#include "gimplify.h"
5be5c238 37#include "gimple-iterator.h"
442b4905
AM
38#include "gimple-ssa.h"
39#include "tree-phinodes.h"
40#include "ssa-iterators.h"
d8a2d370 41#include "stringpool.h"
442b4905 42#include "tree-ssanames.h"
20f06221
DN
43#include "cfgloop.h"
44#include "expr.h"
45#include "optabs.h"
46#include "params.h"
47#include "tree-data-ref.h"
48#include "tree-vectorizer.h"
7ee2468b 49#include "recog.h" /* FIXME: for insn_data */
718f9c0f 50#include "diagnostic-core.h"
7ee2468b 51#include "dumpfile.h"
20f06221 52
20f06221 53/* Pattern recognition functions */
9771b263 54static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
51312233 55 tree *);
9771b263 56static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
51312233 57 tree *);
9771b263 58static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
51312233 59 tree *);
9771b263
DN
60static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
61static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
1107f3ae 62 tree *);
9771b263 63static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
36ba4aae 64 tree *, tree *);
7e9a3abb 65static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
9771b263 66static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
732a0ad3 67 tree *, tree *);
9771b263 68static gimple vect_recog_divmod_pattern (vec<gimple> *,
079c527f 69 tree *, tree *);
9771b263 70static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
69d2aade 71 tree *, tree *);
9771b263 72static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
20f06221
DN
73static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
74 vect_recog_widen_mult_pattern,
75 vect_recog_widen_sum_pattern,
0b2229b0 76 vect_recog_dot_prod_pattern,
1107f3ae 77 vect_recog_pow_pattern,
36ba4aae 78 vect_recog_widen_shift_pattern,
33018845 79 vect_recog_over_widening_pattern,
7e9a3abb 80 vect_recog_rotate_pattern,
732a0ad3 81 vect_recog_vector_vector_shift_pattern,
079c527f 82 vect_recog_divmod_pattern,
71c92d17
JJ
83 vect_recog_mixed_size_cond_pattern,
84 vect_recog_bool_pattern};
20f06221 85
083481d8
JJ
86static inline void
87append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
88{
a1a6c5b2
JJ
89 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
90 stmt);
083481d8
JJ
91}
92
93static inline void
94new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
95{
96 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
97 append_pattern_def_seq (stmt_info, stmt);
98}
99
f71cf56a
UW
100/* Check whether STMT2 is in the same loop or basic block as STMT1.
101 Which of the two applies depends on whether we're currently doing
102 loop-based or basic-block-based vectorization, as determined by
103 the vinfo_for_stmt for STMT1 (which must be defined).
104
105 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
106 to be defined as well. */
107
108static bool
109vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
110{
111 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
112 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
113 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
114
115 if (!gimple_bb (stmt2))
116 return false;
117
118 if (loop_vinfo)
119 {
120 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
121 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
122 return false;
123 }
124 else
125 {
126 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
127 || gimple_code (stmt2) == GIMPLE_PHI)
128 return false;
129 }
130
131 gcc_assert (vinfo_for_stmt (stmt2));
132 return true;
133}
134
9a7a4398
UW
135/* If the LHS of DEF_STMT has a single use, and that statement is
136 in the same loop or basic block, return it. */
137
138static gimple
139vect_single_imm_use (gimple def_stmt)
140{
141 tree lhs = gimple_assign_lhs (def_stmt);
142 use_operand_p use_p;
143 gimple use_stmt;
144
145 if (!single_imm_use (lhs, &use_p, &use_stmt))
146 return NULL;
147
148 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
149 return NULL;
150
151 return use_stmt;
152}
153
bc4fb355
IR
154/* Check whether NAME, an ssa-name used in USE_STMT,
155 is a result of a type promotion or demotion, such that:
20f06221 156 DEF_STMT: NAME = NOP (name0)
bc4fb355 157 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
383d9c83
IR
158 If CHECK_SIGN is TRUE, check that either both types are signed or both are
159 unsigned. */
20f06221
DN
160
161static bool
bc4fb355
IR
162type_conversion_p (tree name, gimple use_stmt, bool check_sign,
163 tree *orig_type, gimple *def_stmt, bool *promotion)
20f06221
DN
164{
165 tree dummy;
726a989a 166 gimple dummy_gimple;
20f06221
DN
167 loop_vec_info loop_vinfo;
168 stmt_vec_info stmt_vinfo;
20f06221
DN
169 tree type = TREE_TYPE (name);
170 tree oprnd0;
171 enum vect_def_type dt;
172 tree def;
f5709183 173 bb_vec_info bb_vinfo;
20f06221
DN
174
175 stmt_vinfo = vinfo_for_stmt (use_stmt);
176 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183
IR
177 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
178 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
179 &def, &dt))
20f06221
DN
180 return false;
181
8644a673
IR
182 if (dt != vect_internal_def
183 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
184 return false;
185
bc4fb355 186 if (!*def_stmt)
20f06221
DN
187 return false;
188
726a989a 189 if (!is_gimple_assign (*def_stmt))
20f06221
DN
190 return false;
191
bc4fb355 192 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
20f06221
DN
193 return false;
194
726a989a 195 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221 196
bc4fb355
IR
197 *orig_type = TREE_TYPE (oprnd0);
198 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
199 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
200 return false;
201
202 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
203 *promotion = true;
204 else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
205 *promotion = false;
206 else
20f06221
DN
207 return false;
208
24ee1384 209 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
f5709183 210 bb_vinfo, &dummy_gimple, &dummy, &dt))
20f06221
DN
211 return false;
212
20f06221
DN
213 return true;
214}
215
726a989a
RB
216/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
217 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
218
219static tree
220vect_recog_temp_ssa_var (tree type, gimple stmt)
221{
83d5977e 222 return make_temp_ssa_name (type, stmt, "patt");
726a989a 223}
20f06221
DN
224
225/* Function vect_recog_dot_prod_pattern
226
227 Try to find the following pattern:
228
229 type x_t, y_t;
230 TYPE1 prod;
231 TYPE2 sum = init;
232 loop:
233 sum_0 = phi <init, sum_1>
234 S1 x_t = ...
235 S2 y_t = ...
236 S3 x_T = (TYPE1) x_t;
237 S4 y_T = (TYPE1) y_t;
238 S5 prod = x_T * y_T;
239 [S6 prod = (TYPE2) prod; #optional]
240 S7 sum_1 = prod + sum_0;
241
b8698a0f
L
242 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
243 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 244 computation.
b8698a0f 245
20f06221
DN
246 Input:
247
51312233
IR
248 * STMTS: Contains a stmt from which the pattern search begins. In the
249 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
250 will be detected.
20f06221
DN
251
252 Output:
253
254 * TYPE_IN: The type of the input arguments to the pattern.
255
256 * TYPE_OUT: The type of the output of this pattern.
257
258 * Return value: A new stmt that will be used to replace the sequence of
259 stmts that constitute the pattern. In this case it will be:
260 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
261
262 Note: The dot-prod idiom is a widening reduction pattern that is
263 vectorized without preserving all the intermediate results. It
264 produces only N/2 (widened) results (by summing up pairs of
265 intermediate results) rather than all N results. Therefore, we
266 cannot allow this pattern when we want to get all the results and in
267 the correct order (as is the case when this computation is in an
268 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 269
726a989a 270static gimple
9771b263 271vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
51312233 272 tree *type_out)
20f06221 273{
9771b263 274 gimple stmt, last_stmt = (*stmts)[0];
20f06221
DN
275 tree oprnd0, oprnd1;
276 tree oprnd00, oprnd01;
51312233 277 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 278 tree type, half_type;
726a989a 279 gimple pattern_stmt;
20f06221 280 tree prod_type;
d29de1bf 281 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 282 struct loop *loop;
f471fe72 283 tree var;
bc4fb355 284 bool promotion;
20f06221 285
f5709183
IR
286 if (!loop_info)
287 return NULL;
288
289 loop = LOOP_VINFO_LOOP (loop_info);
290
51312233 291 if (!is_gimple_assign (last_stmt))
20f06221
DN
292 return NULL;
293
51312233 294 type = gimple_expr_type (last_stmt);
20f06221 295
b8698a0f 296 /* Look for the following pattern
20f06221
DN
297 DX = (TYPE1) X;
298 DY = (TYPE1) Y;
b8698a0f 299 DPROD = DX * DY;
20f06221
DN
300 DDPROD = (TYPE2) DPROD;
301 sum_1 = DDPROD + sum_0;
b8698a0f 302 In which
20f06221
DN
303 - DX is double the size of X
304 - DY is double the size of Y
305 - DX, DY, DPROD all have the same type
306 - sum is the same size of DPROD or bigger
307 - sum has been recognized as a reduction variable.
308
309 This is equivalent to:
310 DPROD = X w* Y; #widen mult
311 sum_1 = DPROD w+ sum_0; #widen summation
312 or
313 DPROD = X w* Y; #widen mult
314 sum_1 = DPROD + sum_0; #summation
315 */
316
317 /* Starting from LAST_STMT, follow the defs of its uses in search
318 of the above pattern. */
319
51312233 320 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
321 return NULL;
322
323 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
324 {
325 /* Has been detected as widening-summation? */
326
327 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
328 type = gimple_expr_type (stmt);
329 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 330 return NULL;
726a989a
RB
331 oprnd0 = gimple_assign_rhs1 (stmt);
332 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
333 half_type = TREE_TYPE (oprnd0);
334 }
335 else
336 {
726a989a 337 gimple def_stmt;
20f06221
DN
338
339 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
340 return NULL;
51312233
IR
341 oprnd0 = gimple_assign_rhs1 (last_stmt);
342 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
343 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
344 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 345 return NULL;
51312233 346 stmt = last_stmt;
20f06221 347
bc4fb355
IR
348 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
349 &promotion)
350 && promotion)
20f06221
DN
351 {
352 stmt = def_stmt;
726a989a 353 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
354 }
355 else
356 half_type = type;
357 }
358
51312233 359 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
360 we know that oprnd1 is the reduction variable (defined by a loop-header
361 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
362 Left to check that oprnd0 is defined by a (widen_)mult_expr */
ba02d3bc
RG
363 if (TREE_CODE (oprnd0) != SSA_NAME)
364 return NULL;
20f06221
DN
365
366 prod_type = half_type;
367 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
368
369 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 370 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
371 return NULL;
372
b8698a0f 373 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 374 inside the loop (in case we are analyzing an outer-loop). */
726a989a 375 if (!is_gimple_assign (stmt))
b8698a0f 376 return NULL;
20f06221
DN
377 stmt_vinfo = vinfo_for_stmt (stmt);
378 gcc_assert (stmt_vinfo);
8644a673 379 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 380 return NULL;
726a989a 381 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
382 return NULL;
383 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
384 {
385 /* Has been detected as a widening multiplication? */
386
387 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 388 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
389 return NULL;
390 stmt_vinfo = vinfo_for_stmt (stmt);
391 gcc_assert (stmt_vinfo);
8644a673 392 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
393 oprnd00 = gimple_assign_rhs1 (stmt);
394 oprnd01 = gimple_assign_rhs2 (stmt);
20f06221
DN
395 }
396 else
397 {
398 tree half_type0, half_type1;
726a989a 399 gimple def_stmt;
20f06221
DN
400 tree oprnd0, oprnd1;
401
726a989a
RB
402 oprnd0 = gimple_assign_rhs1 (stmt);
403 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
404 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
405 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 406 return NULL;
bc4fb355
IR
407 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
408 &promotion)
409 || !promotion)
20f06221 410 return NULL;
726a989a 411 oprnd00 = gimple_assign_rhs1 (def_stmt);
181f5f3e 412 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
bc4fb355
IR
413 &promotion)
414 || !promotion)
20f06221 415 return NULL;
726a989a 416 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 417 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
418 return NULL;
419 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
420 return NULL;
421 }
422
423 half_type = TREE_TYPE (oprnd00);
424 *type_in = half_type;
425 *type_out = type;
b8698a0f 426
20f06221 427 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 428 var = vect_recog_temp_ssa_var (type, NULL);
73804b12
RG
429 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
430 oprnd00, oprnd01, oprnd1);
b8698a0f 431
73fbfcad 432 if (dump_enabled_p ())
20f06221 433 {
ccb3ad87 434 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 435 "vect_recog_dot_prod_pattern: detected: ");
ccb3ad87 436 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 437 dump_printf (MSG_NOTE, "\n");
20f06221 438 }
d29de1bf
DN
439
440 /* We don't allow changing the order of the computation in the inner-loop
441 when doing outer-loop vectorization. */
51312233 442 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
d29de1bf 443
726a989a 444 return pattern_stmt;
20f06221 445}
b8698a0f 446
51312233 447
36ba4aae
IR
448/* Handle widening operation by a constant. At the moment we support MULT_EXPR
449 and LSHIFT_EXPR.
450
451 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
452 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
51312233
IR
453
454 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
36ba4aae
IR
455 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
456 that satisfies the above restrictions, we can perform a widening opeartion
457 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
458 with a_it = (interm_type) a_t; */
51312233
IR
459
460static bool
36ba4aae
IR
461vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
462 tree const_oprnd, tree *oprnd,
9771b263 463 vec<gimple> *stmts, tree type,
36ba4aae 464 tree *half_type, gimple def_stmt)
51312233 465{
83d5977e 466 tree new_type, new_oprnd;
51312233
IR
467 gimple new_stmt;
468
36ba4aae
IR
469 if (code != MULT_EXPR && code != LSHIFT_EXPR)
470 return false;
471
472 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
473 || (code == LSHIFT_EXPR
474 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
475 != 1))
476 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
51312233
IR
477 {
478 /* CONST_OPRND is a constant of HALF_TYPE. */
479 *oprnd = gimple_assign_rhs1 (def_stmt);
480 return true;
481 }
482
f71cf56a
UW
483 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
484 return false;
485
486 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
51312233
IR
487 return false;
488
36ba4aae 489 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
51312233
IR
490 a type 2 times bigger than HALF_TYPE. */
491 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
492 TYPE_UNSIGNED (type));
36ba4aae
IR
493 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
494 || (code == LSHIFT_EXPR
495 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
51312233
IR
496 return false;
497
36ba4aae 498 /* Use NEW_TYPE for widening operation. */
51312233
IR
499 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
500 {
501 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
502 /* Check if the already created pattern stmt is what we need. */
503 if (!is_gimple_assign (new_stmt)
504 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
505 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
506 return false;
507
9771b263 508 stmts->safe_push (def_stmt);
51312233
IR
509 *oprnd = gimple_assign_lhs (new_stmt);
510 }
511 else
512 {
513 /* Create a_T = (NEW_TYPE) a_t; */
514 *oprnd = gimple_assign_rhs1 (def_stmt);
83d5977e 515 new_oprnd = make_ssa_name (new_type, NULL);
51312233
IR
516 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
517 NULL_TREE);
51312233 518 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
9771b263 519 stmts->safe_push (def_stmt);
51312233
IR
520 *oprnd = new_oprnd;
521 }
522
523 *half_type = new_type;
524 return true;
525}
526
527
20f06221
DN
528/* Function vect_recog_widen_mult_pattern
529
530 Try to find the following pattern:
531
d367387c
CH
532 type1 a_t;
533 type2 b_t;
20f06221
DN
534 TYPE a_T, b_T, prod_T;
535
536 S1 a_t = ;
537 S2 b_t = ;
538 S3 a_T = (TYPE) a_t;
539 S4 b_T = (TYPE) b_t;
540 S5 prod_T = a_T * b_T;
541
d367387c 542 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
20f06221 543
d47657bd 544 Also detect unsigned cases:
383d9c83 545
d367387c
CH
546 unsigned type1 a_t;
547 unsigned type2 b_t;
383d9c83
IR
548 unsigned TYPE u_prod_T;
549 TYPE a_T, b_T, prod_T;
550
551 S1 a_t = ;
552 S2 b_t = ;
553 S3 a_T = (TYPE) a_t;
554 S4 b_T = (TYPE) b_t;
555 S5 prod_T = a_T * b_T;
556 S6 u_prod_T = (unsigned TYPE) prod_T;
557
558 and multiplication by constants:
559
560 type a_t;
561 TYPE a_T, prod_T;
562
563 S1 a_t = ;
564 S3 a_T = (TYPE) a_t;
565 S5 prod_T = a_T * CONST;
566
51312233
IR
567 A special case of multiplication by constants is when 'TYPE' is 4 times
568 bigger than 'type', but CONST fits an intermediate type 2 times smaller
569 than 'TYPE'. In that case we create an additional pattern stmt for S3
570 to create a variable of the intermediate type, and perform widen-mult
571 on the intermediate type as well:
572
573 type a_t;
574 interm_type a_it;
575 TYPE a_T, prod_T, prod_T';
576
577 S1 a_t = ;
578 S3 a_T = (TYPE) a_t;
579 '--> a_it = (interm_type) a_t;
580 S5 prod_T = a_T * CONST;
581 '--> prod_T' = a_it w* CONST;
20f06221 582
51312233
IR
583 Input/Output:
584
585 * STMTS: Contains a stmt from which the pattern search begins. In the
586 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
587 is detected. In case of unsigned widen-mult, the original stmt (S5) is
588 replaced with S6 in STMTS. In case of multiplication by a constant
589 of an intermediate type (the last case above), STMTS also contains S3
590 (inserted before S5).
20f06221
DN
591
592 Output:
593
594 * TYPE_IN: The type of the input arguments to the pattern.
595
383d9c83 596 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
597
598 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 599 stmts that constitute the pattern. In this case it will be:
20f06221 600 WIDEN_MULT <a_t, b_t>
d367387c
CH
601 If the result of WIDEN_MULT needs to be converted to a larger type, the
602 returned stmt will be this type conversion stmt.
20f06221
DN
603*/
604
726a989a 605static gimple
9771b263 606vect_recog_widen_mult_pattern (vec<gimple> *stmts,
51312233 607 tree *type_in, tree *type_out)
20f06221 608{
9771b263 609 gimple last_stmt = stmts->pop ();
726a989a 610 gimple def_stmt0, def_stmt1;
89d67cca
DN
611 tree oprnd0, oprnd1;
612 tree type, half_type0, half_type1;
d367387c
CH
613 gimple new_stmt = NULL, pattern_stmt = NULL;
614 tree vectype, vecitype;
726a989a 615 tree var;
89d67cca 616 enum tree_code dummy_code;
5d593372 617 int dummy_int;
9771b263 618 vec<tree> dummy_vec;
36ba4aae 619 bool op1_ok;
bc4fb355 620 bool promotion;
89d67cca 621
51312233 622 if (!is_gimple_assign (last_stmt))
89d67cca
DN
623 return NULL;
624
51312233 625 type = gimple_expr_type (last_stmt);
89d67cca
DN
626
627 /* Starting from LAST_STMT, follow the defs of its uses in search
628 of the above pattern. */
629
51312233 630 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
89d67cca
DN
631 return NULL;
632
51312233
IR
633 oprnd0 = gimple_assign_rhs1 (last_stmt);
634 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
635 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
636 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
637 return NULL;
638
383d9c83 639 /* Check argument 0. */
bc4fb355
IR
640 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
641 &promotion)
642 || !promotion)
643 return NULL;
383d9c83 644 /* Check argument 1. */
bc4fb355
IR
645 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
646 &def_stmt1, &promotion);
89d67cca 647
bc4fb355 648 if (op1_ok && promotion)
383d9c83
IR
649 {
650 oprnd0 = gimple_assign_rhs1 (def_stmt0);
651 oprnd1 = gimple_assign_rhs1 (def_stmt1);
652 }
36ba4aae 653 else
383d9c83 654 {
51312233 655 if (TREE_CODE (oprnd1) == INTEGER_CST
383d9c83 656 && TREE_CODE (half_type0) == INTEGER_TYPE
36ba4aae
IR
657 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
658 &oprnd0, stmts, type,
659 &half_type0, def_stmt0))
bfdeda2c
JJ
660 {
661 half_type1 = half_type0;
662 oprnd1 = fold_convert (half_type1, oprnd1);
663 }
383d9c83
IR
664 else
665 return NULL;
666 }
667
d367387c
CH
668 /* If the two arguments have different sizes, convert the one with
669 the smaller type into the larger type. */
670 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
671 {
672 tree* oprnd = NULL;
673 gimple def_stmt = NULL;
674
675 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
676 {
677 def_stmt = def_stmt0;
678 half_type0 = half_type1;
679 oprnd = &oprnd0;
680 }
681 else
682 {
683 def_stmt = def_stmt1;
684 half_type1 = half_type0;
685 oprnd = &oprnd1;
686 }
687
688 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
689 tree new_oprnd = make_ssa_name (half_type0, NULL);
690 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
691 old_oprnd, NULL_TREE);
692 *oprnd = new_oprnd;
693 }
694
383d9c83
IR
695 /* Handle unsigned case. Look for
696 S6 u_prod_T = (unsigned TYPE) prod_T;
697 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
698 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
699 {
9a7a4398
UW
700 gimple use_stmt;
701 tree use_lhs;
383d9c83
IR
702 tree use_type;
703
704 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
705 return NULL;
706
9a7a4398
UW
707 use_stmt = vect_single_imm_use (last_stmt);
708 if (!use_stmt || !is_gimple_assign (use_stmt)
709 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
383d9c83
IR
710 return NULL;
711
712 use_lhs = gimple_assign_lhs (use_stmt);
713 use_type = TREE_TYPE (use_lhs);
714 if (!INTEGRAL_TYPE_P (use_type)
715 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
716 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
717 return NULL;
718
719 type = use_type;
51312233 720 last_stmt = use_stmt;
383d9c83 721 }
89d67cca 722
9600efe1 723 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
724 return NULL;
725
d367387c
CH
726 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
727 to get an intermediate result of type ITYPE. In this case we need
728 to build a statement to convert this intermediate result to type TYPE. */
729 tree itype = type;
730 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
731 itype = build_nonstandard_integer_type
732 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
733 TYPE_UNSIGNED (type));
734
89d67cca 735 /* Pattern detected. */
73fbfcad 736 if (dump_enabled_p ())
ccb3ad87 737 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 738 "vect_recog_widen_mult_pattern: detected:\n");
89d67cca
DN
739
740 /* Check target support */
741 vectype = get_vectype_for_scalar_type (half_type0);
d367387c 742 vecitype = get_vectype_for_scalar_type (itype);
03d3e953 743 if (!vectype
d367387c 744 || !vecitype
51312233 745 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
d367387c 746 vecitype, vectype,
a86ec597
RH
747 &dummy_code, &dummy_code,
748 &dummy_int, &dummy_vec))
89d67cca
DN
749 return NULL;
750
751 *type_in = vectype;
d367387c 752 *type_out = get_vectype_for_scalar_type (type);
89d67cca
DN
753
754 /* Pattern supported. Create a stmt to be used to replace the pattern: */
d367387c 755 var = vect_recog_temp_ssa_var (itype, NULL);
726a989a
RB
756 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
757 oprnd1);
726a989a 758
d367387c
CH
759 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
760 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
761 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
762 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
763
764 /* If the original two operands have different sizes, we may need to convert
765 the smaller one into the larget type. If this is the case, at this point
766 the new stmt is already built. */
767 if (new_stmt)
768 {
769 append_pattern_def_seq (stmt_vinfo, new_stmt);
770 stmt_vec_info new_stmt_info
771 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
772 set_vinfo_for_stmt (new_stmt, new_stmt_info);
773 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
774 }
775
776 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
777 the result of the widen-mult operation into type TYPE. */
778 if (itype != type)
779 {
780 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
781 stmt_vec_info pattern_stmt_info
782 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
783 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
784 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
785 pattern_stmt
786 = gimple_build_assign_with_ops (NOP_EXPR,
787 vect_recog_temp_ssa_var (type, NULL),
788 gimple_assign_lhs (pattern_stmt),
789 NULL_TREE);
790 }
791
73fbfcad 792 if (dump_enabled_p ())
78c60e3d 793 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
726a989a 794
9771b263 795 stmts->safe_push (last_stmt);
726a989a 796 return pattern_stmt;
20f06221
DN
797}
798
799
0b2229b0
RG
800/* Function vect_recog_pow_pattern
801
802 Try to find the following pattern:
803
804 x = POW (y, N);
805
806 with POW being one of pow, powf, powi, powif and N being
807 either 2 or 0.5.
808
809 Input:
810
811 * LAST_STMT: A stmt from which the pattern search begins.
812
813 Output:
814
815 * TYPE_IN: The type of the input arguments to the pattern.
816
817 * TYPE_OUT: The type of the output of this pattern.
818
819 * Return value: A new stmt that will be used to replace the sequence of
820 stmts that constitute the pattern. In this case it will be:
726a989a 821 x = x * x
0b2229b0 822 or
726a989a 823 x = sqrt (x)
0b2229b0
RG
824*/
825
726a989a 826static gimple
9771b263 827vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
51312233 828 tree *type_out)
0b2229b0 829{
9771b263 830 gimple last_stmt = (*stmts)[0];
726a989a
RB
831 tree fn, base, exp = NULL;
832 gimple stmt;
833 tree var;
0b2229b0 834
51312233 835 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
0b2229b0
RG
836 return NULL;
837
51312233 838 fn = gimple_call_fndecl (last_stmt);
52bd463c
RG
839 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
840 return NULL;
841
0b2229b0
RG
842 switch (DECL_FUNCTION_CODE (fn))
843 {
844 case BUILT_IN_POWIF:
845 case BUILT_IN_POWI:
846 case BUILT_IN_POWF:
847 case BUILT_IN_POW:
51312233
IR
848 base = gimple_call_arg (last_stmt, 0);
849 exp = gimple_call_arg (last_stmt, 1);
0b2229b0
RG
850 if (TREE_CODE (exp) != REAL_CST
851 && TREE_CODE (exp) != INTEGER_CST)
726a989a 852 return NULL;
0b2229b0
RG
853 break;
854
726a989a
RB
855 default:
856 return NULL;
0b2229b0
RG
857 }
858
859 /* We now have a pow or powi builtin function call with a constant
860 exponent. */
861
0b2229b0
RG
862 *type_out = NULL_TREE;
863
864 /* Catch squaring. */
9541ffee 865 if ((tree_fits_shwi_p (exp)
9439e9a1 866 && tree_to_shwi (exp) == 2)
0b2229b0
RG
867 || (TREE_CODE (exp) == REAL_CST
868 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
c6b1b49b
RG
869 {
870 *type_in = TREE_TYPE (base);
726a989a
RB
871
872 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
873 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
726a989a 874 return stmt;
c6b1b49b 875 }
0b2229b0
RG
876
877 /* Catch square root. */
878 if (TREE_CODE (exp) == REAL_CST
879 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
880 {
881 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
c6b1b49b
RG
882 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
883 if (*type_in)
884 {
726a989a
RB
885 gimple stmt = gimple_build_call (newfn, 1, base);
886 if (vectorizable_function (stmt, *type_in, *type_in)
887 != NULL_TREE)
888 {
889 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
b8698a0f 890 gimple_call_set_lhs (stmt, var);
726a989a
RB
891 return stmt;
892 }
c6b1b49b 893 }
0b2229b0
RG
894 }
895
726a989a 896 return NULL;
0b2229b0
RG
897}
898
899
20f06221
DN
900/* Function vect_recog_widen_sum_pattern
901
902 Try to find the following pattern:
903
b8698a0f 904 type x_t;
20f06221
DN
905 TYPE x_T, sum = init;
906 loop:
907 sum_0 = phi <init, sum_1>
908 S1 x_t = *p;
909 S2 x_T = (TYPE) x_t;
910 S3 sum_1 = x_T + sum_0;
911
b8698a0f 912 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 913 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 914 a special case of a reduction computation.
20f06221
DN
915
916 Input:
917
918 * LAST_STMT: A stmt from which the pattern search begins. In the example,
919 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 920
20f06221 921 Output:
b8698a0f 922
20f06221
DN
923 * TYPE_IN: The type of the input arguments to the pattern.
924
925 * TYPE_OUT: The type of the output of this pattern.
926
927 * Return value: A new stmt that will be used to replace the sequence of
928 stmts that constitute the pattern. In this case it will be:
929 WIDEN_SUM <x_t, sum_0>
d29de1bf 930
b8698a0f 931 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 932 vectorized without preserving all the intermediate results. It
b8698a0f
L
933 produces only N/2 (widened) results (by summing up pairs of
934 intermediate results) rather than all N results. Therefore, we
935 cannot allow this pattern when we want to get all the results and in
936 the correct order (as is the case when this computation is in an
d29de1bf 937 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 938
726a989a 939static gimple
9771b263 940vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
51312233 941 tree *type_out)
20f06221 942{
9771b263 943 gimple stmt, last_stmt = (*stmts)[0];
20f06221 944 tree oprnd0, oprnd1;
51312233 945 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 946 tree type, half_type;
726a989a 947 gimple pattern_stmt;
d29de1bf 948 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 949 struct loop *loop;
726a989a 950 tree var;
bc4fb355 951 bool promotion;
20f06221 952
f5709183
IR
953 if (!loop_info)
954 return NULL;
955
956 loop = LOOP_VINFO_LOOP (loop_info);
957
51312233 958 if (!is_gimple_assign (last_stmt))
20f06221
DN
959 return NULL;
960
51312233 961 type = gimple_expr_type (last_stmt);
20f06221
DN
962
963 /* Look for the following pattern
964 DX = (TYPE) X;
965 sum_1 = DX + sum_0;
966 In which DX is at least double the size of X, and sum_1 has been
967 recognized as a reduction variable.
968 */
969
970 /* Starting from LAST_STMT, follow the defs of its uses in search
971 of the above pattern. */
972
51312233 973 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
974 return NULL;
975
976 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
977 return NULL;
978
51312233
IR
979 oprnd0 = gimple_assign_rhs1 (last_stmt);
980 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
981 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
982 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
983 return NULL;
984
51312233 985 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
986 we know that oprnd1 is the reduction variable (defined by a loop-header
987 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
988 Left to check that oprnd0 is defined by a cast from type 'type' to type
989 'TYPE'. */
990
bc4fb355
IR
991 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
992 &promotion)
993 || !promotion)
994 return NULL;
20f06221 995
726a989a 996 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
997 *type_in = half_type;
998 *type_out = type;
999
1000 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a
RB
1001 var = vect_recog_temp_ssa_var (type, NULL);
1002 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
1003 oprnd0, oprnd1);
726a989a 1004
73fbfcad 1005 if (dump_enabled_p ())
20f06221 1006 {
ccb3ad87 1007 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1008 "vect_recog_widen_sum_pattern: detected: ");
ccb3ad87 1009 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1010 dump_printf (MSG_NOTE, "\n");
20f06221 1011 }
d29de1bf
DN
1012
1013 /* We don't allow changing the order of the computation in the inner-loop
1014 when doing outer-loop vectorization. */
51312233 1015 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
d29de1bf 1016
726a989a 1017 return pattern_stmt;
20f06221
DN
1018}
1019
1020
1107f3ae
IR
1021/* Return TRUE if the operation in STMT can be performed on a smaller type.
1022
1023 Input:
1024 STMT - a statement to check.
1025 DEF - we support operations with two operands, one of which is constant.
1026 The other operand can be defined by a demotion operation, or by a
1027 previous statement in a sequence of over-promoted operations. In the
1028 later case DEF is used to replace that operand. (It is defined by a
1029 pattern statement we created for the previous statement in the
1030 sequence).
1031
1032 Input/output:
1033 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1034 NULL, it's the type of DEF.
1035 STMTS - additional pattern statements. If a pattern statement (type
1036 conversion) is created in this function, its original statement is
1037 added to STMTS.
1038
1039 Output:
1040 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1041 operands to use in the new pattern statement for STMT (will be created
1042 in vect_recog_over_widening_pattern ()).
1043 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1044 statements for STMT: the first one is a type promotion and the second
1045 one is the operation itself. We return the type promotion statement
363477c0 1046 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1107f3ae
IR
1047 the second pattern statement. */
1048
1049static bool
1050vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1051 tree *op0, tree *op1, gimple *new_def_stmt,
9771b263 1052 vec<gimple> *stmts)
1107f3ae
IR
1053{
1054 enum tree_code code;
1055 tree const_oprnd, oprnd;
83d5977e 1056 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1107f3ae
IR
1057 gimple def_stmt, new_stmt;
1058 bool first = false;
bc4fb355 1059 bool promotion;
f5709183 1060
d6e1acf6
JJ
1061 *op0 = NULL_TREE;
1062 *op1 = NULL_TREE;
1107f3ae
IR
1063 *new_def_stmt = NULL;
1064
1065 if (!is_gimple_assign (stmt))
1066 return false;
1067
1068 code = gimple_assign_rhs_code (stmt);
1069 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1070 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1071 return false;
1072
1073 oprnd = gimple_assign_rhs1 (stmt);
1074 const_oprnd = gimple_assign_rhs2 (stmt);
1075 type = gimple_expr_type (stmt);
1076
1077 if (TREE_CODE (oprnd) != SSA_NAME
1078 || TREE_CODE (const_oprnd) != INTEGER_CST)
1079 return false;
1080
9ef7adc0
RG
1081 /* If oprnd has other uses besides that in stmt we cannot mark it
1082 as being part of a pattern only. */
1083 if (!has_single_use (oprnd))
1084 return false;
1085
1107f3ae
IR
1086 /* If we are in the middle of a sequence, we use DEF from a previous
1087 statement. Otherwise, OPRND has to be a result of type promotion. */
1088 if (*new_type)
1089 {
1090 half_type = *new_type;
1091 oprnd = def;
1092 }
1093 else
1094 {
1095 first = true;
bc4fb355 1096 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
f71cf56a
UW
1097 &promotion)
1098 || !promotion
1099 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1107f3ae
IR
1100 return false;
1101 }
1102
1103 /* Can we perform the operation on a smaller type? */
1104 switch (code)
1105 {
1106 case BIT_IOR_EXPR:
1107 case BIT_XOR_EXPR:
1108 case BIT_AND_EXPR:
1109 if (!int_fits_type_p (const_oprnd, half_type))
1110 {
1111 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1112 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1113 return false;
1114
1115 interm_type = build_nonstandard_integer_type (
1116 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1117 if (!int_fits_type_p (const_oprnd, interm_type))
1118 return false;
1119 }
1120
1121 break;
1122
1123 case LSHIFT_EXPR:
1124 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1125 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1126 return false;
1127
1128 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1129 (e.g., if the original value was char, the shift amount is at most 8
1130 if we want to use short). */
1131 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1132 return false;
1133
1134 interm_type = build_nonstandard_integer_type (
1135 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1136
1137 if (!vect_supportable_shift (code, interm_type))
1138 return false;
1139
1140 break;
1141
1142 case RSHIFT_EXPR:
1143 if (vect_supportable_shift (code, half_type))
1144 break;
1145
1146 /* Try intermediate type - HALF_TYPE is not supported. */
1147 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1148 return false;
1149
1150 interm_type = build_nonstandard_integer_type (
1151 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1152
1153 if (!vect_supportable_shift (code, interm_type))
1154 return false;
1155
1156 break;
1157
1158 default:
1159 gcc_unreachable ();
1160 }
1161
1162 /* There are four possible cases:
1163 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1164 the first statement in the sequence)
1165 a. The original, HALF_TYPE, is not enough - we replace the promotion
1166 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1167 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1168 promotion.
1169 2. OPRND is defined by a pattern statement we created.
1170 a. Its type is not sufficient for the operation, we create a new stmt:
1171 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1172 this statement in NEW_DEF_STMT, and it is later put in
363477c0 1173 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1107f3ae
IR
1174 b. OPRND is good to use in the new statement. */
1175 if (first)
1176 {
1177 if (interm_type)
1178 {
1179 /* Replace the original type conversion HALF_TYPE->TYPE with
1180 HALF_TYPE->INTERM_TYPE. */
1181 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1182 {
1183 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1184 /* Check if the already created pattern stmt is what we need. */
1185 if (!is_gimple_assign (new_stmt)
1186 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1187 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1188 return false;
1189
9771b263 1190 stmts->safe_push (def_stmt);
1107f3ae
IR
1191 oprnd = gimple_assign_lhs (new_stmt);
1192 }
1193 else
1194 {
1195 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1196 oprnd = gimple_assign_rhs1 (def_stmt);
83d5977e 1197 new_oprnd = make_ssa_name (interm_type, NULL);
1107f3ae
IR
1198 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1199 oprnd, NULL_TREE);
1107f3ae 1200 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
9771b263 1201 stmts->safe_push (def_stmt);
1107f3ae
IR
1202 oprnd = new_oprnd;
1203 }
1204 }
1205 else
1206 {
1207 /* Retrieve the operand before the type promotion. */
1208 oprnd = gimple_assign_rhs1 (def_stmt);
1209 }
1210 }
1211 else
1212 {
1213 if (interm_type)
1214 {
1215 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
83d5977e 1216 new_oprnd = make_ssa_name (interm_type, NULL);
1107f3ae
IR
1217 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1218 oprnd, NULL_TREE);
1107f3ae
IR
1219 oprnd = new_oprnd;
1220 *new_def_stmt = new_stmt;
1221 }
1222
1223 /* Otherwise, OPRND is already set. */
1224 }
1225
1226 if (interm_type)
1227 *new_type = interm_type;
1228 else
1229 *new_type = half_type;
1230
1231 *op0 = oprnd;
1232 *op1 = fold_convert (*new_type, const_oprnd);
1233
1234 return true;
1235}
1236
1237
1238/* Try to find a statement or a sequence of statements that can be performed
1239 on a smaller type:
1240
1241 type x_t;
1242 TYPE x_T, res0_T, res1_T;
1243 loop:
1244 S1 x_t = *p;
1245 S2 x_T = (TYPE) x_t;
1246 S3 res0_T = op (x_T, C0);
1247 S4 res1_T = op (res0_T, C1);
1248 S5 ... = () res1_T; - type demotion
1249
1250 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1251 constants.
1252 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1253 be 'type' or some intermediate type. For now, we expect S5 to be a type
71c92d17 1254 demotion operation. We also check that S3 and S4 have only one use. */
1107f3ae 1255
1107f3ae 1256static gimple
9771b263 1257vect_recog_over_widening_pattern (vec<gimple> *stmts,
1107f3ae
IR
1258 tree *type_in, tree *type_out)
1259{
9771b263 1260 gimple stmt = stmts->pop ();
1107f3ae 1261 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
9a7a4398 1262 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
83d5977e 1263 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1107f3ae 1264 bool first;
b2a1a74d 1265 tree type = NULL;
1107f3ae
IR
1266
1267 first = true;
1268 while (1)
1269 {
1270 if (!vinfo_for_stmt (stmt)
1271 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1272 return NULL;
1273
1274 new_def_stmt = NULL;
1275 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1276 &op0, &op1, &new_def_stmt,
1277 stmts))
1278 {
1279 if (first)
1280 return NULL;
1281 else
1282 break;
1283 }
1284
1285 /* STMT can be performed on a smaller type. Check its uses. */
9a7a4398
UW
1286 use_stmt = vect_single_imm_use (stmt);
1287 if (!use_stmt || !is_gimple_assign (use_stmt))
1107f3ae
IR
1288 return NULL;
1289
1290 /* Create pattern statement for STMT. */
1291 vectype = get_vectype_for_scalar_type (new_type);
1292 if (!vectype)
1293 return NULL;
1294
1295 /* We want to collect all the statements for which we create pattern
1296 statetments, except for the case when the last statement in the
1297 sequence doesn't have a corresponding pattern statement. In such
1298 case we associate the last pattern statement with the last statement
36ba4aae 1299 in the sequence. Therefore, we only add the original statement to
1107f3ae
IR
1300 the list if we know that it is not the last. */
1301 if (prev_stmt)
9771b263 1302 stmts->safe_push (prev_stmt);
1107f3ae
IR
1303
1304 var = vect_recog_temp_ssa_var (new_type, NULL);
62371b92
JJ
1305 pattern_stmt
1306 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1307 op0, op1);
1107f3ae 1308 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
083481d8 1309 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1107f3ae 1310
73fbfcad 1311 if (dump_enabled_p ())
1107f3ae 1312 {
ccb3ad87 1313 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1314 "created pattern stmt: ");
ccb3ad87 1315 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1316 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1317 }
1318
b2a1a74d 1319 type = gimple_expr_type (stmt);
1107f3ae
IR
1320 prev_stmt = stmt;
1321 stmt = use_stmt;
1322
1323 first = false;
1324 }
1325
1326 /* We got a sequence. We expect it to end with a type demotion operation.
1327 Otherwise, we quit (for now). There are three possible cases: the
1328 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1329 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1330 NEW_TYPE differs (we create a new conversion statement). */
1331 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1332 {
1333 use_lhs = gimple_assign_lhs (use_stmt);
1334 use_type = TREE_TYPE (use_lhs);
82db3d43 1335 /* Support only type demotion or signedess change. */
1107f3ae 1336 if (!INTEGRAL_TYPE_P (use_type)
82db3d43 1337 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1107f3ae
IR
1338 return NULL;
1339
82db3d43
IR
1340 /* Check that NEW_TYPE is not bigger than the conversion result. */
1341 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1342 return NULL;
1343
1107f3ae
IR
1344 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1345 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1346 {
1347 /* Create NEW_TYPE->USE_TYPE conversion. */
83d5977e 1348 new_oprnd = make_ssa_name (use_type, NULL);
1107f3ae
IR
1349 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1350 var, NULL_TREE);
1107f3ae
IR
1351 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1352
1353 *type_in = get_vectype_for_scalar_type (new_type);
1354 *type_out = get_vectype_for_scalar_type (use_type);
1355
1356 /* We created a pattern statement for the last statement in the
1357 sequence, so we don't need to associate it with the pattern
1358 statement created for PREV_STMT. Therefore, we add PREV_STMT
1359 to the list in order to mark it later in vect_pattern_recog_1. */
1360 if (prev_stmt)
9771b263 1361 stmts->safe_push (prev_stmt);
1107f3ae
IR
1362 }
1363 else
1364 {
1365 if (prev_stmt)
363477c0
JJ
1366 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1367 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1107f3ae
IR
1368
1369 *type_in = vectype;
1370 *type_out = NULL_TREE;
1371 }
1372
9771b263 1373 stmts->safe_push (use_stmt);
1107f3ae
IR
1374 }
1375 else
1376 /* TODO: support general case, create a conversion to the correct type. */
1377 return NULL;
1378
1379 /* Pattern detected. */
73fbfcad 1380 if (dump_enabled_p ())
1107f3ae 1381 {
ccb3ad87 1382 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1383 "vect_recog_over_widening_pattern: detected: ");
ccb3ad87 1384 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1385 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1386 }
1387
1388 return pattern_stmt;
1389}
1390
36ba4aae
IR
1391/* Detect widening shift pattern:
1392
1393 type a_t;
1394 TYPE a_T, res_T;
1395
1396 S1 a_t = ;
1397 S2 a_T = (TYPE) a_t;
1398 S3 res_T = a_T << CONST;
1399
1400 where type 'TYPE' is at least double the size of type 'type'.
1401
33018845
UW
1402 Also detect cases where the shift result is immediately converted
1403 to another type 'result_type' that is no larger in size than 'TYPE'.
1404 In those cases we perform a widen-shift that directly results in
1405 'result_type', to avoid a possible over-widening situation:
36ba4aae 1406
33018845 1407 type a_t;
36ba4aae 1408 TYPE a_T, res_T;
33018845 1409 result_type res_result;
36ba4aae
IR
1410
1411 S1 a_t = ;
1412 S2 a_T = (TYPE) a_t;
1413 S3 res_T = a_T << CONST;
33018845
UW
1414 S4 res_result = (result_type) res_T;
1415 '--> res_result' = a_t w<< CONST;
36ba4aae
IR
1416
1417 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1418 create an additional pattern stmt for S2 to create a variable of an
1419 intermediate type, and perform widen-shift on the intermediate type:
1420
1421 type a_t;
1422 interm_type a_it;
1423 TYPE a_T, res_T, res_T';
1424
1425 S1 a_t = ;
1426 S2 a_T = (TYPE) a_t;
1427 '--> a_it = (interm_type) a_t;
1428 S3 res_T = a_T << CONST;
1429 '--> res_T' = a_it <<* CONST;
1430
1431 Input/Output:
1432
1433 * STMTS: Contains a stmt from which the pattern search begins.
1434 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1435 in STMTS. When an intermediate type is used and a pattern statement is
1436 created for S2, we also put S2 here (before S3).
1437
1438 Output:
1439
1440 * TYPE_IN: The type of the input arguments to the pattern.
1441
1442 * TYPE_OUT: The type of the output of this pattern.
1443
1444 * Return value: A new stmt that will be used to replace the sequence of
1445 stmts that constitute the pattern. In this case it will be:
1446 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1447
1448static gimple
9771b263 1449vect_recog_widen_shift_pattern (vec<gimple> *stmts,
36ba4aae
IR
1450 tree *type_in, tree *type_out)
1451{
9771b263 1452 gimple last_stmt = stmts->pop ();
36ba4aae
IR
1453 gimple def_stmt0;
1454 tree oprnd0, oprnd1;
1455 tree type, half_type0;
33018845 1456 gimple pattern_stmt;
36ba4aae 1457 tree vectype, vectype_out = NULL_TREE;
36ba4aae
IR
1458 tree var;
1459 enum tree_code dummy_code;
1460 int dummy_int;
9771b263 1461 vec<tree> dummy_vec;
33018845 1462 gimple use_stmt;
bc4fb355 1463 bool promotion;
36ba4aae
IR
1464
1465 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1466 return NULL;
1467
36ba4aae 1468 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
33018845 1469 return NULL;
36ba4aae
IR
1470
1471 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1472 return NULL;
1473
1474 oprnd0 = gimple_assign_rhs1 (last_stmt);
1475 oprnd1 = gimple_assign_rhs2 (last_stmt);
1476 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1477 return NULL;
1478
1479 /* Check operand 0: it has to be defined by a type promotion. */
bc4fb355
IR
1480 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1481 &promotion)
1482 || !promotion)
1483 return NULL;
36ba4aae
IR
1484
1485 /* Check operand 1: has to be positive. We check that it fits the type
1486 in vect_handle_widen_op_by_const (). */
1487 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1488 return NULL;
1489
1490 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1491 type = gimple_expr_type (last_stmt);
1492
33018845
UW
1493 /* Check for subsequent conversion to another type. */
1494 use_stmt = vect_single_imm_use (last_stmt);
1495 if (use_stmt && is_gimple_assign (use_stmt)
1496 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1497 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1498 {
1499 tree use_lhs = gimple_assign_lhs (use_stmt);
1500 tree use_type = TREE_TYPE (use_lhs);
1501
1502 if (INTEGRAL_TYPE_P (use_type)
1503 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1504 {
1505 last_stmt = use_stmt;
1506 type = use_type;
1507 }
1508 }
1509
36ba4aae
IR
1510 /* Check if this a widening operation. */
1511 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1512 &oprnd0, stmts,
1513 type, &half_type0, def_stmt0))
1514 return NULL;
1515
36ba4aae 1516 /* Pattern detected. */
73fbfcad 1517 if (dump_enabled_p ())
ccb3ad87 1518 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1519 "vect_recog_widen_shift_pattern: detected:\n");
36ba4aae
IR
1520
1521 /* Check target support. */
1522 vectype = get_vectype_for_scalar_type (half_type0);
1523 vectype_out = get_vectype_for_scalar_type (type);
1524
1525 if (!vectype
1526 || !vectype_out
1527 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1528 vectype_out, vectype,
a86ec597
RH
1529 &dummy_code, &dummy_code,
1530 &dummy_int, &dummy_vec))
36ba4aae
IR
1531 return NULL;
1532
1533 *type_in = vectype;
1534 *type_out = vectype_out;
1535
1536 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1537 var = vect_recog_temp_ssa_var (type, NULL);
1538 pattern_stmt =
1539 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1540
73fbfcad 1541 if (dump_enabled_p ())
78c60e3d 1542 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
7e9a3abb
JJ
1543
1544 stmts->safe_push (last_stmt);
1545 return pattern_stmt;
1546}
1547
1548/* Detect a rotate pattern wouldn't be otherwise vectorized:
1549
1550 type a_t, b_t, c_t;
1551
1552 S0 a_t = b_t r<< c_t;
1553
1554 Input/Output:
1555
1556 * STMTS: Contains a stmt from which the pattern search begins,
1557 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1558 with a sequence:
1559
1560 S1 d_t = -c_t;
1561 S2 e_t = d_t & (B - 1);
1562 S3 f_t = b_t << c_t;
1563 S4 g_t = b_t >> e_t;
1564 S0 a_t = f_t | g_t;
1565
1566 where B is element bitsize of type.
1567
1568 Output:
1569
1570 * TYPE_IN: The type of the input arguments to the pattern.
1571
1572 * TYPE_OUT: The type of the output of this pattern.
1573
1574 * Return value: A new stmt that will be used to replace the rotate
1575 S0 stmt. */
1576
1577static gimple
1578vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1579{
1580 gimple last_stmt = stmts->pop ();
1581 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1582 gimple pattern_stmt, def_stmt;
1583 enum tree_code rhs_code;
1584 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1585 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1586 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1587 enum vect_def_type dt;
1588 optab optab1, optab2;
68119618 1589 edge ext_def = NULL;
7e9a3abb
JJ
1590
1591 if (!is_gimple_assign (last_stmt))
1592 return NULL;
1593
1594 rhs_code = gimple_assign_rhs_code (last_stmt);
1595 switch (rhs_code)
1596 {
1597 case LROTATE_EXPR:
1598 case RROTATE_EXPR:
1599 break;
1600 default:
1601 return NULL;
1602 }
1603
1604 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1605 return NULL;
1606
1607 lhs = gimple_assign_lhs (last_stmt);
1608 oprnd0 = gimple_assign_rhs1 (last_stmt);
1609 type = TREE_TYPE (oprnd0);
1610 oprnd1 = gimple_assign_rhs2 (last_stmt);
1611 if (TREE_CODE (oprnd0) != SSA_NAME
1612 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1613 || !INTEGRAL_TYPE_P (type)
1614 || !TYPE_UNSIGNED (type))
1615 return NULL;
1616
1617 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1618 &def, &dt))
1619 return NULL;
1620
1621 if (dt != vect_internal_def
1622 && dt != vect_constant_def
1623 && dt != vect_external_def)
1624 return NULL;
1625
1626 vectype = get_vectype_for_scalar_type (type);
1627 if (vectype == NULL_TREE)
1628 return NULL;
1629
1630 /* If vector/vector or vector/scalar rotate is supported by the target,
1631 don't do anything here. */
1632 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1633 if (optab1
1634 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1635 return NULL;
1636
1637 if (bb_vinfo != NULL || dt != vect_internal_def)
1638 {
1639 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1640 if (optab2
1641 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1642 return NULL;
1643 }
1644
1645 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1646 don't do anything here either. */
1647 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1648 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1649 if (!optab1
1650 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1651 || !optab2
1652 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1653 {
1654 if (bb_vinfo == NULL && dt == vect_internal_def)
1655 return NULL;
1656 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1657 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1658 if (!optab1
1659 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1660 || !optab2
1661 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1662 return NULL;
1663 }
1664
1665 *type_in = vectype;
1666 *type_out = vectype;
1667 if (*type_in == NULL_TREE)
1668 return NULL;
1669
68119618
JJ
1670 if (dt == vect_external_def
1671 && TREE_CODE (oprnd1) == SSA_NAME
1672 && loop_vinfo)
1673 {
1674 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1675 ext_def = loop_preheader_edge (loop);
1676 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1677 {
1678 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1679 if (bb == NULL
1680 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1681 ext_def = NULL;
1682 }
1683 }
1684
7e9a3abb
JJ
1685 def = NULL_TREE;
1686 if (TREE_CODE (oprnd1) == INTEGER_CST
1687 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1688 def = oprnd1;
1689 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1690 {
1691 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1692 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1693 && TYPE_PRECISION (TREE_TYPE (rhs1))
1694 == TYPE_PRECISION (type))
1695 def = rhs1;
1696 }
1697
1698 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1699 if (def == NULL_TREE)
1700 {
1701 def = vect_recog_temp_ssa_var (type, NULL);
1702 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1703 NULL_TREE);
68119618
JJ
1704 if (ext_def)
1705 {
1706 basic_block new_bb
1707 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1708 gcc_assert (!new_bb);
1709 }
1710 else
1711 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
1712 }
1713 stype = TREE_TYPE (def);
1714
1715 if (TREE_CODE (def) == INTEGER_CST)
1716 {
cc269bb6 1717 if (!tree_fits_uhwi_p (def)
7d362f6c 1718 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
7e9a3abb
JJ
1719 || integer_zerop (def))
1720 return NULL;
1721 def2 = build_int_cst (stype,
1722 GET_MODE_PRECISION (TYPE_MODE (type))
ae7e9ddd 1723 - tree_to_uhwi (def));
7e9a3abb
JJ
1724 }
1725 else
1726 {
1727 tree vecstype = get_vectype_for_scalar_type (stype);
1728 stmt_vec_info def_stmt_vinfo;
1729
1730 if (vecstype == NULL_TREE)
1731 return NULL;
1732 def2 = vect_recog_temp_ssa_var (stype, NULL);
1733 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def,
1734 NULL_TREE);
68119618
JJ
1735 if (ext_def)
1736 {
1737 basic_block new_bb
1738 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1739 gcc_assert (!new_bb);
1740 }
1741 else
1742 {
1743 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1744 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1745 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1746 append_pattern_def_seq (stmt_vinfo, def_stmt);
1747 }
7e9a3abb
JJ
1748
1749 def2 = vect_recog_temp_ssa_var (stype, NULL);
1750 tree mask
1751 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1752 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1753 gimple_assign_lhs (def_stmt),
1754 mask);
68119618
JJ
1755 if (ext_def)
1756 {
1757 basic_block new_bb
1758 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1759 gcc_assert (!new_bb);
1760 }
1761 else
1762 {
1763 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1764 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1765 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1766 append_pattern_def_seq (stmt_vinfo, def_stmt);
1767 }
7e9a3abb
JJ
1768 }
1769
1770 var1 = vect_recog_temp_ssa_var (type, NULL);
1771 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1772 ? LSHIFT_EXPR : RSHIFT_EXPR,
1773 var1, oprnd0, def);
1774 append_pattern_def_seq (stmt_vinfo, def_stmt);
1775
1776 var2 = vect_recog_temp_ssa_var (type, NULL);
1777 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
1778 ? RSHIFT_EXPR : LSHIFT_EXPR,
1779 var2, oprnd0, def2);
1780 append_pattern_def_seq (stmt_vinfo, def_stmt);
1781
1782 /* Pattern detected. */
1783 if (dump_enabled_p ())
1784 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1785 "vect_recog_rotate_pattern: detected:\n");
7e9a3abb
JJ
1786
1787 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1788 var = vect_recog_temp_ssa_var (type, NULL);
1789 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
1790
1791 if (dump_enabled_p ())
1792 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
36ba4aae 1793
9771b263 1794 stmts->safe_push (last_stmt);
36ba4aae
IR
1795 return pattern_stmt;
1796}
1107f3ae 1797
732a0ad3
JJ
1798/* Detect a vector by vector shift pattern that wouldn't be otherwise
1799 vectorized:
1800
1801 type a_t;
1802 TYPE b_T, res_T;
1803
1804 S1 a_t = ;
1805 S2 b_T = ;
1806 S3 res_T = b_T op a_t;
1807
1808 where type 'TYPE' is a type with different size than 'type',
1809 and op is <<, >> or rotate.
1810
1811 Also detect cases:
1812
1813 type a_t;
1814 TYPE b_T, c_T, res_T;
1815
1816 S0 c_T = ;
1817 S1 a_t = (type) c_T;
1818 S2 b_T = ;
1819 S3 res_T = b_T op a_t;
1820
1821 Input/Output:
1822
1823 * STMTS: Contains a stmt from which the pattern search begins,
1824 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1825 with a shift/rotate which has same type on both operands, in the
1826 second case just b_T op c_T, in the first case with added cast
363477c0 1827 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
1828
1829 Output:
1830
1831 * TYPE_IN: The type of the input arguments to the pattern.
1832
1833 * TYPE_OUT: The type of the output of this pattern.
1834
1835 * Return value: A new stmt that will be used to replace the shift/rotate
1836 S3 stmt. */
1837
1838static gimple
9771b263 1839vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
732a0ad3
JJ
1840 tree *type_in, tree *type_out)
1841{
9771b263 1842 gimple last_stmt = stmts->pop ();
732a0ad3
JJ
1843 tree oprnd0, oprnd1, lhs, var;
1844 gimple pattern_stmt, def_stmt;
1845 enum tree_code rhs_code;
1846 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1847 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 1848 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
732a0ad3
JJ
1849 enum vect_def_type dt;
1850 tree def;
1851
1852 if (!is_gimple_assign (last_stmt))
1853 return NULL;
1854
1855 rhs_code = gimple_assign_rhs_code (last_stmt);
1856 switch (rhs_code)
1857 {
1858 case LSHIFT_EXPR:
1859 case RSHIFT_EXPR:
1860 case LROTATE_EXPR:
1861 case RROTATE_EXPR:
1862 break;
1863 default:
1864 return NULL;
1865 }
1866
1867 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1868 return NULL;
1869
1870 lhs = gimple_assign_lhs (last_stmt);
1871 oprnd0 = gimple_assign_rhs1 (last_stmt);
1872 oprnd1 = gimple_assign_rhs2 (last_stmt);
1873 if (TREE_CODE (oprnd0) != SSA_NAME
1874 || TREE_CODE (oprnd1) != SSA_NAME
1875 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
1876 || TYPE_PRECISION (TREE_TYPE (oprnd1))
1877 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
1878 || TYPE_PRECISION (TREE_TYPE (lhs))
1879 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
1880 return NULL;
1881
f5709183 1882 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 1883 &def, &dt))
732a0ad3
JJ
1884 return NULL;
1885
1886 if (dt != vect_internal_def)
1887 return NULL;
1888
1889 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
1890 *type_out = *type_in;
1891 if (*type_in == NULL_TREE)
1892 return NULL;
1893
1894 def = NULL_TREE;
1895 if (gimple_assign_cast_p (def_stmt))
1896 {
1897 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1898 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
1899 && TYPE_PRECISION (TREE_TYPE (rhs1))
1900 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
1901 def = rhs1;
1902 }
1903
1904 if (def == NULL_TREE)
1905 {
1906 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1907 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
1908 NULL_TREE);
083481d8 1909 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
1910 }
1911
1912 /* Pattern detected. */
73fbfcad 1913 if (dump_enabled_p ())
ccb3ad87 1914 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1915 "vect_recog_vector_vector_shift_pattern: detected:\n");
732a0ad3
JJ
1916
1917 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1918 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
1919 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
1920
73fbfcad 1921 if (dump_enabled_p ())
78c60e3d 1922 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
732a0ad3 1923
9771b263 1924 stmts->safe_push (last_stmt);
732a0ad3
JJ
1925 return pattern_stmt;
1926}
1927
079c527f 1928/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
1929 otherwise vectorized:
1930
1931 type a_t, b_t;
1932
1933 S1 a_t = b_t / N;
1934
079c527f 1935 where type 'type' is an integral type and N is a constant.
363477c0 1936
079c527f 1937 Similarly handle modulo by a constant:
363477c0
JJ
1938
1939 S4 a_t = b_t % N;
1940
1941 Input/Output:
1942
1943 * STMTS: Contains a stmt from which the pattern search begins,
079c527f
JJ
1944 i.e. the division stmt. S1 is replaced by if N is a power
1945 of two constant and type is signed:
363477c0
JJ
1946 S3 y_t = b_t < 0 ? N - 1 : 0;
1947 S2 x_t = b_t + y_t;
1948 S1' a_t = x_t >> log2 (N);
1949
079c527f
JJ
1950 S4 is replaced if N is a power of two constant and
1951 type is signed by (where *_T temporaries have unsigned type):
363477c0
JJ
1952 S9 y_T = b_t < 0 ? -1U : 0U;
1953 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1954 S7 z_t = (type) z_T;
1955 S6 w_t = b_t + z_t;
1956 S5 x_t = w_t & (N - 1);
1957 S4' a_t = x_t - z_t;
1958
1959 Output:
1960
1961 * TYPE_IN: The type of the input arguments to the pattern.
1962
1963 * TYPE_OUT: The type of the output of this pattern.
1964
1965 * Return value: A new stmt that will be used to replace the division
1966 S1 or modulo S4 stmt. */
1967
1968static gimple
9771b263 1969vect_recog_divmod_pattern (vec<gimple> *stmts,
079c527f 1970 tree *type_in, tree *type_out)
363477c0 1971{
9771b263 1972 gimple last_stmt = stmts->pop ();
5deb57cb 1973 tree oprnd0, oprnd1, vectype, itype, cond;
363477c0
JJ
1974 gimple pattern_stmt, def_stmt;
1975 enum tree_code rhs_code;
1976 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1977 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
079c527f 1978 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
363477c0 1979 optab optab;
00f07b86 1980 tree q;
079c527f 1981 int dummy_int, prec;
079c527f 1982 stmt_vec_info def_stmt_vinfo;
363477c0
JJ
1983
1984 if (!is_gimple_assign (last_stmt))
1985 return NULL;
1986
1987 rhs_code = gimple_assign_rhs_code (last_stmt);
1988 switch (rhs_code)
1989 {
1990 case TRUNC_DIV_EXPR:
1991 case TRUNC_MOD_EXPR:
1992 break;
1993 default:
1994 return NULL;
1995 }
1996
1997 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1998 return NULL;
1999
2000 oprnd0 = gimple_assign_rhs1 (last_stmt);
2001 oprnd1 = gimple_assign_rhs2 (last_stmt);
2002 itype = TREE_TYPE (oprnd0);
2003 if (TREE_CODE (oprnd0) != SSA_NAME
2004 || TREE_CODE (oprnd1) != INTEGER_CST
2005 || TREE_CODE (itype) != INTEGER_TYPE
079c527f 2006 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
363477c0
JJ
2007 return NULL;
2008
2009 vectype = get_vectype_for_scalar_type (itype);
2010 if (vectype == NULL_TREE)
2011 return NULL;
2012
2013 /* If the target can handle vectorized division or modulo natively,
2014 don't attempt to optimize this. */
2015 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2225b9f2 2016 if (optab != unknown_optab)
363477c0
JJ
2017 {
2018 enum machine_mode vec_mode = TYPE_MODE (vectype);
2019 int icode = (int) optab_handler (optab, vec_mode);
e6d4f8f5 2020 if (icode != CODE_FOR_nothing)
363477c0
JJ
2021 return NULL;
2022 }
2023
079c527f
JJ
2024 prec = TYPE_PRECISION (itype);
2025 if (integer_pow2p (oprnd1))
363477c0 2026 {
079c527f
JJ
2027 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2028 return NULL;
363477c0 2029
079c527f 2030 /* Pattern detected. */
73fbfcad 2031 if (dump_enabled_p ())
ccb3ad87 2032 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2033 "vect_recog_divmod_pattern: detected:\n");
079c527f
JJ
2034
2035 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2036 build_int_cst (itype, 0));
2037 if (rhs_code == TRUNC_DIV_EXPR)
2038 {
2039 tree var = vect_recog_temp_ssa_var (itype, NULL);
2040 tree shift;
2041 def_stmt
73804b12
RG
2042 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2043 fold_build2 (MINUS_EXPR, itype,
2044 oprnd1,
2045 build_int_cst (itype,
2046 1)),
2047 build_int_cst (itype, 0));
079c527f
JJ
2048 new_pattern_def_seq (stmt_vinfo, def_stmt);
2049 var = vect_recog_temp_ssa_var (itype, NULL);
2050 def_stmt
2051 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2052 gimple_assign_lhs (def_stmt));
2053 append_pattern_def_seq (stmt_vinfo, def_stmt);
2054
2055 shift = build_int_cst (itype, tree_log2 (oprnd1));
2056 pattern_stmt
2057 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2058 vect_recog_temp_ssa_var (itype,
2059 NULL),
2060 var, shift);
2061 }
2062 else
2063 {
2064 tree signmask;
2065 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2066 if (compare_tree_int (oprnd1, 2) == 0)
2067 {
2068 signmask = vect_recog_temp_ssa_var (itype, NULL);
2069 def_stmt
73804b12
RG
2070 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2071 build_int_cst (itype, 1),
2072 build_int_cst (itype, 0));
079c527f
JJ
2073 append_pattern_def_seq (stmt_vinfo, def_stmt);
2074 }
2075 else
2076 {
2077 tree utype
2078 = build_nonstandard_integer_type (prec, 1);
2079 tree vecutype = get_vectype_for_scalar_type (utype);
2080 tree shift
2081 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2082 - tree_log2 (oprnd1));
2083 tree var = vect_recog_temp_ssa_var (utype, NULL);
2084
2085 def_stmt
73804b12
RG
2086 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2087 build_int_cst (utype, -1),
2088 build_int_cst (utype, 0));
079c527f
JJ
2089 def_stmt_vinfo
2090 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2091 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2092 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2093 append_pattern_def_seq (stmt_vinfo, def_stmt);
2094 var = vect_recog_temp_ssa_var (utype, NULL);
2095 def_stmt
2096 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2097 gimple_assign_lhs (def_stmt),
2098 shift);
2099 def_stmt_vinfo
2100 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2101 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2102 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2103 append_pattern_def_seq (stmt_vinfo, def_stmt);
2104 signmask = vect_recog_temp_ssa_var (itype, NULL);
2105 def_stmt
2106 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
2107 NULL_TREE);
2108 append_pattern_def_seq (stmt_vinfo, def_stmt);
2109 }
2110 def_stmt
2111 = gimple_build_assign_with_ops (PLUS_EXPR,
2112 vect_recog_temp_ssa_var (itype,
2113 NULL),
2114 oprnd0, signmask);
2115 append_pattern_def_seq (stmt_vinfo, def_stmt);
2116 def_stmt
2117 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2118 vect_recog_temp_ssa_var (itype,
2119 NULL),
2120 gimple_assign_lhs (def_stmt),
2121 fold_build2 (MINUS_EXPR, itype,
2122 oprnd1,
2123 build_int_cst (itype,
2124 1)));
2125 append_pattern_def_seq (stmt_vinfo, def_stmt);
2126
2127 pattern_stmt
2128 = gimple_build_assign_with_ops (MINUS_EXPR,
2129 vect_recog_temp_ssa_var (itype,
2130 NULL),
2131 gimple_assign_lhs (def_stmt),
2132 signmask);
2133 }
2134
73fbfcad 2135 if (dump_enabled_p ())
78c60e3d
SS
2136 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2137 0);
079c527f 2138
9771b263 2139 stmts->safe_push (last_stmt);
079c527f
JJ
2140
2141 *type_in = vectype;
2142 *type_out = vectype;
2143 return pattern_stmt;
363477c0 2144 }
079c527f 2145
6b58915b
RS
2146 if (prec > HOST_BITS_PER_WIDE_INT
2147 || integer_zerop (oprnd1))
079c527f
JJ
2148 return NULL;
2149
00f07b86
RH
2150 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2151 return NULL;
079c527f
JJ
2152
2153 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2154
2155 if (TYPE_UNSIGNED (itype))
363477c0 2156 {
079c527f
JJ
2157 unsigned HOST_WIDE_INT mh, ml;
2158 int pre_shift, post_shift;
6b58915b
RS
2159 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2160 & GET_MODE_MASK (TYPE_MODE (itype)));
5deb57cb 2161 tree t1, t2, t3, t4;
079c527f
JJ
2162
2163 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2164 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2165 return NULL;
2166
2167 /* Find a suitable multiplier and right shift count
2168 instead of multiplying with D. */
2169 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2170
2171 /* If the suggested multiplier is more than SIZE bits, we can do better
2172 for even divisors, using an initial right shift. */
2173 if (mh != 0 && (d & 1) == 0)
363477c0 2174 {
079c527f
JJ
2175 pre_shift = floor_log2 (d & -d);
2176 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2177 &ml, &post_shift, &dummy_int);
2178 gcc_assert (!mh);
2179 }
2180 else
2181 pre_shift = 0;
2182
2183 if (mh != 0)
2184 {
2185 if (post_shift - 1 >= prec)
2186 return NULL;
2187
5deb57cb
JJ
2188 /* t1 = oprnd0 h* ml;
2189 t2 = oprnd0 - t1;
2190 t3 = t2 >> 1;
2191 t4 = t1 + t3;
2192 q = t4 >> (post_shift - 1); */
2193 t1 = vect_recog_temp_ssa_var (itype, NULL);
363477c0 2194 def_stmt
5deb57cb 2195 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
079c527f
JJ
2196 build_int_cst (itype, ml));
2197 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2198
5deb57cb 2199 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2200 def_stmt
5deb57cb 2201 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
083481d8 2202 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2203
2204 t3 = vect_recog_temp_ssa_var (itype, NULL);
2205 def_stmt
5deb57cb 2206 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
079c527f
JJ
2207 integer_one_node);
2208 append_pattern_def_seq (stmt_vinfo, def_stmt);
2209
5deb57cb 2210 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2211 def_stmt
5deb57cb 2212 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
079c527f
JJ
2213
2214 if (post_shift != 1)
2215 {
2216 append_pattern_def_seq (stmt_vinfo, def_stmt);
2217
5deb57cb 2218 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2219 pattern_stmt
5deb57cb 2220 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
079c527f
JJ
2221 build_int_cst (itype,
2222 post_shift
2223 - 1));
2224 }
2225 else
2226 {
5deb57cb 2227 q = t4;
079c527f
JJ
2228 pattern_stmt = def_stmt;
2229 }
363477c0
JJ
2230 }
2231 else
2232 {
079c527f
JJ
2233 if (pre_shift >= prec || post_shift >= prec)
2234 return NULL;
2235
2236 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2237 t2 = t1 h* ml;
2238 q = t2 >> post_shift; */
079c527f
JJ
2239 if (pre_shift)
2240 {
2241 t1 = vect_recog_temp_ssa_var (itype, NULL);
2242 def_stmt
2243 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2244 build_int_cst (NULL,
2245 pre_shift));
2246 append_pattern_def_seq (stmt_vinfo, def_stmt);
2247 }
2248 else
2249 t1 = oprnd0;
363477c0 2250
5deb57cb 2251 t2 = vect_recog_temp_ssa_var (itype, NULL);
363477c0 2252 def_stmt
5deb57cb 2253 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
079c527f 2254 build_int_cst (itype, ml));
079c527f 2255
5deb57cb
JJ
2256 if (post_shift)
2257 {
2258 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2259
5deb57cb
JJ
2260 q = vect_recog_temp_ssa_var (itype, NULL);
2261 def_stmt
2262 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2263 build_int_cst (itype,
2264 post_shift));
2265 }
2266 else
2267 q = t2;
2268
2269 pattern_stmt = def_stmt;
079c527f
JJ
2270 }
2271 }
2272 else
2273 {
2274 unsigned HOST_WIDE_INT ml;
4ee4c52c 2275 int post_shift;
6b58915b 2276 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2277 unsigned HOST_WIDE_INT abs_d;
2278 bool add = false;
5deb57cb 2279 tree t1, t2, t3, t4;
079c527f
JJ
2280
2281 /* Give up for -1. */
2282 if (d == -1)
2283 return NULL;
2284
079c527f
JJ
2285 /* Since d might be INT_MIN, we have to cast to
2286 unsigned HOST_WIDE_INT before negating to avoid
2287 undefined signed overflow. */
2288 abs_d = (d >= 0
2289 ? (unsigned HOST_WIDE_INT) d
2290 : - (unsigned HOST_WIDE_INT) d);
2291
2292 /* n rem d = n rem -d */
2293 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2294 {
2295 d = abs_d;
2296 oprnd1 = build_int_cst (itype, abs_d);
2297 }
2298 else if (HOST_BITS_PER_WIDE_INT >= prec
2299 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2300 /* This case is not handled correctly below. */
2301 return NULL;
2302
4ee4c52c 2303 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
079c527f
JJ
2304 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2305 {
2306 add = true;
2307 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2308 }
2309 if (post_shift >= prec)
2310 return NULL;
2311
7abed779 2312 /* t1 = oprnd0 h* ml; */
5deb57cb 2313 t1 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2314 def_stmt
5deb57cb 2315 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
079c527f 2316 build_int_cst (itype, ml));
079c527f
JJ
2317
2318 if (add)
2319 {
5deb57cb 2320 /* t2 = t1 + oprnd0; */
7abed779 2321 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2322 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2323 def_stmt
5deb57cb 2324 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
079c527f
JJ
2325 }
2326 else
5deb57cb 2327 t2 = t1;
079c527f 2328
5deb57cb 2329 if (post_shift)
079c527f 2330 {
5deb57cb 2331 /* t3 = t2 >> post_shift; */
7abed779 2332 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2333 t3 = vect_recog_temp_ssa_var (itype, NULL);
363477c0 2334 def_stmt
5deb57cb 2335 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
079c527f 2336 build_int_cst (itype, post_shift));
363477c0 2337 }
079c527f 2338 else
5deb57cb 2339 t3 = t2;
079c527f 2340
7abed779
JJ
2341 double_int oprnd0_min, oprnd0_max;
2342 int msb = 1;
2343 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2344 {
2345 if (!oprnd0_min.is_negative ())
2346 msb = 0;
2347 else if (oprnd0_max.is_negative ())
2348 msb = -1;
2349 }
079c527f 2350
7abed779
JJ
2351 if (msb == 0 && d >= 0)
2352 {
2353 /* q = t3; */
2354 q = t3;
2355 pattern_stmt = def_stmt;
2356 }
2357 else
2358 {
2359 /* t4 = oprnd0 >> (prec - 1);
2360 or if we know from VRP that oprnd0 >= 0
2361 t4 = 0;
2362 or if we know from VRP that oprnd0 < 0
2363 t4 = -1; */
2364 append_pattern_def_seq (stmt_vinfo, def_stmt);
2365 t4 = vect_recog_temp_ssa_var (itype, NULL);
2366 if (msb != 1)
2367 def_stmt
2368 = gimple_build_assign_with_ops (INTEGER_CST,
2369 t4, build_int_cst (itype, msb),
2370 NULL_TREE);
2371 else
2372 def_stmt
2373 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2374 build_int_cst (itype, prec - 1));
2375 append_pattern_def_seq (stmt_vinfo, def_stmt);
2376
2377 /* q = t3 - t4; or q = t4 - t3; */
2378 q = vect_recog_temp_ssa_var (itype, NULL);
2379 pattern_stmt
2380 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2381 d < 0 ? t3 : t4);
2382 }
079c527f
JJ
2383 }
2384
2385 if (rhs_code == TRUNC_MOD_EXPR)
2386 {
2387 tree r, t1;
2388
2389 /* We divided. Now finish by:
2390 t1 = q * oprnd1;
2391 r = oprnd0 - t1; */
2392 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2393
2394 t1 = vect_recog_temp_ssa_var (itype, NULL);
363477c0 2395 def_stmt
079c527f 2396 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
083481d8 2397 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 2398
079c527f 2399 r = vect_recog_temp_ssa_var (itype, NULL);
363477c0 2400 pattern_stmt
079c527f 2401 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
363477c0
JJ
2402 }
2403
079c527f 2404 /* Pattern detected. */
73fbfcad 2405 if (dump_enabled_p ())
78c60e3d 2406 {
ccb3ad87 2407 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2408 "vect_recog_divmod_pattern: detected: ");
ccb3ad87 2409 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 2410 dump_printf (MSG_NOTE, "\n");
78c60e3d 2411 }
363477c0 2412
9771b263 2413 stmts->safe_push (last_stmt);
363477c0
JJ
2414
2415 *type_in = vectype;
2416 *type_out = vectype;
2417 return pattern_stmt;
2418}
2419
69d2aade
JJ
2420/* Function vect_recog_mixed_size_cond_pattern
2421
2422 Try to find the following pattern:
2423
2424 type x_t, y_t;
2425 TYPE a_T, b_T, c_T;
2426 loop:
2427 S1 a_T = x_t CMP y_t ? b_T : c_T;
2428
2429 where type 'TYPE' is an integral type which has different size
bc4fb355 2430 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 2431 than 'type', the constants need to fit into an integer type
bc4fb355 2432 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
2433
2434 Input:
2435
2436 * LAST_STMT: A stmt from which the pattern search begins.
2437
2438 Output:
2439
2440 * TYPE_IN: The type of the input arguments to the pattern.
2441
2442 * TYPE_OUT: The type of the output of this pattern.
2443
2444 * Return value: A new stmt that will be used to replace the pattern.
2445 Additionally a def_stmt is added.
2446
2447 a_it = x_t CMP y_t ? b_it : c_it;
2448 a_T = (TYPE) a_it; */
2449
2450static gimple
9771b263 2451vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
69d2aade
JJ
2452 tree *type_out)
2453{
9771b263 2454 gimple last_stmt = (*stmts)[0];
69d2aade
JJ
2455 tree cond_expr, then_clause, else_clause;
2456 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
bc4fb355 2457 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
69d2aade
JJ
2458 enum machine_mode cmpmode;
2459 gimple pattern_stmt, def_stmt;
2460 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 2461 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
bc4fb355
IR
2462 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2463 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2464 bool promotion;
2465 tree comp_scalar_type;
69d2aade
JJ
2466
2467 if (!is_gimple_assign (last_stmt)
2468 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2469 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2470 return NULL;
2471
2472 cond_expr = gimple_assign_rhs1 (last_stmt);
2473 then_clause = gimple_assign_rhs2 (last_stmt);
2474 else_clause = gimple_assign_rhs3 (last_stmt);
2475
87aab9b2
JJ
2476 if (!COMPARISON_CLASS_P (cond_expr))
2477 return NULL;
2478
bc4fb355
IR
2479 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2480 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 2481 if (comp_vectype == NULL_TREE)
69d2aade
JJ
2482 return NULL;
2483
2484 type = gimple_expr_type (last_stmt);
bc4fb355
IR
2485 if (types_compatible_p (type, comp_scalar_type)
2486 || ((TREE_CODE (then_clause) != INTEGER_CST
2487 || TREE_CODE (else_clause) != INTEGER_CST)
2488 && !INTEGRAL_TYPE_P (comp_scalar_type))
2489 || !INTEGRAL_TYPE_P (type))
2490 return NULL;
2491
2492 if ((TREE_CODE (then_clause) != INTEGER_CST
2493 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2494 &def_stmt0, &promotion))
2495 || (TREE_CODE (else_clause) != INTEGER_CST
2496 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2497 &def_stmt1, &promotion)))
2498 return NULL;
2499
2500 if (orig_type0 && orig_type1
2501 && !types_compatible_p (orig_type0, orig_type1))
2502 return NULL;
2503
2504 if (orig_type0)
2505 {
2506 if (!types_compatible_p (orig_type0, comp_scalar_type))
2507 return NULL;
2508 then_clause = gimple_assign_rhs1 (def_stmt0);
2509 itype = orig_type0;
2510 }
2511
2512 if (orig_type1)
2513 {
2514 if (!types_compatible_p (orig_type1, comp_scalar_type))
2515 return NULL;
2516 else_clause = gimple_assign_rhs1 (def_stmt1);
2517 itype = orig_type1;
2518 }
2519
69d2aade
JJ
2520 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2521
2522 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2523 return NULL;
2524
2525 vectype = get_vectype_for_scalar_type (type);
2526 if (vectype == NULL_TREE)
2527 return NULL;
2528
2529 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2530 return NULL;
2531
bc4fb355
IR
2532 if (itype == NULL_TREE)
2533 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2534 TYPE_UNSIGNED (type));
2535
69d2aade
JJ
2536 if (itype == NULL_TREE
2537 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2538 return NULL;
2539
2540 vecitype = get_vectype_for_scalar_type (itype);
2541 if (vecitype == NULL_TREE)
2542 return NULL;
2543
2544 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2545 return NULL;
2546
2547 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2548 {
bc4fb355
IR
2549 if ((TREE_CODE (then_clause) == INTEGER_CST
2550 && !int_fits_type_p (then_clause, itype))
2551 || (TREE_CODE (else_clause) == INTEGER_CST
2552 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
2553 return NULL;
2554 }
2555
2556 def_stmt
73804b12
RG
2557 = gimple_build_assign_with_ops (COND_EXPR,
2558 vect_recog_temp_ssa_var (itype, NULL),
2559 unshare_expr (cond_expr),
2560 fold_convert (itype, then_clause),
2561 fold_convert (itype, else_clause));
69d2aade
JJ
2562 pattern_stmt
2563 = gimple_build_assign_with_ops (NOP_EXPR,
2564 vect_recog_temp_ssa_var (type, NULL),
2565 gimple_assign_lhs (def_stmt), NULL_TREE);
2566
083481d8 2567 new_pattern_def_seq (stmt_vinfo, def_stmt);
f5709183 2568 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
69d2aade
JJ
2569 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2570 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2571 *type_in = vecitype;
2572 *type_out = vectype;
2573
73fbfcad 2574 if (dump_enabled_p ())
ccb3ad87 2575 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2576 "vect_recog_mixed_size_cond_pattern: detected:\n");
f5709183 2577
69d2aade
JJ
2578 return pattern_stmt;
2579}
2580
2581
71c92d17
JJ
2582/* Helper function of vect_recog_bool_pattern. Called recursively, return
2583 true if bool VAR can be optimized that way. */
2584
2585static bool
f5709183 2586check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
71c92d17
JJ
2587{
2588 gimple def_stmt;
2589 enum vect_def_type dt;
2590 tree def, rhs1;
2591 enum tree_code rhs_code;
2592
f5709183
IR
2593 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2594 &dt))
71c92d17
JJ
2595 return false;
2596
2597 if (dt != vect_internal_def)
2598 return false;
2599
2600 if (!is_gimple_assign (def_stmt))
2601 return false;
2602
2603 if (!has_single_use (def))
2604 return false;
2605
2606 rhs1 = gimple_assign_rhs1 (def_stmt);
2607 rhs_code = gimple_assign_rhs_code (def_stmt);
2608 switch (rhs_code)
2609 {
2610 case SSA_NAME:
f5709183 2611 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2612
2613 CASE_CONVERT:
2614 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2615 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2616 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2617 return false;
f5709183 2618 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2619
2620 case BIT_NOT_EXPR:
f5709183 2621 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2622
2623 case BIT_AND_EXPR:
2624 case BIT_IOR_EXPR:
2625 case BIT_XOR_EXPR:
f5709183 2626 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
71c92d17 2627 return false;
f5709183
IR
2628 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2629 bb_vinfo);
71c92d17
JJ
2630
2631 default:
2632 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2633 {
2634 tree vecitype, comp_vectype;
2635
2f326699
JJ
2636 /* If the comparison can throw, then is_gimple_condexpr will be
2637 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2638 if (stmt_could_throw_p (def_stmt))
2639 return false;
2640
71c92d17
JJ
2641 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2642 if (comp_vectype == NULL_TREE)
2643 return false;
2644
2645 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2646 {
2647 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2648 tree itype
ab0ef706 2649 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2650 vecitype = get_vectype_for_scalar_type (itype);
2651 if (vecitype == NULL_TREE)
2652 return false;
2653 }
2654 else
2655 vecitype = comp_vectype;
2656 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2657 }
2658 return false;
2659 }
2660}
2661
2662
2663/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2664 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
363477c0 2665 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
71c92d17
JJ
2666
2667static tree
2668adjust_bool_pattern_cast (tree type, tree var)
2669{
2670 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2671 gimple cast_stmt, pattern_stmt;
2672
363477c0 2673 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
71c92d17 2674 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
083481d8 2675 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
71c92d17
JJ
2676 cast_stmt
2677 = gimple_build_assign_with_ops (NOP_EXPR,
2678 vect_recog_temp_ssa_var (type, NULL),
2679 gimple_assign_lhs (pattern_stmt),
2680 NULL_TREE);
2681 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2682 return gimple_assign_lhs (cast_stmt);
2683}
2684
2685
2686/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2687 recursively. VAR is an SSA_NAME that should be transformed from bool
2688 to a wider integer type, OUT_TYPE is the desired final integer type of
2689 the whole pattern, TRUEVAL should be NULL unless optimizing
2690 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2691 in the then_clause, STMTS is where statements with added pattern stmts
2692 should be pushed to. */
2693
2694static tree
2695adjust_bool_pattern (tree var, tree out_type, tree trueval,
9771b263 2696 vec<gimple> *stmts)
71c92d17
JJ
2697{
2698 gimple stmt = SSA_NAME_DEF_STMT (var);
2699 enum tree_code rhs_code, def_rhs_code;
2700 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2701 location_t loc;
2702 gimple pattern_stmt, def_stmt;
2703
2704 rhs1 = gimple_assign_rhs1 (stmt);
2705 rhs2 = gimple_assign_rhs2 (stmt);
2706 rhs_code = gimple_assign_rhs_code (stmt);
2707 loc = gimple_location (stmt);
2708 switch (rhs_code)
2709 {
2710 case SSA_NAME:
2711 CASE_CONVERT:
2712 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2713 itype = TREE_TYPE (irhs1);
2714 pattern_stmt
2715 = gimple_build_assign_with_ops (SSA_NAME,
2716 vect_recog_temp_ssa_var (itype, NULL),
2717 irhs1, NULL_TREE);
2718 break;
2719
2720 case BIT_NOT_EXPR:
2721 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2722 itype = TREE_TYPE (irhs1);
2723 pattern_stmt
2724 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2725 vect_recog_temp_ssa_var (itype, NULL),
2726 irhs1, build_int_cst (itype, 1));
2727 break;
2728
2729 case BIT_AND_EXPR:
2730 /* Try to optimize x = y & (a < b ? 1 : 0); into
2731 x = (a < b ? y : 0);
2732
2733 E.g. for:
2734 bool a_b, b_b, c_b;
2735 TYPE d_T;
2736
2737 S1 a_b = x1 CMP1 y1;
2738 S2 b_b = x2 CMP2 y2;
2739 S3 c_b = a_b & b_b;
2740 S4 d_T = (TYPE) c_b;
2741
2742 we would normally emit:
2743
2744 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2745 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2746 S3' c_T = a_T & b_T;
2747 S4' d_T = c_T;
2748
2749 but we can save one stmt by using the
2750 result of one of the COND_EXPRs in the other COND_EXPR and leave
2751 BIT_AND_EXPR stmt out:
2752
2753 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2754 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2755 S4' f_T = c_T;
2756
2757 At least when VEC_COND_EXPR is implemented using masks
2758 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2759 computes the comparison masks and ands it, in one case with
2760 all ones vector, in the other case with a vector register.
2761 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2762 often more expensive. */
2763 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2764 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2765 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2766 {
2767 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2768 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2769 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2770 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2771 {
2772 gimple tstmt;
2773 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2774 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
9771b263 2775 tstmt = stmts->pop ();
71c92d17 2776 gcc_assert (tstmt == def_stmt);
9771b263 2777 stmts->quick_push (stmt);
71c92d17
JJ
2778 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2779 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2780 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2781 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2782 return irhs2;
2783 }
2784 else
2785 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2786 goto and_ior_xor;
2787 }
2788 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2789 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2790 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2791 {
2792 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2793 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2794 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2795 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2796 {
2797 gimple tstmt;
2798 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2799 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
9771b263 2800 tstmt = stmts->pop ();
71c92d17 2801 gcc_assert (tstmt == def_stmt);
9771b263 2802 stmts->quick_push (stmt);
71c92d17
JJ
2803 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2804 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2805 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2806 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2807 return irhs1;
2808 }
2809 else
2810 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2811 goto and_ior_xor;
2812 }
2813 /* FALLTHRU */
2814 case BIT_IOR_EXPR:
2815 case BIT_XOR_EXPR:
2816 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2817 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2818 and_ior_xor:
2819 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2820 != TYPE_PRECISION (TREE_TYPE (irhs2)))
2821 {
2822 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
2823 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
2824 int out_prec = TYPE_PRECISION (out_type);
2825 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
2826 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
2827 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
2828 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
2829 else
2830 {
2831 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
2832 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
2833 }
2834 }
2835 itype = TREE_TYPE (irhs1);
2836 pattern_stmt
2837 = gimple_build_assign_with_ops (rhs_code,
2838 vect_recog_temp_ssa_var (itype, NULL),
2839 irhs1, irhs2);
2840 break;
2841
2842 default:
2843 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
2844 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2
JJ
2845 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
2846 || (TYPE_PRECISION (TREE_TYPE (rhs1))
2847 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17
JJ
2848 {
2849 enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
2850 itype
ab0ef706 2851 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2852 }
2853 else
2854 itype = TREE_TYPE (rhs1);
2855 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
2856 if (trueval == NULL_TREE)
2857 trueval = build_int_cst (itype, 1);
2858 else
2859 gcc_checking_assert (useless_type_conversion_p (itype,
2860 TREE_TYPE (trueval)));
2861 pattern_stmt
73804b12
RG
2862 = gimple_build_assign_with_ops (COND_EXPR,
2863 vect_recog_temp_ssa_var (itype, NULL),
2864 cond_expr, trueval,
2865 build_int_cst (itype, 0));
71c92d17
JJ
2866 break;
2867 }
2868
9771b263 2869 stmts->safe_push (stmt);
71c92d17
JJ
2870 gimple_set_location (pattern_stmt, loc);
2871 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
2872 return gimple_assign_lhs (pattern_stmt);
2873}
2874
2875
2876/* Function vect_recog_bool_pattern
2877
2878 Try to find pattern like following:
2879
2880 bool a_b, b_b, c_b, d_b, e_b;
2881 TYPE f_T;
2882 loop:
2883 S1 a_b = x1 CMP1 y1;
2884 S2 b_b = x2 CMP2 y2;
2885 S3 c_b = a_b & b_b;
2886 S4 d_b = x3 CMP3 y3;
2887 S5 e_b = c_b | d_b;
2888 S6 f_T = (TYPE) e_b;
2889
2890 where type 'TYPE' is an integral type.
2891
2892 Input:
2893
2894 * LAST_STMT: A stmt at the end from which the pattern
2895 search begins, i.e. cast of a bool to
2896 an integer type.
2897
2898 Output:
2899
2900 * TYPE_IN: The type of the input arguments to the pattern.
2901
2902 * TYPE_OUT: The type of the output of this pattern.
2903
2904 * Return value: A new stmt that will be used to replace the pattern.
2905
2906 Assuming size of TYPE is the same as size of all comparisons
2907 (otherwise some casts would be added where needed), the above
2908 sequence we create related pattern stmts:
2909 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2910 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2911 S4' d_T = x3 CMP3 y3 ? 1 : 0;
2912 S5' e_T = c_T | d_T;
2913 S6' f_T = e_T;
2914
2915 Instead of the above S3' we could emit:
2916 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2917 S3' c_T = a_T | b_T;
2918 but the above is more efficient. */
2919
2920static gimple
9771b263 2921vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
71c92d17
JJ
2922 tree *type_out)
2923{
9771b263 2924 gimple last_stmt = stmts->pop ();
71c92d17
JJ
2925 enum tree_code rhs_code;
2926 tree var, lhs, rhs, vectype;
2927 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2928 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 2929 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
71c92d17
JJ
2930 gimple pattern_stmt;
2931
2932 if (!is_gimple_assign (last_stmt))
2933 return NULL;
2934
2935 var = gimple_assign_rhs1 (last_stmt);
2936 lhs = gimple_assign_lhs (last_stmt);
2937
2938 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
2939 || !TYPE_UNSIGNED (TREE_TYPE (var)))
2940 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
2941 return NULL;
2942
2943 rhs_code = gimple_assign_rhs_code (last_stmt);
2944 if (CONVERT_EXPR_CODE_P (rhs_code))
2945 {
78048b1c
JJ
2946 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
2947 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17
JJ
2948 return NULL;
2949 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
2950 if (vectype == NULL_TREE)
2951 return NULL;
2952
f5709183 2953 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
71c92d17
JJ
2954 return NULL;
2955
2956 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
2957 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2958 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2959 pattern_stmt
2960 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
2961 else
2962 pattern_stmt
2963 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
2964 *type_out = vectype;
2965 *type_in = vectype;
9771b263 2966 stmts->safe_push (last_stmt);
73fbfcad 2967 if (dump_enabled_p ())
ccb3ad87 2968 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2969 "vect_recog_bool_pattern: detected:\n");
f5709183 2970
71c92d17
JJ
2971 return pattern_stmt;
2972 }
ab0ef706
JJ
2973 else if (rhs_code == SSA_NAME
2974 && STMT_VINFO_DATA_REF (stmt_vinfo))
2975 {
2976 stmt_vec_info pattern_stmt_info;
2977 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
2978 gcc_assert (vectype != NULL_TREE);
78336739
JJ
2979 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
2980 return NULL;
f5709183 2981 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
ab0ef706
JJ
2982 return NULL;
2983
2984 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
2985 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
2986 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
2987 {
2988 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
2989 gimple cast_stmt
2990 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
083481d8 2991 new_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
2992 rhs = rhs2;
2993 }
2994 pattern_stmt
2995 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
f5709183
IR
2996 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2997 bb_vinfo);
ab0ef706
JJ
2998 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2999 STMT_VINFO_DATA_REF (pattern_stmt_info)
3000 = STMT_VINFO_DATA_REF (stmt_vinfo);
3001 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3002 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3003 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3004 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3005 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3006 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3007 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3008 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
78048b1c 3009 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
ab0ef706
JJ
3010 *type_out = vectype;
3011 *type_in = vectype;
9771b263 3012 stmts->safe_push (last_stmt);
73fbfcad 3013 if (dump_enabled_p ())
ccb3ad87 3014 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3015 "vect_recog_bool_pattern: detected:\n");
ab0ef706
JJ
3016 return pattern_stmt;
3017 }
71c92d17
JJ
3018 else
3019 return NULL;
3020}
3021
3022
1107f3ae
IR
3023/* Mark statements that are involved in a pattern. */
3024
3025static inline void
3026vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3027 tree pattern_vectype)
3028{
3029 stmt_vec_info pattern_stmt_info, def_stmt_info;
3030 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3031 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
f5709183 3032 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
1107f3ae
IR
3033 gimple def_stmt;
3034
1107f3ae 3035 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
ab0ef706
JJ
3036 if (pattern_stmt_info == NULL)
3037 {
f5709183
IR
3038 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3039 bb_vinfo);
ab0ef706
JJ
3040 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3041 }
3042 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1107f3ae
IR
3043
3044 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3045 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
ab0ef706 3046 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1107f3ae
IR
3047 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3048 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3049 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
363477c0
JJ
3050 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3051 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3052 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
1107f3ae 3053 {
363477c0
JJ
3054 gimple_stmt_iterator si;
3055 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3056 !gsi_end_p (si); gsi_next (&si))
69d2aade 3057 {
363477c0
JJ
3058 def_stmt = gsi_stmt (si);
3059 def_stmt_info = vinfo_for_stmt (def_stmt);
3060 if (def_stmt_info == NULL)
3061 {
f5709183
IR
3062 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3063 bb_vinfo);
363477c0
JJ
3064 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3065 }
3066 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3067 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
3068 STMT_VINFO_DEF_TYPE (def_stmt_info)
3069 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
3070 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3071 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
69d2aade 3072 }
1107f3ae
IR
3073 }
3074}
3075
b8698a0f 3076/* Function vect_pattern_recog_1
20f06221
DN
3077
3078 Input:
3079 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3080 computation pattern.
3081 STMT: A stmt from which the pattern search should start.
3082
3083 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
3084 expression that computes the same functionality and can be used to
3085 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
3086
3087 Output:
b8698a0f
L
3088 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3089 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3090 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
3091 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3092 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3093 to the available target pattern.
3094
b8698a0f 3095 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
3096 for vect_recog_pattern. */
3097
3098static void
92aea285
JJ
3099vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3100 gimple_stmt_iterator si,
9771b263 3101 vec<gimple> *stmts_to_replace)
20f06221 3102{
726a989a 3103 gimple stmt = gsi_stmt (si), pattern_stmt;
383d9c83 3104 stmt_vec_info stmt_info;
383d9c83 3105 loop_vec_info loop_vinfo;
20f06221
DN
3106 tree pattern_vectype;
3107 tree type_in, type_out;
20f06221 3108 enum tree_code code;
b5aeb3bb
IR
3109 int i;
3110 gimple next;
20f06221 3111
9771b263
DN
3112 stmts_to_replace->truncate (0);
3113 stmts_to_replace->quick_push (stmt);
d1fc143d 3114 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
726a989a 3115 if (!pattern_stmt)
b8698a0f
L
3116 return;
3117
9771b263 3118 stmt = stmts_to_replace->last ();
383d9c83
IR
3119 stmt_info = vinfo_for_stmt (stmt);
3120 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3121
b8698a0f
L
3122 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3123 {
3124 /* No need to check target support (already checked by the pattern
3125 recognition function). */
b690cc0f 3126 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
3127 }
3128 else
3129 {
32e8bb8e 3130 enum machine_mode vec_mode;
20f06221
DN
3131 enum insn_code icode;
3132 optab optab;
3133
3134 /* Check target support */
b690cc0f
RG
3135 type_in = get_vectype_for_scalar_type (type_in);
3136 if (!type_in)
3137 return;
3138 if (type_out)
3139 type_out = get_vectype_for_scalar_type (type_out);
3140 else
3141 type_out = type_in;
15bbc165
AO
3142 if (!type_out)
3143 return;
b690cc0f 3144 pattern_vectype = type_out;
03d3e953 3145
726a989a
RB
3146 if (is_gimple_assign (pattern_stmt))
3147 code = gimple_assign_rhs_code (pattern_stmt);
3148 else
3149 {
3150 gcc_assert (is_gimple_call (pattern_stmt));
3151 code = CALL_EXPR;
3152 }
3153
b690cc0f
RG
3154 optab = optab_for_tree_code (code, type_in, optab_default);
3155 vec_mode = TYPE_MODE (type_in);
20f06221 3156 if (!optab
947131ba 3157 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 3158 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
20f06221
DN
3159 return;
3160 }
3161
3162 /* Found a vectorizable pattern. */
73fbfcad 3163 if (dump_enabled_p ())
20f06221 3164 {
ccb3ad87 3165 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3166 "pattern recognized: ");
ccb3ad87 3167 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 3168 dump_printf (MSG_NOTE, "\n");
20f06221 3169 }
b8698a0f 3170
726a989a 3171 /* Mark the stmts that are involved in the pattern. */
1107f3ae 3172 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 3173
b5aeb3bb
IR
3174 /* Patterns cannot be vectorized using SLP, because they change the order of
3175 computation. */
f5709183 3176 if (loop_vinfo)
9771b263 3177 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
f5709183 3178 if (next == stmt)
9771b263 3179 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
51312233 3180
1107f3ae
IR
3181 /* It is possible that additional pattern stmts are created and inserted in
3182 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3183 relevant statements. */
9771b263
DN
3184 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3185 && (unsigned) i < (stmts_to_replace->length () - 1);
51312233
IR
3186 i++)
3187 {
3188 stmt_info = vinfo_for_stmt (stmt);
3189 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
73fbfcad 3190 if (dump_enabled_p ())
51312233 3191 {
ccb3ad87 3192 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3193 "additional pattern stmt: ");
ccb3ad87 3194 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 3195 dump_printf (MSG_NOTE, "\n");
51312233
IR
3196 }
3197
1107f3ae 3198 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
51312233 3199 }
20f06221
DN
3200}
3201
3202
3203/* Function vect_pattern_recog
3204
3205 Input:
3206 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3207 computation idioms.
3208
9d5e7640
IR
3209 Output - for each computation idiom that is detected we create a new stmt
3210 that provides the same functionality and that can be vectorized. We
20f06221
DN
3211 also record some information in the struct_stmt_info of the relevant
3212 stmts, as explained below:
3213
3214 At the entry to this function we have the following stmts, with the
3215 following initial value in the STMT_VINFO fields:
3216
3217 stmt in_pattern_p related_stmt vec_stmt
3218 S1: a_i = .... - - -
3219 S2: a_2 = ..use(a_i).. - - -
3220 S3: a_1 = ..use(a_2).. - - -
3221 S4: a_0 = ..use(a_1).. - - -
3222 S5: ... = ..use(a_0).. - - -
3223
3224 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
3225 represented by a single stmt. We then:
3226 - create a new stmt S6 equivalent to the pattern (the stmt is not
3227 inserted into the code)
20f06221
DN
3228 - fill in the STMT_VINFO fields as follows:
3229
3230 in_pattern_p related_stmt vec_stmt
b8698a0f 3231 S1: a_i = .... - - -
20f06221
DN
3232 S2: a_2 = ..use(a_i).. - - -
3233 S3: a_1 = ..use(a_2).. - - -
20f06221 3234 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 3235 '---> S6: a_new = .... - S4 -
20f06221
DN
3236 S5: ... = ..use(a_0).. - - -
3237
3238 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 3239 to each other through the RELATED_STMT field).
20f06221
DN
3240
3241 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3242 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3243 remain irrelevant unless used by stmts other than S4.
3244
3245 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 3246 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37
IR
3247 a pointer to the new vector stmt VS6 from S6 (as usual).
3248 S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
3249
3250 in_pattern_p related_stmt vec_stmt
3251 S1: a_i = .... - - -
3252 S2: a_2 = ..use(a_i).. - - -
3253 S3: a_1 = ..use(a_2).. - - -
3254 > VS6: va_new = .... - - -
20f06221 3255 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 3256 '---> S6: a_new = .... - S4 VS6
20f06221
DN
3257 > VS5: ... = ..vuse(va_new).. - - -
3258 S5: ... = ..use(a_0).. - - -
3259
9d5e7640 3260 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
3261 elsewhere), and we'll end up with:
3262
b8698a0f 3263 VS6: va_new = ....
83197f37
IR
3264 VS5: ... = ..vuse(va_new)..
3265
3266 In case of more than one pattern statements, e.g., widen-mult with
3267 intermediate type:
3268
3269 S1 a_t = ;
3270 S2 a_T = (TYPE) a_t;
3271 '--> S3: a_it = (interm_type) a_t;
3272 S4 prod_T = a_T * CONST;
3273 '--> S5: prod_T' = a_it w* CONST;
3274
3275 there may be other users of a_T outside the pattern. In that case S2 will
3276 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3277 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3278 be recorded in S3. */
20f06221
DN
3279
3280void
f5709183 3281vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
20f06221 3282{
f5709183 3283 struct loop *loop;
772e61e1 3284 basic_block *bbs;
f5709183 3285 unsigned int nbbs;
726a989a 3286 gimple_stmt_iterator si;
20f06221 3287 unsigned int i, j;
92aea285 3288 vect_recog_func_ptr vect_recog_func;
00f96dc9 3289 auto_vec<gimple, 1> stmts_to_replace;
f5709183 3290 gimple stmt;
20f06221 3291
73fbfcad 3292 if (dump_enabled_p ())
78c60e3d 3293 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3294 "=== vect_pattern_recog ===\n");
20f06221 3295
f5709183
IR
3296 if (loop_vinfo)
3297 {
3298 loop = LOOP_VINFO_LOOP (loop_vinfo);
3299 bbs = LOOP_VINFO_BBS (loop_vinfo);
3300 nbbs = loop->num_nodes;
3301 }
3302 else
3303 {
772e61e1 3304 bbs = &BB_VINFO_BB (bb_vinfo);
f5709183 3305 nbbs = 1;
f5709183
IR
3306 }
3307
20f06221
DN
3308 /* Scan through the loop stmts, applying the pattern recognition
3309 functions starting at each stmt visited: */
3310 for (i = 0; i < nbbs; i++)
3311 {
3312 basic_block bb = bbs[i];
726a989a 3313 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
20f06221 3314 {
f5709183
IR
3315 if (bb_vinfo && (stmt = gsi_stmt (si))
3316 && vinfo_for_stmt (stmt)
3317 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3318 continue;
3319
20f06221
DN
3320 /* Scan over all generic vect_recog_xxx_pattern functions. */
3321 for (j = 0; j < NUM_PATTERNS; j++)
3322 {
92aea285
JJ
3323 vect_recog_func = vect_vect_recog_func_ptrs[j];
3324 vect_pattern_recog_1 (vect_recog_func, si,
d1fc143d 3325 &stmts_to_replace);
20f06221
DN
3326 }
3327 }
3328 }
3329}