]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
This patch rewrites the old VEC macro-based interface into a new one
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
4a61a337 1/* Analysis Utilities for Loop Vectorization.
3a542b98 2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
777e6799 3 Free Software Foundation, Inc.
4a61a337 4 Contributed by Dorit Nuzman <dorit@il.ibm.com>
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
8c4c00c1 10Software Foundation; either version 3, or (at your option) any later
4a61a337 11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
8c4c00c1 19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
4a61a337 21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
25#include "tm.h"
26#include "ggc.h"
27#include "tree.h"
4a61a337 28#include "target.h"
29#include "basic-block.h"
ce084dfc 30#include "gimple-pretty-print.h"
4a61a337 31#include "tree-flow.h"
4a61a337 32#include "cfgloop.h"
33#include "expr.h"
34#include "optabs.h"
35#include "params.h"
36#include "tree-data-ref.h"
37#include "tree-vectorizer.h"
b9ed1410 38#include "recog.h" /* FIXME: for insn_data */
0b205f4c 39#include "diagnostic-core.h"
b9ed1410 40#include "dumpfile.h"
4a61a337 41
/* Pattern recognition functions.  Each takes the worklist of statements
   (the first entry is the stmt the search starts from), and on success
   returns the new pattern stmt and sets *TYPE_IN / *TYPE_OUT.  */
static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
					    tree *);
static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
					     tree *);
static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
					   tree *);
static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
						tree *);
static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
					      tree *, tree *);
static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
						      tree *, tree *);
static gimple vect_recog_divmod_pattern (vec<gimple> *,
					 tree *, tree *);
static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
						  tree *, tree *);
static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
/* Table of all recognizers, tried in this order; must contain exactly
   NUM_PATTERNS entries.  */
static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
	vect_recog_widen_mult_pattern,
	vect_recog_widen_sum_pattern,
	vect_recog_dot_prod_pattern,
	vect_recog_pow_pattern,
	vect_recog_widen_shift_pattern,
	vect_recog_over_widening_pattern,
	vect_recog_vector_vector_shift_pattern,
	vect_recog_divmod_pattern,
	vect_recog_mixed_size_cond_pattern,
	vect_recog_bool_pattern};
4a61a337 72
/* Append STMT to the pattern definition sequence of STMT_INFO.  The
   "without_update" variant is used because pattern stmts are not yet in
   the IL proper.  */

static inline void
append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
				      stmt);
}
79
/* Discard any existing pattern definition sequence of STMT_INFO and
   start a fresh one containing just STMT.  */

static inline void
new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
{
  STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
  append_pattern_def_seq (stmt_info, stmt);
}
86
33f33894 87/* Check whether STMT2 is in the same loop or basic block as STMT1.
88 Which of the two applies depends on whether we're currently doing
89 loop-based or basic-block-based vectorization, as determined by
90 the vinfo_for_stmt for STMT1 (which must be defined).
91
92 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
93 to be defined as well. */
94
95static bool
96vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
97{
98 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
99 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
100 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
101
102 if (!gimple_bb (stmt2))
103 return false;
104
105 if (loop_vinfo)
106 {
107 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
108 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
109 return false;
110 }
111 else
112 {
113 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
114 || gimple_code (stmt2) == GIMPLE_PHI)
115 return false;
116 }
117
118 gcc_assert (vinfo_for_stmt (stmt2));
119 return true;
120}
121
019bbf38 122/* If the LHS of DEF_STMT has a single use, and that statement is
123 in the same loop or basic block, return it. */
124
125static gimple
126vect_single_imm_use (gimple def_stmt)
127{
128 tree lhs = gimple_assign_lhs (def_stmt);
129 use_operand_p use_p;
130 gimple use_stmt;
131
132 if (!single_imm_use (lhs, &use_p, &use_stmt))
133 return NULL;
134
135 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
136 return NULL;
137
138 return use_stmt;
139}
140
087903db 141/* Check whether NAME, an ssa-name used in USE_STMT,
142 is a result of a type promotion or demotion, such that:
4a61a337 143 DEF_STMT: NAME = NOP (name0)
087903db 144 where the type of name0 (ORIG_TYPE) is smaller/bigger than the type of NAME.
0eee81bc 145 If CHECK_SIGN is TRUE, check that either both types are signed or both are
146 unsigned. */
4a61a337 147
static bool
type_conversion_p (tree name, gimple use_stmt, bool check_sign,
		   tree *orig_type, gimple *def_stmt, bool *promotion)
{
  tree dummy;
  gimple dummy_gimple;
  loop_vec_info loop_vinfo;
  stmt_vec_info stmt_vinfo;
  tree type = TREE_TYPE (name);
  tree oprnd0;
  enum vect_def_type dt;
  tree def;
  bb_vec_info bb_vinfo;

  stmt_vinfo = vinfo_for_stmt (use_stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  /* NAME must have a recognizable definition; this also fills *DEF_STMT.  */
  if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
			   &def, &dt))
    return false;

  if (dt != vect_internal_def
      && dt != vect_external_def && dt != vect_constant_def)
    return false;

  if (!*def_stmt)
    return false;

  if (!is_gimple_assign (*def_stmt))
    return false;

  /* The defining stmt must itself be a conversion (NOP/CONVERT).  */
  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
    return false;

  oprnd0 = gimple_assign_rhs1 (*def_stmt);

  *orig_type = TREE_TYPE (oprnd0);
  /* Only integral conversions; optionally require matching signedness.  */
  if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
      || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
    return false;

  /* Classify: promotion if NAME's type is at least twice as wide as the
     source type, demotion in the opposite case; anything in between
     (e.g. same precision) is rejected.  */
  if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
    *promotion = true;
  else if (TYPE_PRECISION (*orig_type) >= (TYPE_PRECISION (type) * 2))
    *promotion = false;
  else
    return false;

  /* The conversion's source operand must also be analyzable.  */
  if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
			   bb_vinfo, &dummy_gimple, &dummy, &dt))
    return false;

  return true;
}
202
75a70cf9 203/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
204 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
205
/* Return a new "patt"-prefixed temporary SSA name of TYPE defined by STMT
   (see the comment above: a NULL STMT obliges the caller to set
   SSA_NAME_DEF_STMT afterwards).  */

static tree
vect_recog_temp_ssa_var (tree type, gimple stmt)
{
  return make_temp_ssa_name (type, stmt, "patt");
}
4a61a337 211
212/* Function vect_recog_dot_prod_pattern
213
214 Try to find the following pattern:
215
216 type x_t, y_t;
217 TYPE1 prod;
218 TYPE2 sum = init;
219 loop:
220 sum_0 = phi <init, sum_1>
221 S1 x_t = ...
222 S2 y_t = ...
223 S3 x_T = (TYPE1) x_t;
224 S4 y_T = (TYPE1) y_t;
225 S5 prod = x_T * y_T;
226 [S6 prod = (TYPE2) prod; #optional]
227 S7 sum_1 = prod + sum_0;
228
48e1416a 229 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
230 same size of 'TYPE1' or bigger. This is a special case of a reduction
4a61a337 231 computation.
48e1416a 232
4a61a337 233 Input:
234
0187b74e 235 * STMTS: Contains a stmt from which the pattern search begins. In the
236 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
237 will be detected.
4a61a337 238
239 Output:
240
241 * TYPE_IN: The type of the input arguments to the pattern.
242
243 * TYPE_OUT: The type of the output of this pattern.
244
245 * Return value: A new stmt that will be used to replace the sequence of
246 stmts that constitute the pattern. In this case it will be:
247 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
221e9a92 248
249 Note: The dot-prod idiom is a widening reduction pattern that is
250 vectorized without preserving all the intermediate results. It
251 produces only N/2 (widened) results (by summing up pairs of
252 intermediate results) rather than all N results. Therefore, we
253 cannot allow this pattern when we want to get all the results and in
254 the correct order (as is the case when this computation is in an
255 inner-loop nested in an outer-loop that us being vectorized). */
4a61a337 256
75a70cf9 257static gimple
f1f41a6c 258vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
0187b74e 259 tree *type_out)
4a61a337 260{
f1f41a6c 261 gimple stmt, last_stmt = (*stmts)[0];
4a61a337 262 tree oprnd0, oprnd1;
263 tree oprnd00, oprnd01;
0187b74e 264 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
4a61a337 265 tree type, half_type;
75a70cf9 266 gimple pattern_stmt;
4a61a337 267 tree prod_type;
221e9a92 268 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4c0c783a 269 struct loop *loop;
c86930b0 270 tree var;
087903db 271 bool promotion;
4a61a337 272
4c0c783a 273 if (!loop_info)
274 return NULL;
275
276 loop = LOOP_VINFO_LOOP (loop_info);
277
0187b74e 278 if (!is_gimple_assign (last_stmt))
4a61a337 279 return NULL;
280
0187b74e 281 type = gimple_expr_type (last_stmt);
4a61a337 282
48e1416a 283 /* Look for the following pattern
4a61a337 284 DX = (TYPE1) X;
285 DY = (TYPE1) Y;
48e1416a 286 DPROD = DX * DY;
4a61a337 287 DDPROD = (TYPE2) DPROD;
288 sum_1 = DDPROD + sum_0;
48e1416a 289 In which
4a61a337 290 - DX is double the size of X
291 - DY is double the size of Y
292 - DX, DY, DPROD all have the same type
293 - sum is the same size of DPROD or bigger
294 - sum has been recognized as a reduction variable.
295
296 This is equivalent to:
297 DPROD = X w* Y; #widen mult
298 sum_1 = DPROD w+ sum_0; #widen summation
299 or
300 DPROD = X w* Y; #widen mult
301 sum_1 = DPROD + sum_0; #summation
302 */
303
304 /* Starting from LAST_STMT, follow the defs of its uses in search
305 of the above pattern. */
306
0187b74e 307 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
4a61a337 308 return NULL;
309
310 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
311 {
312 /* Has been detected as widening-summation? */
313
314 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
75a70cf9 315 type = gimple_expr_type (stmt);
316 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
4a61a337 317 return NULL;
75a70cf9 318 oprnd0 = gimple_assign_rhs1 (stmt);
319 oprnd1 = gimple_assign_rhs2 (stmt);
4a61a337 320 half_type = TREE_TYPE (oprnd0);
321 }
322 else
323 {
75a70cf9 324 gimple def_stmt;
4a61a337 325
326 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
327 return NULL;
0187b74e 328 oprnd0 = gimple_assign_rhs1 (last_stmt);
329 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 330 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
331 || !types_compatible_p (TREE_TYPE (oprnd1), type))
4a61a337 332 return NULL;
0187b74e 333 stmt = last_stmt;
4a61a337 334
087903db 335 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
336 &promotion)
337 && promotion)
4a61a337 338 {
339 stmt = def_stmt;
75a70cf9 340 oprnd0 = gimple_assign_rhs1 (stmt);
4a61a337 341 }
342 else
343 half_type = type;
344 }
345
0187b74e 346 /* So far so good. Since last_stmt was detected as a (summation) reduction,
4a61a337 347 we know that oprnd1 is the reduction variable (defined by a loop-header
348 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
349 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1991698f 350 if (TREE_CODE (oprnd0) != SSA_NAME)
351 return NULL;
4a61a337 352
353 prod_type = half_type;
354 stmt = SSA_NAME_DEF_STMT (oprnd0);
4af22cd9 355
356 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
4ba74662 357 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
4af22cd9 358 return NULL;
359
48e1416a 360 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
e3e3ee4b 361 inside the loop (in case we are analyzing an outer-loop). */
75a70cf9 362 if (!is_gimple_assign (stmt))
48e1416a 363 return NULL;
4a61a337 364 stmt_vinfo = vinfo_for_stmt (stmt);
365 gcc_assert (stmt_vinfo);
f083cd24 366 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
23d9a167 367 return NULL;
75a70cf9 368 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
4a61a337 369 return NULL;
370 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
371 {
372 /* Has been detected as a widening multiplication? */
373
374 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
75a70cf9 375 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
4a61a337 376 return NULL;
377 stmt_vinfo = vinfo_for_stmt (stmt);
378 gcc_assert (stmt_vinfo);
f083cd24 379 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
75a70cf9 380 oprnd00 = gimple_assign_rhs1 (stmt);
381 oprnd01 = gimple_assign_rhs2 (stmt);
4a61a337 382 }
383 else
384 {
385 tree half_type0, half_type1;
75a70cf9 386 gimple def_stmt;
4a61a337 387 tree oprnd0, oprnd1;
388
75a70cf9 389 oprnd0 = gimple_assign_rhs1 (stmt);
390 oprnd1 = gimple_assign_rhs2 (stmt);
1ea6a73c 391 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
392 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
4a61a337 393 return NULL;
087903db 394 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
395 &promotion)
396 || !promotion)
4a61a337 397 return NULL;
75a70cf9 398 oprnd00 = gimple_assign_rhs1 (def_stmt);
087903db 399 if (!type_conversion_p (oprnd0, stmt, true, &half_type1, &def_stmt,
400 &promotion)
401 || !promotion)
4a61a337 402 return NULL;
75a70cf9 403 oprnd01 = gimple_assign_rhs1 (def_stmt);
1ea6a73c 404 if (!types_compatible_p (half_type0, half_type1))
4a61a337 405 return NULL;
406 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
407 return NULL;
408 }
409
410 half_type = TREE_TYPE (oprnd00);
411 *type_in = half_type;
412 *type_out = type;
48e1416a 413
4a61a337 414 /* Pattern detected. Create a stmt to be used to replace the pattern: */
75a70cf9 415 var = vect_recog_temp_ssa_var (type, NULL);
446e85eb 416 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
417 oprnd00, oprnd01, oprnd1);
48e1416a 418
6d8fb6cf 419 if (dump_enabled_p ())
4a61a337 420 {
7bd765d4 421 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
422 "vect_recog_dot_prod_pattern: detected: ");
423 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
4a61a337 424 }
221e9a92 425
426 /* We don't allow changing the order of the computation in the inner-loop
427 when doing outer-loop vectorization. */
0187b74e 428 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
221e9a92 429
75a70cf9 430 return pattern_stmt;
4a61a337 431}
48e1416a 432
0187b74e 433
6083c152 434/* Handle widening operation by a constant. At the moment we support MULT_EXPR
435 and LSHIFT_EXPR.
436
437 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
438 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
0187b74e 439
440 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
6083c152 441 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
442 that satisfies the above restrictions, we can perform a widening opeartion
443 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
444 with a_it = (interm_type) a_t; */
0187b74e 445
static bool
vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
			       tree const_oprnd, tree *oprnd,
			       vec<gimple> *stmts, tree type,
			       tree *half_type, gimple def_stmt)
{
  tree new_type, new_oprnd;
  gimple new_stmt;

  /* Only widening multiplies and shifts are handled.  */
  if (code != MULT_EXPR && code != LSHIFT_EXPR)
    return false;

  /* Easy case: the constant fits HALF_TYPE (resp. is a valid shift count
     for it) and TYPE is exactly twice as wide -- a plain widening op.  */
  if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
       || (code == LSHIFT_EXPR
	   && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
	      != 1))
      && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
    {
      /* CONST_OPRND is a constant of HALF_TYPE.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      return true;
    }

  /* Otherwise try an intermediate type; that only makes sense when TYPE
     is at least 4x HALF_TYPE.  */
  if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
    return false;

  if (!vect_same_loop_or_bb_p (stmt, def_stmt))
    return false;

  /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
     a type 2 times bigger than HALF_TYPE.  */
  new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
					     TYPE_UNSIGNED (type));
  /* The constant must also be representable in the intermediate type.  */
  if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
      || (code == LSHIFT_EXPR
	  && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
    return false;

  /* Use NEW_TYPE for widening operation.  */
  if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
    {
      new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
      /* Check if the already created pattern stmt is what we need.  */
      if (!is_gimple_assign (new_stmt)
	  || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
	  || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
	return false;

      stmts->safe_push (def_stmt);
      *oprnd = gimple_assign_lhs (new_stmt);
    }
  else
    {
      /* Create a_T = (NEW_TYPE) a_t;  and record it as DEF_STMT's
	 related pattern stmt so later recognizers can reuse it.  */
      *oprnd = gimple_assign_rhs1 (def_stmt);
      new_oprnd = make_ssa_name (new_type, NULL);
      new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd,
					       NULL_TREE);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
      stmts->safe_push (def_stmt);
      *oprnd = new_oprnd;
    }

  *half_type = new_type;
  return true;
}
512
513
4a61a337 514/* Function vect_recog_widen_mult_pattern
515
516 Try to find the following pattern:
517
518 type a_t, b_t;
519 TYPE a_T, b_T, prod_T;
520
521 S1 a_t = ;
522 S2 b_t = ;
523 S3 a_T = (TYPE) a_t;
524 S4 b_T = (TYPE) b_t;
525 S5 prod_T = a_T * b_T;
526
527 where type 'TYPE' is at least double the size of type 'type'.
528
c2c4377d 529 Also detect unsigned cases:
0eee81bc 530
531 unsigned type a_t, b_t;
532 unsigned TYPE u_prod_T;
533 TYPE a_T, b_T, prod_T;
534
535 S1 a_t = ;
536 S2 b_t = ;
537 S3 a_T = (TYPE) a_t;
538 S4 b_T = (TYPE) b_t;
539 S5 prod_T = a_T * b_T;
540 S6 u_prod_T = (unsigned TYPE) prod_T;
541
542 and multiplication by constants:
543
544 type a_t;
545 TYPE a_T, prod_T;
546
547 S1 a_t = ;
548 S3 a_T = (TYPE) a_t;
549 S5 prod_T = a_T * CONST;
550
0187b74e 551 A special case of multiplication by constants is when 'TYPE' is 4 times
552 bigger than 'type', but CONST fits an intermediate type 2 times smaller
553 than 'TYPE'. In that case we create an additional pattern stmt for S3
554 to create a variable of the intermediate type, and perform widen-mult
555 on the intermediate type as well:
556
557 type a_t;
558 interm_type a_it;
559 TYPE a_T, prod_T, prod_T';
560
561 S1 a_t = ;
562 S3 a_T = (TYPE) a_t;
563 '--> a_it = (interm_type) a_t;
564 S5 prod_T = a_T * CONST;
565 '--> prod_T' = a_it w* CONST;
4a61a337 566
0187b74e 567 Input/Output:
568
569 * STMTS: Contains a stmt from which the pattern search begins. In the
570 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
571 is detected. In case of unsigned widen-mult, the original stmt (S5) is
572 replaced with S6 in STMTS. In case of multiplication by a constant
573 of an intermediate type (the last case above), STMTS also contains S3
574 (inserted before S5).
4a61a337 575
576 Output:
577
578 * TYPE_IN: The type of the input arguments to the pattern.
579
0eee81bc 580 * TYPE_OUT: The type of the output of this pattern.
4a61a337 581
582 * Return value: A new stmt that will be used to replace the sequence of
0eee81bc 583 stmts that constitute the pattern. In this case it will be:
4a61a337 584 WIDEN_MULT <a_t, b_t>
585*/
586
75a70cf9 587static gimple
f1f41a6c 588vect_recog_widen_mult_pattern (vec<gimple> *stmts,
0187b74e 589 tree *type_in, tree *type_out)
4a61a337 590{
f1f41a6c 591 gimple last_stmt = stmts->pop ();
75a70cf9 592 gimple def_stmt0, def_stmt1;
c6c91d61 593 tree oprnd0, oprnd1;
594 tree type, half_type0, half_type1;
75a70cf9 595 gimple pattern_stmt;
0eee81bc 596 tree vectype, vectype_out = NULL_TREE;
75a70cf9 597 tree var;
c6c91d61 598 enum tree_code dummy_code;
862bb3cd 599 int dummy_int;
f1f41a6c 600 vec<tree> dummy_vec;
6083c152 601 bool op1_ok;
087903db 602 bool promotion;
c6c91d61 603
0187b74e 604 if (!is_gimple_assign (last_stmt))
c6c91d61 605 return NULL;
606
0187b74e 607 type = gimple_expr_type (last_stmt);
c6c91d61 608
609 /* Starting from LAST_STMT, follow the defs of its uses in search
610 of the above pattern. */
611
0187b74e 612 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
c6c91d61 613 return NULL;
614
0187b74e 615 oprnd0 = gimple_assign_rhs1 (last_stmt);
616 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 617 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
618 || !types_compatible_p (TREE_TYPE (oprnd1), type))
c6c91d61 619 return NULL;
620
0eee81bc 621 /* Check argument 0. */
087903db 622 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
623 &promotion)
624 || !promotion)
625 return NULL;
0eee81bc 626 /* Check argument 1. */
087903db 627 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
628 &def_stmt1, &promotion);
c6c91d61 629
087903db 630 if (op1_ok && promotion)
0eee81bc 631 {
632 oprnd0 = gimple_assign_rhs1 (def_stmt0);
633 oprnd1 = gimple_assign_rhs1 (def_stmt1);
634 }
6083c152 635 else
0eee81bc 636 {
0187b74e 637 if (TREE_CODE (oprnd1) == INTEGER_CST
0eee81bc 638 && TREE_CODE (half_type0) == INTEGER_TYPE
6083c152 639 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
640 &oprnd0, stmts, type,
641 &half_type0, def_stmt0))
0187b74e 642 half_type1 = half_type0;
0eee81bc 643 else
644 return NULL;
645 }
646
647 /* Handle unsigned case. Look for
648 S6 u_prod_T = (unsigned TYPE) prod_T;
649 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
650 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
651 {
019bbf38 652 gimple use_stmt;
653 tree use_lhs;
0eee81bc 654 tree use_type;
655
656 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
657 return NULL;
658
019bbf38 659 use_stmt = vect_single_imm_use (last_stmt);
660 if (!use_stmt || !is_gimple_assign (use_stmt)
661 || gimple_assign_rhs_code (use_stmt) != NOP_EXPR)
0eee81bc 662 return NULL;
663
664 use_lhs = gimple_assign_lhs (use_stmt);
665 use_type = TREE_TYPE (use_lhs);
666 if (!INTEGRAL_TYPE_P (use_type)
667 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
668 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
669 return NULL;
670
671 type = use_type;
0187b74e 672 last_stmt = use_stmt;
0eee81bc 673 }
c6c91d61 674
1ea6a73c 675 if (!types_compatible_p (half_type0, half_type1))
c6c91d61 676 return NULL;
677
678 /* Pattern detected. */
6d8fb6cf 679 if (dump_enabled_p ())
7bd765d4 680 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
681 "vect_recog_widen_mult_pattern: detected: ");
c6c91d61 682
683 /* Check target support */
684 vectype = get_vectype_for_scalar_type (half_type0);
b334cbba 685 vectype_out = get_vectype_for_scalar_type (type);
f031fa03 686 if (!vectype
1548bce8 687 || !vectype_out
0187b74e 688 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
b334cbba 689 vectype_out, vectype,
087dde2d 690 &dummy_code, &dummy_code,
691 &dummy_int, &dummy_vec))
c6c91d61 692 return NULL;
693
694 *type_in = vectype;
b334cbba 695 *type_out = vectype_out;
c6c91d61 696
697 /* Pattern supported. Create a stmt to be used to replace the pattern: */
75a70cf9 698 var = vect_recog_temp_ssa_var (type, NULL);
699 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
700 oprnd1);
75a70cf9 701
6d8fb6cf 702 if (dump_enabled_p ())
7bd765d4 703 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
75a70cf9 704
f1f41a6c 705 stmts->safe_push (last_stmt);
75a70cf9 706 return pattern_stmt;
4a61a337 707}
708
709
c37bea13 710/* Function vect_recog_pow_pattern
711
712 Try to find the following pattern:
713
714 x = POW (y, N);
715
716 with POW being one of pow, powf, powi, powif and N being
717 either 2 or 0.5.
718
719 Input:
720
721 * LAST_STMT: A stmt from which the pattern search begins.
722
723 Output:
724
725 * TYPE_IN: The type of the input arguments to the pattern.
726
727 * TYPE_OUT: The type of the output of this pattern.
728
729 * Return value: A new stmt that will be used to replace the sequence of
730 stmts that constitute the pattern. In this case it will be:
75a70cf9 731 x = x * x
c37bea13 732 or
75a70cf9 733 x = sqrt (x)
c37bea13 734*/
735
75a70cf9 736static gimple
f1f41a6c 737vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
0187b74e 738 tree *type_out)
c37bea13 739{
f1f41a6c 740 gimple last_stmt = (*stmts)[0];
75a70cf9 741 tree fn, base, exp = NULL;
742 gimple stmt;
743 tree var;
c37bea13 744
0187b74e 745 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
c37bea13 746 return NULL;
747
0187b74e 748 fn = gimple_call_fndecl (last_stmt);
1bc5f74f 749 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
750 return NULL;
751
c37bea13 752 switch (DECL_FUNCTION_CODE (fn))
753 {
754 case BUILT_IN_POWIF:
755 case BUILT_IN_POWI:
756 case BUILT_IN_POWF:
757 case BUILT_IN_POW:
0187b74e 758 base = gimple_call_arg (last_stmt, 0);
759 exp = gimple_call_arg (last_stmt, 1);
c37bea13 760 if (TREE_CODE (exp) != REAL_CST
761 && TREE_CODE (exp) != INTEGER_CST)
75a70cf9 762 return NULL;
c37bea13 763 break;
764
75a70cf9 765 default:
766 return NULL;
c37bea13 767 }
768
769 /* We now have a pow or powi builtin function call with a constant
770 exponent. */
771
c37bea13 772 *type_out = NULL_TREE;
773
774 /* Catch squaring. */
775 if ((host_integerp (exp, 0)
776 && tree_low_cst (exp, 0) == 2)
777 || (TREE_CODE (exp) == REAL_CST
778 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
4957645d 779 {
780 *type_in = TREE_TYPE (base);
75a70cf9 781
782 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
783 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
75a70cf9 784 return stmt;
4957645d 785 }
c37bea13 786
787 /* Catch square root. */
788 if (TREE_CODE (exp) == REAL_CST
789 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
790 {
791 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
4957645d 792 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
793 if (*type_in)
794 {
75a70cf9 795 gimple stmt = gimple_build_call (newfn, 1, base);
796 if (vectorizable_function (stmt, *type_in, *type_in)
797 != NULL_TREE)
798 {
799 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
48e1416a 800 gimple_call_set_lhs (stmt, var);
75a70cf9 801 return stmt;
802 }
4957645d 803 }
c37bea13 804 }
805
75a70cf9 806 return NULL;
c37bea13 807}
808
809
4a61a337 810/* Function vect_recog_widen_sum_pattern
811
812 Try to find the following pattern:
813
48e1416a 814 type x_t;
4a61a337 815 TYPE x_T, sum = init;
816 loop:
817 sum_0 = phi <init, sum_1>
818 S1 x_t = *p;
819 S2 x_T = (TYPE) x_t;
820 S3 sum_1 = x_T + sum_0;
821
48e1416a 822 where type 'TYPE' is at least double the size of type 'type', i.e - we're
4a61a337 823 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
9ca2c29a 824 a special case of a reduction computation.
4a61a337 825
826 Input:
827
828 * LAST_STMT: A stmt from which the pattern search begins. In the example,
829 when this function is called with S3, the pattern {S2,S3} will be detected.
48e1416a 830
4a61a337 831 Output:
48e1416a 832
4a61a337 833 * TYPE_IN: The type of the input arguments to the pattern.
834
835 * TYPE_OUT: The type of the output of this pattern.
836
837 * Return value: A new stmt that will be used to replace the sequence of
838 stmts that constitute the pattern. In this case it will be:
839 WIDEN_SUM <x_t, sum_0>
221e9a92 840
48e1416a 841 Note: The widening-sum idiom is a widening reduction pattern that is
221e9a92 842 vectorized without preserving all the intermediate results. It
48e1416a 843 produces only N/2 (widened) results (by summing up pairs of
844 intermediate results) rather than all N results. Therefore, we
845 cannot allow this pattern when we want to get all the results and in
846 the correct order (as is the case when this computation is in an
221e9a92 847 inner-loop nested in an outer-loop that us being vectorized). */
4a61a337 848
75a70cf9 849static gimple
f1f41a6c 850vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
0187b74e 851 tree *type_out)
4a61a337 852{
f1f41a6c 853 gimple stmt, last_stmt = (*stmts)[0];
4a61a337 854 tree oprnd0, oprnd1;
0187b74e 855 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
4a61a337 856 tree type, half_type;
75a70cf9 857 gimple pattern_stmt;
221e9a92 858 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4c0c783a 859 struct loop *loop;
75a70cf9 860 tree var;
087903db 861 bool promotion;
4a61a337 862
4c0c783a 863 if (!loop_info)
864 return NULL;
865
866 loop = LOOP_VINFO_LOOP (loop_info);
867
0187b74e 868 if (!is_gimple_assign (last_stmt))
4a61a337 869 return NULL;
870
0187b74e 871 type = gimple_expr_type (last_stmt);
4a61a337 872
873 /* Look for the following pattern
874 DX = (TYPE) X;
875 sum_1 = DX + sum_0;
876 In which DX is at least double the size of X, and sum_1 has been
877 recognized as a reduction variable.
878 */
879
880 /* Starting from LAST_STMT, follow the defs of its uses in search
881 of the above pattern. */
882
0187b74e 883 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
4a61a337 884 return NULL;
885
886 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
887 return NULL;
888
0187b74e 889 oprnd0 = gimple_assign_rhs1 (last_stmt);
890 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 891 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
892 || !types_compatible_p (TREE_TYPE (oprnd1), type))
4a61a337 893 return NULL;
894
0187b74e 895 /* So far so good. Since last_stmt was detected as a (summation) reduction,
4a61a337 896 we know that oprnd1 is the reduction variable (defined by a loop-header
897 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
898 Left to check that oprnd0 is defined by a cast from type 'type' to type
899 'TYPE'. */
900
087903db 901 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
902 &promotion)
903 || !promotion)
904 return NULL;
4a61a337 905
75a70cf9 906 oprnd0 = gimple_assign_rhs1 (stmt);
4a61a337 907 *type_in = half_type;
908 *type_out = type;
909
910 /* Pattern detected. Create a stmt to be used to replace the pattern: */
75a70cf9 911 var = vect_recog_temp_ssa_var (type, NULL);
912 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
913 oprnd0, oprnd1);
75a70cf9 914
6d8fb6cf 915 if (dump_enabled_p ())
4a61a337 916 {
7bd765d4 917 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
918 "vect_recog_widen_sum_pattern: detected: ");
919 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
4a61a337 920 }
221e9a92 921
922 /* We don't allow changing the order of the computation in the inner-loop
923 when doing outer-loop vectorization. */
0187b74e 924 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
221e9a92 925
75a70cf9 926 return pattern_stmt;
4a61a337 927}
928
929
45eea33f 930/* Return TRUE if the operation in STMT can be performed on a smaller type.
931
932 Input:
933 STMT - a statement to check.
934 DEF - we support operations with two operands, one of which is constant.
935 The other operand can be defined by a demotion operation, or by a
936 previous statement in a sequence of over-promoted operations. In the
937 later case DEF is used to replace that operand. (It is defined by a
938 pattern statement we created for the previous statement in the
939 sequence).
940
941 Input/output:
942 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
943 NULL, it's the type of DEF.
944 STMTS - additional pattern statements. If a pattern statement (type
945 conversion) is created in this function, its original statement is
946 added to STMTS.
947
948 Output:
949 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
950 operands to use in the new pattern statement for STMT (will be created
951 in vect_recog_over_widening_pattern ()).
952 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
953 statements for STMT: the first one is a type promotion and the second
954 one is the operation itself. We return the type promotion statement
18937389 955 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
45eea33f 956 the second pattern statement. */
957
958static bool
959vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
960 tree *op0, tree *op1, gimple *new_def_stmt,
f1f41a6c 961 vec<gimple> *stmts)
45eea33f 962{
963 enum tree_code code;
964 tree const_oprnd, oprnd;
03d37e4e 965 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
45eea33f 966 gimple def_stmt, new_stmt;
967 bool first = false;
087903db 968 bool promotion;
4c0c783a 969
bc4577c4 970 *op0 = NULL_TREE;
971 *op1 = NULL_TREE;
45eea33f 972 *new_def_stmt = NULL;
973
974 if (!is_gimple_assign (stmt))
975 return false;
976
977 code = gimple_assign_rhs_code (stmt);
978 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
979 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
980 return false;
981
982 oprnd = gimple_assign_rhs1 (stmt);
983 const_oprnd = gimple_assign_rhs2 (stmt);
984 type = gimple_expr_type (stmt);
985
986 if (TREE_CODE (oprnd) != SSA_NAME
987 || TREE_CODE (const_oprnd) != INTEGER_CST)
988 return false;
989
897c6e08 990 /* If oprnd has other uses besides that in stmt we cannot mark it
991 as being part of a pattern only. */
992 if (!has_single_use (oprnd))
993 return false;
994
45eea33f 995 /* If we are in the middle of a sequence, we use DEF from a previous
996 statement. Otherwise, OPRND has to be a result of type promotion. */
997 if (*new_type)
998 {
999 half_type = *new_type;
1000 oprnd = def;
1001 }
1002 else
1003 {
1004 first = true;
087903db 1005 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
33f33894 1006 &promotion)
1007 || !promotion
1008 || !vect_same_loop_or_bb_p (stmt, def_stmt))
45eea33f 1009 return false;
1010 }
1011
1012 /* Can we perform the operation on a smaller type? */
1013 switch (code)
1014 {
1015 case BIT_IOR_EXPR:
1016 case BIT_XOR_EXPR:
1017 case BIT_AND_EXPR:
1018 if (!int_fits_type_p (const_oprnd, half_type))
1019 {
1020 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1021 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1022 return false;
1023
1024 interm_type = build_nonstandard_integer_type (
1025 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1026 if (!int_fits_type_p (const_oprnd, interm_type))
1027 return false;
1028 }
1029
1030 break;
1031
1032 case LSHIFT_EXPR:
1033 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1034 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1035 return false;
1036
1037 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1038 (e.g., if the original value was char, the shift amount is at most 8
1039 if we want to use short). */
1040 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1041 return false;
1042
1043 interm_type = build_nonstandard_integer_type (
1044 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1045
1046 if (!vect_supportable_shift (code, interm_type))
1047 return false;
1048
1049 break;
1050
1051 case RSHIFT_EXPR:
1052 if (vect_supportable_shift (code, half_type))
1053 break;
1054
1055 /* Try intermediate type - HALF_TYPE is not supported. */
1056 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1057 return false;
1058
1059 interm_type = build_nonstandard_integer_type (
1060 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1061
1062 if (!vect_supportable_shift (code, interm_type))
1063 return false;
1064
1065 break;
1066
1067 default:
1068 gcc_unreachable ();
1069 }
1070
1071 /* There are four possible cases:
1072 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1073 the first statement in the sequence)
1074 a. The original, HALF_TYPE, is not enough - we replace the promotion
1075 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1076 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1077 promotion.
1078 2. OPRND is defined by a pattern statement we created.
1079 a. Its type is not sufficient for the operation, we create a new stmt:
1080 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1081 this statement in NEW_DEF_STMT, and it is later put in
18937389 1082 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
45eea33f 1083 b. OPRND is good to use in the new statement. */
1084 if (first)
1085 {
1086 if (interm_type)
1087 {
1088 /* Replace the original type conversion HALF_TYPE->TYPE with
1089 HALF_TYPE->INTERM_TYPE. */
1090 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1091 {
1092 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1093 /* Check if the already created pattern stmt is what we need. */
1094 if (!is_gimple_assign (new_stmt)
1095 || gimple_assign_rhs_code (new_stmt) != NOP_EXPR
1096 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1097 return false;
1098
f1f41a6c 1099 stmts->safe_push (def_stmt);
45eea33f 1100 oprnd = gimple_assign_lhs (new_stmt);
1101 }
1102 else
1103 {
1104 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1105 oprnd = gimple_assign_rhs1 (def_stmt);
03d37e4e 1106 new_oprnd = make_ssa_name (interm_type, NULL);
45eea33f 1107 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1108 oprnd, NULL_TREE);
45eea33f 1109 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
f1f41a6c 1110 stmts->safe_push (def_stmt);
45eea33f 1111 oprnd = new_oprnd;
1112 }
1113 }
1114 else
1115 {
1116 /* Retrieve the operand before the type promotion. */
1117 oprnd = gimple_assign_rhs1 (def_stmt);
1118 }
1119 }
1120 else
1121 {
1122 if (interm_type)
1123 {
1124 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
03d37e4e 1125 new_oprnd = make_ssa_name (interm_type, NULL);
45eea33f 1126 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
1127 oprnd, NULL_TREE);
45eea33f 1128 oprnd = new_oprnd;
1129 *new_def_stmt = new_stmt;
1130 }
1131
1132 /* Otherwise, OPRND is already set. */
1133 }
1134
1135 if (interm_type)
1136 *new_type = interm_type;
1137 else
1138 *new_type = half_type;
1139
1140 *op0 = oprnd;
1141 *op1 = fold_convert (*new_type, const_oprnd);
1142
1143 return true;
1144}
1145
1146
1147/* Try to find a statement or a sequence of statements that can be performed
1148 on a smaller type:
1149
1150 type x_t;
1151 TYPE x_T, res0_T, res1_T;
1152 loop:
1153 S1 x_t = *p;
1154 S2 x_T = (TYPE) x_t;
1155 S3 res0_T = op (x_T, C0);
1156 S4 res1_T = op (res0_T, C1);
1157 S5 ... = () res1_T; - type demotion
1158
1159 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1160 constants.
1161 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1162 be 'type' or some intermediate type. For now, we expect S5 to be a type
50f85e2e 1163 demotion operation. We also check that S3 and S4 have only one use. */
45eea33f 1164
static gimple
vect_recog_over_widening_pattern (vec<gimple> *stmts,
                                  tree *type_in, tree *type_out)
{
  gimple stmt = stmts->pop ();
  gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
  tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
  tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
  bool first;
  tree type = NULL;

  /* Walk forward through the single-use chain S3, S4, ... creating a
     narrower pattern statement for each member, until a statement no
     longer fits the smaller type.  */
  first = true;
  while (1)
    {
      if (!vinfo_for_stmt (stmt)
          || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
        return NULL;

      new_def_stmt = NULL;
      if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
                                             &op0, &op1, &new_def_stmt,
                                             stmts))
        {
	  /* No statement fitted at all -> no pattern; otherwise the
	     sequence ends here and we go check the final demotion.  */
          if (first)
            return NULL;
          else
            break;
        }

      /* STMT can be performed on a smaller type.  Check its uses.  */
      use_stmt = vect_single_imm_use (stmt);
      if (!use_stmt || !is_gimple_assign (use_stmt))
        return NULL;

      /* Create pattern statement for STMT.  */
      vectype = get_vectype_for_scalar_type (new_type);
      if (!vectype)
        return NULL;

      /* We want to collect all the statements for which we create pattern
         statements, except for the case when the last statement in the
         sequence doesn't have a corresponding pattern statement.  In such
         case we associate the last pattern statement with the last statement
	 in the sequence.  Therefore, we only add the original statement to
         the list if we know that it is not the last.  */
      if (prev_stmt)
	stmts->safe_push (prev_stmt);

      var = vect_recog_temp_ssa_var (new_type, NULL);
      pattern_stmt
	= gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
					op0, op1);
      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
      /* NEW_DEF_STMT (a promotion of the previous pattern result, possibly
	 NULL) must be emitted before the pattern statement.  */
      new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);

      if (dump_enabled_p ())
        {
          dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "created pattern stmt: ");
          dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
        }

      type = gimple_expr_type (stmt);
      prev_stmt = stmt;
      stmt = use_stmt;

      first = false;
    }

  /* We got a sequence.  We expect it to end with a type demotion operation.
     Otherwise, we quit (for now).  There are three possible cases: the
     conversion is to NEW_TYPE (we don't do anything), the conversion is to
     a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
     NEW_TYPE differs (we create a new conversion statement).  */
  if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
    {
      use_lhs = gimple_assign_lhs (use_stmt);
      use_type = TREE_TYPE (use_lhs);
      /* Support only type demotion or signedness change.  */
      if (!INTEGRAL_TYPE_P (use_type)
	  || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
        return NULL;

      /* Check that NEW_TYPE is not bigger than the conversion result.  */
      if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
	return NULL;

      if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
          || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
        {
          /* Create NEW_TYPE->USE_TYPE conversion.  */
	  new_oprnd = make_ssa_name (use_type, NULL);
          pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
                                                       var, NULL_TREE);
          STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;

          *type_in = get_vectype_for_scalar_type (new_type);
          *type_out = get_vectype_for_scalar_type (use_type);

          /* We created a pattern statement for the last statement in the
             sequence, so we don't need to associate it with the pattern
             statement created for PREV_STMT.  Therefore, we add PREV_STMT
             to the list in order to mark it later in vect_pattern_recog_1.  */
          if (prev_stmt)
	    stmts->safe_push (prev_stmt);
        }
      else
        {
	  /* The demotion already produces NEW_TYPE: reuse PREV_STMT's
	     pattern def sequence for the demotion statement instead of
	     creating any new statement.  */
          if (prev_stmt)
	    STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
	       = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));

          *type_in = vectype;
          *type_out = NULL_TREE;
        }

      stmts->safe_push (use_stmt);
    }
  else
    /* TODO: support general case, create a conversion to the correct type.  */
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "vect_recog_over_widening_pattern: detected: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  return pattern_stmt;
}
1297
6083c152 1298/* Detect widening shift pattern:
1299
1300 type a_t;
1301 TYPE a_T, res_T;
1302
1303 S1 a_t = ;
1304 S2 a_T = (TYPE) a_t;
1305 S3 res_T = a_T << CONST;
1306
1307 where type 'TYPE' is at least double the size of type 'type'.
1308
da5b41a4 1309 Also detect cases where the shift result is immediately converted
1310 to another type 'result_type' that is no larger in size than 'TYPE'.
1311 In those cases we perform a widen-shift that directly results in
1312 'result_type', to avoid a possible over-widening situation:
6083c152 1313
da5b41a4 1314 type a_t;
6083c152 1315 TYPE a_T, res_T;
da5b41a4 1316 result_type res_result;
6083c152 1317
1318 S1 a_t = ;
1319 S2 a_T = (TYPE) a_t;
1320 S3 res_T = a_T << CONST;
da5b41a4 1321 S4 res_result = (result_type) res_T;
1322 '--> res_result' = a_t w<< CONST;
6083c152 1323
1324 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1325 create an additional pattern stmt for S2 to create a variable of an
1326 intermediate type, and perform widen-shift on the intermediate type:
1327
1328 type a_t;
1329 interm_type a_it;
1330 TYPE a_T, res_T, res_T';
1331
1332 S1 a_t = ;
1333 S2 a_T = (TYPE) a_t;
1334 '--> a_it = (interm_type) a_t;
1335 S3 res_T = a_T << CONST;
1336 '--> res_T' = a_it <<* CONST;
1337
1338 Input/Output:
1339
1340 * STMTS: Contains a stmt from which the pattern search begins.
1341 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1342 in STMTS. When an intermediate type is used and a pattern statement is
1343 created for S2, we also put S2 here (before S3).
1344
1345 Output:
1346
1347 * TYPE_IN: The type of the input arguments to the pattern.
1348
1349 * TYPE_OUT: The type of the output of this pattern.
1350
1351 * Return value: A new stmt that will be used to replace the sequence of
1352 stmts that constitute the pattern. In this case it will be:
1353 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1354
static gimple
vect_recog_widen_shift_pattern (vec<gimple> *stmts,
				tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  gimple def_stmt0;
  tree oprnd0, oprnd1;
  tree type, half_type0;
  gimple pattern_stmt;
  tree vectype, vectype_out = NULL_TREE;
  tree var;
  enum tree_code dummy_code;
  int dummy_int;
  vec<tree> dummy_vec;
  gimple use_stmt;
  bool promotion;

  if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
    return NULL;

  /* Don't match a statement that is already part of another pattern.  */
  if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
    return NULL;

  if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
    return NULL;

  /* Only SSA_NAME << INTEGER_CST is handled.  */
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
    return NULL;

  /* Check operand 0: it has to be defined by a type promotion.  */
  if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
                          &promotion)
      || !promotion)
     return NULL;

  /* Check operand 1: has to be positive.  We check that it fits the type
     in vect_handle_widen_op_by_const ().  */
  if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
    return NULL;

  /* From here on operate on the narrow (pre-promotion) operand.  */
  oprnd0 = gimple_assign_rhs1 (def_stmt0);
  type = gimple_expr_type (last_stmt);

  /* Check for subsequent conversion to another type.  If the shift result
     is immediately converted to a type no wider than TYPE, widen-shift
     directly into that type to avoid over-widening (see S4 above).  */
  use_stmt = vect_single_imm_use (last_stmt);
  if (use_stmt && is_gimple_assign (use_stmt)
      && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
      && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
    {
      tree use_lhs = gimple_assign_lhs (use_stmt);
      tree use_type = TREE_TYPE (use_lhs);

      if (INTEGRAL_TYPE_P (use_type)
	  && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
	{
	  last_stmt = use_stmt;
	  type = use_type;
	}
    }

  /* Check if this a widening operation.  This may rewrite OPRND0 and/or
     push an intermediate-type promotion statement onto STMTS.  */
  if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
       				      &oprnd0, stmts,
	      			      type, &half_type0, def_stmt0))
    return NULL;

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                     "vect_recog_widen_shift_pattern: detected: ");

  /* Check target support.  */
  vectype = get_vectype_for_scalar_type (half_type0);
  vectype_out = get_vectype_for_scalar_type (type);

  if (!vectype
      || !vectype_out
      || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
					  vectype_out, vectype,
					  &dummy_code, &dummy_code,
					  &dummy_int, &dummy_vec))
    return NULL;

  *type_in = vectype;
  *type_out = vectype_out;

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (type, NULL);
  pattern_stmt =
    gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
45eea33f 1454
40dfedc8 1455/* Detect a vector by vector shift pattern that wouldn't be otherwise
1456 vectorized:
1457
1458 type a_t;
1459 TYPE b_T, res_T;
1460
1461 S1 a_t = ;
1462 S2 b_T = ;
1463 S3 res_T = b_T op a_t;
1464
1465 where type 'TYPE' is a type with different size than 'type',
1466 and op is <<, >> or rotate.
1467
1468 Also detect cases:
1469
1470 type a_t;
1471 TYPE b_T, c_T, res_T;
1472
1473 S0 c_T = ;
1474 S1 a_t = (type) c_T;
1475 S2 b_T = ;
1476 S3 res_T = b_T op a_t;
1477
1478 Input/Output:
1479
1480 * STMTS: Contains a stmt from which the pattern search begins,
1481 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
1482 with a shift/rotate which has same type on both operands, in the
1483 second case just b_T op c_T, in the first case with added cast
18937389 1484 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
40dfedc8 1485
1486 Output:
1487
1488 * TYPE_IN: The type of the input arguments to the pattern.
1489
1490 * TYPE_OUT: The type of the output of this pattern.
1491
1492 * Return value: A new stmt that will be used to replace the shift/rotate
1493 S3 stmt. */
1494
static gimple
vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
					tree *type_in, tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  tree oprnd0, oprnd1, lhs, var;
  gimple pattern_stmt, def_stmt;
  enum tree_code rhs_code;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  enum vect_def_type dt;
  tree def;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  /* Only shifts and rotates are handled.  */
  rhs_code = gimple_assign_rhs_code (last_stmt);
  switch (rhs_code)
    {
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
    case LROTATE_EXPR:
    case RROTATE_EXPR:
      break;
    default:
      return NULL;
    }

  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
    return NULL;

  /* Match only when the shift count is an SSA name of a *different* mode
     than the shifted operand (same-mode shifts need no help), the count's
     type has no padding bits, and the result has the same precision as
     the shifted operand.  */
  lhs = gimple_assign_lhs (last_stmt);
  oprnd0 = gimple_assign_rhs1 (last_stmt);
  oprnd1 = gimple_assign_rhs2 (last_stmt);
  if (TREE_CODE (oprnd0) != SSA_NAME
      || TREE_CODE (oprnd1) != SSA_NAME
      || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
      || TYPE_PRECISION (TREE_TYPE (oprnd1))
	 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
      || TYPE_PRECISION (TREE_TYPE (lhs))
	 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
    return NULL;

  /* The shift count must be defined inside the loop/basic-block being
     vectorized.  */
  if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
			   &def, &dt))
    return NULL;

  if (dt != vect_internal_def)
    return NULL;

  *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
  *type_out = *type_in;
  if (*type_in == NULL_TREE)
    return NULL;

  /* If the shift count is itself a cast from a same-sized value (case S1
     above), shift by that original value directly and skip the cast.  */
  def = NULL_TREE;
  if (gimple_assign_cast_p (def_stmt))
    {
      tree rhs1 = gimple_assign_rhs1 (def_stmt);
      if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
	  && TYPE_PRECISION (TREE_TYPE (rhs1))
	     == TYPE_PRECISION (TREE_TYPE (oprnd0)))
	def = rhs1;
    }

  if (def == NULL_TREE)
    {
      /* Otherwise cast the count to OPRND0's type via a pattern def stmt
	 so both shift operands end up with the same vector type.  */
      def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
      def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1,
					       NULL_TREE);
      new_pattern_def_seq (stmt_vinfo, def_stmt);
    }

  /* Pattern detected.  */
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
		     "vect_recog_vector_vector_shift_pattern: detected: ");

  /* Pattern supported.  Create a stmt to be used to replace the pattern.  */
  var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
  pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);

  if (dump_enabled_p ())
    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);

  stmts->safe_push (last_stmt);
  return pattern_stmt;
}
1584
127cb1cd 1585/* Detect a signed division by a constant that wouldn't be
18937389 1586 otherwise vectorized:
1587
1588 type a_t, b_t;
1589
1590 S1 a_t = b_t / N;
1591
127cb1cd 1592 where type 'type' is an integral type and N is a constant.
18937389 1593
127cb1cd 1594 Similarly handle modulo by a constant:
18937389 1595
1596 S4 a_t = b_t % N;
1597
1598 Input/Output:
1599
1600 * STMTS: Contains a stmt from which the pattern search begins,
127cb1cd 1601 i.e. the division stmt. S1 is replaced by if N is a power
1602 of two constant and type is signed:
18937389 1603 S3 y_t = b_t < 0 ? N - 1 : 0;
1604 S2 x_t = b_t + y_t;
1605 S1' a_t = x_t >> log2 (N);
1606
127cb1cd 1607 S4 is replaced if N is a power of two constant and
1608 type is signed by (where *_T temporaries have unsigned type):
18937389 1609 S9 y_T = b_t < 0 ? -1U : 0U;
1610 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
1611 S7 z_t = (type) z_T;
1612 S6 w_t = b_t + z_t;
1613 S5 x_t = w_t & (N - 1);
1614 S4' a_t = x_t - z_t;
1615
1616 Output:
1617
1618 * TYPE_IN: The type of the input arguments to the pattern.
1619
1620 * TYPE_OUT: The type of the output of this pattern.
1621
1622 * Return value: A new stmt that will be used to replace the division
1623 S1 or modulo S4 stmt. */
1624
1625static gimple
f1f41a6c 1626vect_recog_divmod_pattern (vec<gimple> *stmts,
127cb1cd 1627 tree *type_in, tree *type_out)
18937389 1628{
f1f41a6c 1629 gimple last_stmt = stmts->pop ();
3af51fe9 1630 tree oprnd0, oprnd1, vectype, itype, cond;
18937389 1631 gimple pattern_stmt, def_stmt;
1632 enum tree_code rhs_code;
1633 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1634 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
127cb1cd 1635 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
18937389 1636 optab optab;
ebf4f764 1637 tree q;
127cb1cd 1638 int dummy_int, prec;
127cb1cd 1639 stmt_vec_info def_stmt_vinfo;
18937389 1640
1641 if (!is_gimple_assign (last_stmt))
1642 return NULL;
1643
1644 rhs_code = gimple_assign_rhs_code (last_stmt);
1645 switch (rhs_code)
1646 {
1647 case TRUNC_DIV_EXPR:
1648 case TRUNC_MOD_EXPR:
1649 break;
1650 default:
1651 return NULL;
1652 }
1653
1654 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1655 return NULL;
1656
1657 oprnd0 = gimple_assign_rhs1 (last_stmt);
1658 oprnd1 = gimple_assign_rhs2 (last_stmt);
1659 itype = TREE_TYPE (oprnd0);
1660 if (TREE_CODE (oprnd0) != SSA_NAME
1661 || TREE_CODE (oprnd1) != INTEGER_CST
1662 || TREE_CODE (itype) != INTEGER_TYPE
127cb1cd 1663 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
18937389 1664 return NULL;
1665
1666 vectype = get_vectype_for_scalar_type (itype);
1667 if (vectype == NULL_TREE)
1668 return NULL;
1669
1670 /* If the target can handle vectorized division or modulo natively,
1671 don't attempt to optimize this. */
1672 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
6cdd383a 1673 if (optab != unknown_optab)
18937389 1674 {
1675 enum machine_mode vec_mode = TYPE_MODE (vectype);
1676 int icode = (int) optab_handler (optab, vec_mode);
82e76116 1677 if (icode != CODE_FOR_nothing)
18937389 1678 return NULL;
1679 }
1680
127cb1cd 1681 prec = TYPE_PRECISION (itype);
1682 if (integer_pow2p (oprnd1))
18937389 1683 {
127cb1cd 1684 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
1685 return NULL;
18937389 1686
127cb1cd 1687 /* Pattern detected. */
6d8fb6cf 1688 if (dump_enabled_p ())
7bd765d4 1689 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
1690 "vect_recog_divmod_pattern: detected: ");
127cb1cd 1691
1692 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
1693 build_int_cst (itype, 0));
1694 if (rhs_code == TRUNC_DIV_EXPR)
1695 {
1696 tree var = vect_recog_temp_ssa_var (itype, NULL);
1697 tree shift;
1698 def_stmt
446e85eb 1699 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1700 fold_build2 (MINUS_EXPR, itype,
1701 oprnd1,
1702 build_int_cst (itype,
1703 1)),
1704 build_int_cst (itype, 0));
127cb1cd 1705 new_pattern_def_seq (stmt_vinfo, def_stmt);
1706 var = vect_recog_temp_ssa_var (itype, NULL);
1707 def_stmt
1708 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
1709 gimple_assign_lhs (def_stmt));
1710 append_pattern_def_seq (stmt_vinfo, def_stmt);
1711
1712 shift = build_int_cst (itype, tree_log2 (oprnd1));
1713 pattern_stmt
1714 = gimple_build_assign_with_ops (RSHIFT_EXPR,
1715 vect_recog_temp_ssa_var (itype,
1716 NULL),
1717 var, shift);
1718 }
1719 else
1720 {
1721 tree signmask;
1722 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1723 if (compare_tree_int (oprnd1, 2) == 0)
1724 {
1725 signmask = vect_recog_temp_ssa_var (itype, NULL);
1726 def_stmt
446e85eb 1727 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
1728 build_int_cst (itype, 1),
1729 build_int_cst (itype, 0));
127cb1cd 1730 append_pattern_def_seq (stmt_vinfo, def_stmt);
1731 }
1732 else
1733 {
1734 tree utype
1735 = build_nonstandard_integer_type (prec, 1);
1736 tree vecutype = get_vectype_for_scalar_type (utype);
1737 tree shift
1738 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
1739 - tree_log2 (oprnd1));
1740 tree var = vect_recog_temp_ssa_var (utype, NULL);
1741
1742 def_stmt
446e85eb 1743 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
1744 build_int_cst (utype, -1),
1745 build_int_cst (utype, 0));
127cb1cd 1746 def_stmt_vinfo
1747 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1748 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1749 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1750 append_pattern_def_seq (stmt_vinfo, def_stmt);
1751 var = vect_recog_temp_ssa_var (utype, NULL);
1752 def_stmt
1753 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
1754 gimple_assign_lhs (def_stmt),
1755 shift);
1756 def_stmt_vinfo
1757 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1758 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1759 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
1760 append_pattern_def_seq (stmt_vinfo, def_stmt);
1761 signmask = vect_recog_temp_ssa_var (itype, NULL);
1762 def_stmt
1763 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var,
1764 NULL_TREE);
1765 append_pattern_def_seq (stmt_vinfo, def_stmt);
1766 }
1767 def_stmt
1768 = gimple_build_assign_with_ops (PLUS_EXPR,
1769 vect_recog_temp_ssa_var (itype,
1770 NULL),
1771 oprnd0, signmask);
1772 append_pattern_def_seq (stmt_vinfo, def_stmt);
1773 def_stmt
1774 = gimple_build_assign_with_ops (BIT_AND_EXPR,
1775 vect_recog_temp_ssa_var (itype,
1776 NULL),
1777 gimple_assign_lhs (def_stmt),
1778 fold_build2 (MINUS_EXPR, itype,
1779 oprnd1,
1780 build_int_cst (itype,
1781 1)));
1782 append_pattern_def_seq (stmt_vinfo, def_stmt);
1783
1784 pattern_stmt
1785 = gimple_build_assign_with_ops (MINUS_EXPR,
1786 vect_recog_temp_ssa_var (itype,
1787 NULL),
1788 gimple_assign_lhs (def_stmt),
1789 signmask);
1790 }
1791
6d8fb6cf 1792 if (dump_enabled_p ())
7bd765d4 1793 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
1794 0);
127cb1cd 1795
f1f41a6c 1796 stmts->safe_push (last_stmt);
127cb1cd 1797
1798 *type_in = vectype;
1799 *type_out = vectype;
1800 return pattern_stmt;
18937389 1801 }
127cb1cd 1802
1803 if (!host_integerp (oprnd1, TYPE_UNSIGNED (itype))
1804 || integer_zerop (oprnd1)
1805 || prec > HOST_BITS_PER_WIDE_INT)
1806 return NULL;
1807
ebf4f764 1808 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
1809 return NULL;
127cb1cd 1810
1811 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1812
1813 if (TYPE_UNSIGNED (itype))
18937389 1814 {
127cb1cd 1815 unsigned HOST_WIDE_INT mh, ml;
1816 int pre_shift, post_shift;
1817 unsigned HOST_WIDE_INT d = tree_low_cst (oprnd1, 1)
1818 & GET_MODE_MASK (TYPE_MODE (itype));
3af51fe9 1819 tree t1, t2, t3, t4;
127cb1cd 1820
1821 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
1822 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
1823 return NULL;
1824
1825 /* Find a suitable multiplier and right shift count
1826 instead of multiplying with D. */
1827 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
1828
1829 /* If the suggested multiplier is more than SIZE bits, we can do better
1830 for even divisors, using an initial right shift. */
1831 if (mh != 0 && (d & 1) == 0)
18937389 1832 {
127cb1cd 1833 pre_shift = floor_log2 (d & -d);
1834 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
1835 &ml, &post_shift, &dummy_int);
1836 gcc_assert (!mh);
1837 }
1838 else
1839 pre_shift = 0;
1840
1841 if (mh != 0)
1842 {
1843 if (post_shift - 1 >= prec)
1844 return NULL;
1845
3af51fe9 1846 /* t1 = oprnd0 h* ml;
1847 t2 = oprnd0 - t1;
1848 t3 = t2 >> 1;
1849 t4 = t1 + t3;
1850 q = t4 >> (post_shift - 1); */
1851 t1 = vect_recog_temp_ssa_var (itype, NULL);
18937389 1852 def_stmt
3af51fe9 1853 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
127cb1cd 1854 build_int_cst (itype, ml));
1855 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 1856
3af51fe9 1857 t2 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 1858 def_stmt
3af51fe9 1859 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
6d741312 1860 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 1861
1862 t3 = vect_recog_temp_ssa_var (itype, NULL);
1863 def_stmt
3af51fe9 1864 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
127cb1cd 1865 integer_one_node);
1866 append_pattern_def_seq (stmt_vinfo, def_stmt);
1867
3af51fe9 1868 t4 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 1869 def_stmt
3af51fe9 1870 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
127cb1cd 1871
1872 if (post_shift != 1)
1873 {
1874 append_pattern_def_seq (stmt_vinfo, def_stmt);
1875
3af51fe9 1876 q = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 1877 pattern_stmt
3af51fe9 1878 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
127cb1cd 1879 build_int_cst (itype,
1880 post_shift
1881 - 1));
1882 }
1883 else
1884 {
3af51fe9 1885 q = t4;
127cb1cd 1886 pattern_stmt = def_stmt;
1887 }
18937389 1888 }
1889 else
1890 {
127cb1cd 1891 if (pre_shift >= prec || post_shift >= prec)
1892 return NULL;
1893
1894 /* t1 = oprnd0 >> pre_shift;
3af51fe9 1895 t2 = t1 h* ml;
1896 q = t2 >> post_shift; */
127cb1cd 1897 if (pre_shift)
1898 {
1899 t1 = vect_recog_temp_ssa_var (itype, NULL);
1900 def_stmt
1901 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
1902 build_int_cst (NULL,
1903 pre_shift));
1904 append_pattern_def_seq (stmt_vinfo, def_stmt);
1905 }
1906 else
1907 t1 = oprnd0;
18937389 1908
3af51fe9 1909 t2 = vect_recog_temp_ssa_var (itype, NULL);
18937389 1910 def_stmt
3af51fe9 1911 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
127cb1cd 1912 build_int_cst (itype, ml));
127cb1cd 1913
3af51fe9 1914 if (post_shift)
1915 {
1916 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 1917
3af51fe9 1918 q = vect_recog_temp_ssa_var (itype, NULL);
1919 def_stmt
1920 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
1921 build_int_cst (itype,
1922 post_shift));
1923 }
1924 else
1925 q = t2;
1926
1927 pattern_stmt = def_stmt;
127cb1cd 1928 }
1929 }
1930 else
1931 {
1932 unsigned HOST_WIDE_INT ml;
60420e1c 1933 int post_shift;
127cb1cd 1934 HOST_WIDE_INT d = tree_low_cst (oprnd1, 0);
1935 unsigned HOST_WIDE_INT abs_d;
1936 bool add = false;
3af51fe9 1937 tree t1, t2, t3, t4;
127cb1cd 1938
1939 /* Give up for -1. */
1940 if (d == -1)
1941 return NULL;
1942
127cb1cd 1943 /* Since d might be INT_MIN, we have to cast to
1944 unsigned HOST_WIDE_INT before negating to avoid
1945 undefined signed overflow. */
1946 abs_d = (d >= 0
1947 ? (unsigned HOST_WIDE_INT) d
1948 : - (unsigned HOST_WIDE_INT) d);
1949
1950 /* n rem d = n rem -d */
1951 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
1952 {
1953 d = abs_d;
1954 oprnd1 = build_int_cst (itype, abs_d);
1955 }
1956 else if (HOST_BITS_PER_WIDE_INT >= prec
1957 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1958 /* This case is not handled correctly below. */
1959 return NULL;
1960
60420e1c 1961 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
127cb1cd 1962 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
1963 {
1964 add = true;
1965 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
1966 }
1967 if (post_shift >= prec)
1968 return NULL;
1969
3af51fe9 1970 /* t1 = oprnd1 h* ml; */
1971 t1 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 1972 def_stmt
3af51fe9 1973 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
127cb1cd 1974 build_int_cst (itype, ml));
1975 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 1976
1977 if (add)
1978 {
3af51fe9 1979 /* t2 = t1 + oprnd0; */
1980 t2 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 1981 def_stmt
3af51fe9 1982 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
6d741312 1983 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 1984 }
1985 else
3af51fe9 1986 t2 = t1;
127cb1cd 1987
3af51fe9 1988 if (post_shift)
127cb1cd 1989 {
3af51fe9 1990 /* t3 = t2 >> post_shift; */
1991 t3 = vect_recog_temp_ssa_var (itype, NULL);
18937389 1992 def_stmt
3af51fe9 1993 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
127cb1cd 1994 build_int_cst (itype, post_shift));
6d741312 1995 append_pattern_def_seq (stmt_vinfo, def_stmt);
18937389 1996 }
127cb1cd 1997 else
3af51fe9 1998 t3 = t2;
127cb1cd 1999
3af51fe9 2000 /* t4 = oprnd0 >> (prec - 1); */
2001 t4 = vect_recog_temp_ssa_var (itype, NULL);
18937389 2002 def_stmt
3af51fe9 2003 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
127cb1cd 2004 build_int_cst (itype, prec - 1));
6d741312 2005 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 2006
3af51fe9 2007 /* q = t3 - t4; or q = t4 - t3; */
127cb1cd 2008 q = vect_recog_temp_ssa_var (itype, NULL);
2009 pattern_stmt
3af51fe9 2010 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2011 d < 0 ? t3 : t4);
127cb1cd 2012 }
2013
2014 if (rhs_code == TRUNC_MOD_EXPR)
2015 {
2016 tree r, t1;
2017
2018 /* We divided. Now finish by:
2019 t1 = q * oprnd1;
2020 r = oprnd0 - t1; */
2021 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2022
2023 t1 = vect_recog_temp_ssa_var (itype, NULL);
18937389 2024 def_stmt
127cb1cd 2025 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
6d741312 2026 append_pattern_def_seq (stmt_vinfo, def_stmt);
18937389 2027
127cb1cd 2028 r = vect_recog_temp_ssa_var (itype, NULL);
18937389 2029 pattern_stmt
127cb1cd 2030 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
18937389 2031 }
2032
127cb1cd 2033 /* Pattern detected. */
6d8fb6cf 2034 if (dump_enabled_p ())
7bd765d4 2035 {
2036 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2037 "vect_recog_divmod_pattern: detected: ");
2038 dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
2039 }
18937389 2040
f1f41a6c 2041 stmts->safe_push (last_stmt);
18937389 2042
2043 *type_in = vectype;
2044 *type_out = vectype;
2045 return pattern_stmt;
2046}
2047
/* Function vect_recog_mixed_size_cond_pattern

   Try to find the following pattern:

     type x_t, y_t;
     TYPE a_T, b_T, c_T;
   loop:
     S1  a_T = x_t CMP y_t ? b_T : c_T;

   where type 'TYPE' is an integral type which has different size
   from 'type'.  b_T and c_T are either constants (and if 'TYPE' is wider
   than 'type', the constants need to fit into an integer type
   with the same width as 'type') or results of conversion from 'type'.

   Input:

   * LAST_STMT: A stmt from which the pattern search begins.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.
	Additionally a def_stmt is added.

	a_it = x_t CMP y_t ? b_it : c_it;
	a_T = (TYPE) a_it;  */

static gimple
vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
				    tree *type_out)
{
  gimple last_stmt = (*stmts)[0];
  tree cond_expr, then_clause, else_clause;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
  tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
  enum machine_mode cmpmode;
  gimple pattern_stmt, def_stmt;
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
  gimple def_stmt0 = NULL, def_stmt1 = NULL;
  bool promotion;
  tree comp_scalar_type;

  /* Only an internal-def COND_EXPR assignment can start this pattern.  */
  if (!is_gimple_assign (last_stmt)
      || gimple_assign_rhs_code (last_stmt) != COND_EXPR
      || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
    return NULL;

  cond_expr = gimple_assign_rhs1 (last_stmt);
  then_clause = gimple_assign_rhs2 (last_stmt);
  else_clause = gimple_assign_rhs3 (last_stmt);

  if (!COMPARISON_CLASS_P (cond_expr))
    return NULL;

  /* The comparison operands determine the "narrow" side of the pattern;
     it must have a vector type of its own.  */
  comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
  comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
  if (comp_vectype == NULL_TREE)
    return NULL;

  /* Require a genuinely mixed-size integral COND_EXPR: the result type
     must differ from the comparison type.  Non-constant clauses are only
     allowed when the comparison type is integral (they must be
     conversions from it, checked below).  */
  type = gimple_expr_type (last_stmt);
  if (types_compatible_p (type, comp_scalar_type)
      || ((TREE_CODE (then_clause) != INTEGER_CST
	   || TREE_CODE (else_clause) != INTEGER_CST)
	  && !INTEGRAL_TYPE_P (comp_scalar_type))
      || !INTEGRAL_TYPE_P (type))
    return NULL;

  /* Each non-constant clause must be the result of a conversion; record
     the pre-conversion type and defining stmt.  */
  if ((TREE_CODE (then_clause) != INTEGER_CST
       && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
                              &def_stmt0, &promotion))
      || (TREE_CODE (else_clause) != INTEGER_CST
          && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
                                 &def_stmt1, &promotion)))
    return NULL;

  /* Both converted clauses must agree on the source type.  */
  if (orig_type0 && orig_type1
      && !types_compatible_p (orig_type0, orig_type1))
    return NULL;

  if (orig_type0)
    {
      if (!types_compatible_p (orig_type0, comp_scalar_type))
	return NULL;
      /* Strip the conversion; use the narrow operand directly.  */
      then_clause = gimple_assign_rhs1 (def_stmt0);
      itype = orig_type0;
    }

  if (orig_type1)
    {
      if (!types_compatible_p (orig_type1, comp_scalar_type))
	return NULL;
      else_clause = gimple_assign_rhs1 (def_stmt1);
      itype = orig_type1;
    }

  cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));

  /* Same bitsize means there is no mixed-size problem to fix.  */
  if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vectype = get_vectype_for_scalar_type (type);
  if (vectype == NULL_TREE)
    return NULL;

  /* If the target already supports the wide VEC_COND_EXPR directly,
     leave the stmt alone.  */
  if (expand_vec_cond_expr_p (vectype, comp_vectype))
    return NULL;

  /* Otherwise do the COND_EXPR in an intermediate type with the same
     width as the comparison, then convert to the final type.  */
  if (itype == NULL_TREE)
    itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
					    TYPE_UNSIGNED (type));

  if (itype == NULL_TREE
      || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
    return NULL;

  vecitype = get_vectype_for_scalar_type (itype);
  if (vecitype == NULL_TREE)
    return NULL;

  /* The intermediate-width VEC_COND_EXPR must be supported.  */
  if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
    return NULL;

  /* When narrowing the clauses into ITYPE, constant clauses must not
     lose bits.  */
  if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
    {
      if ((TREE_CODE (then_clause) == INTEGER_CST
	   && !int_fits_type_p (then_clause, itype))
	  || (TREE_CODE (else_clause) == INTEGER_CST
	      && !int_fits_type_p (else_clause, itype)))
	return NULL;
    }

  /* Emit:  tmp_it = cond ? (itype) then : (itype) else;
	     result = (type) tmp_it;  */
  def_stmt
    = gimple_build_assign_with_ops (COND_EXPR,
				    vect_recog_temp_ssa_var (itype, NULL),
				    unshare_expr (cond_expr),
				    fold_convert (itype, then_clause),
				    fold_convert (itype, else_clause));
  pattern_stmt
    = gimple_build_assign_with_ops (NOP_EXPR,
				    vect_recog_temp_ssa_var (type, NULL),
				    gimple_assign_lhs (def_stmt), NULL_TREE);

  new_pattern_def_seq (stmt_vinfo, def_stmt);
  def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
  set_vinfo_for_stmt (def_stmt, def_stmt_info);
  STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
  *type_in = vecitype;
  *type_out = vectype;

  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
		     "vect_recog_mixed_size_cond_pattern: detected: ");

  return pattern_stmt;
}
2208
2209
/* Helper function of vect_recog_bool_pattern.  Called recursively, return
   true if bool VAR can be optimized that way.  I.e. every stmt in VAR's
   def chain is either a bool-valued copy/conversion/bitwise op (whose
   operands in turn satisfy this predicate) or a comparison the target can
   expand as a VEC_COND_EXPR.  */

static bool
check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
{
  gimple def_stmt;
  enum vect_def_type dt;
  tree def, rhs1;
  enum tree_code rhs_code;

  if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
			   &dt))
    return false;

  /* Only defs computed inside the vectorized region can be rewritten.  */
  if (dt != vect_internal_def)
    return false;

  if (!is_gimple_assign (def_stmt))
    return false;

  /* Multiple uses would force duplicating the rewritten chain.  */
  if (!has_single_use (def))
    return false;

  rhs1 = gimple_assign_rhs1 (def_stmt);
  rhs_code = gimple_assign_rhs_code (def_stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
      /* Plain copy: recurse into the source.  */
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    CASE_CONVERT:
      /* Only conversions from a 1-bit unsigned or boolean source keep
	 the value two-valued.  */
      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
	return false;
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_NOT_EXPR:
      return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      /* Both operands must themselves be optimizable bool chains.  */
      if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
	return false;
      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
				 bb_vinfo);

    default:
      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
	{
	  tree vecitype, comp_vectype;

	  /* If the comparison can throw, then is_gimple_condexpr will be
	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
	  if (stmt_could_throw_p (def_stmt))
	    return false;

	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
	  if (comp_vectype == NULL_TREE)
	    return false;

	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
	    {
	      /* Non-integer comparison operands (e.g. floats): the
		 COND_EXPR result will live in a same-width unsigned
		 integer type; that type must be vectorizable too.  */
	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	      tree itype
		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	      vecitype = get_vectype_for_scalar_type (itype);
	      if (vecitype == NULL_TREE)
		return false;
	    }
	  else
	    vecitype = comp_vectype;
	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
	}
      return false;
    }
}
2289
2290
2291/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2292 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
18937389 2293 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
50f85e2e 2294
2295static tree
2296adjust_bool_pattern_cast (tree type, tree var)
2297{
2298 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2299 gimple cast_stmt, pattern_stmt;
2300
18937389 2301 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
50f85e2e 2302 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
6d741312 2303 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
50f85e2e 2304 cast_stmt
2305 = gimple_build_assign_with_ops (NOP_EXPR,
2306 vect_recog_temp_ssa_var (type, NULL),
2307 gimple_assign_lhs (pattern_stmt),
2308 NULL_TREE);
2309 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2310 return gimple_assign_lhs (cast_stmt);
2311}
2312
2313
/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
   recursively.  VAR is an SSA_NAME that should be transformed from bool
   to a wider integer type, OUT_TYPE is the desired final integer type of
   the whole pattern, TRUEVAL should be NULL unless optimizing
   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
   in the then_clause, STMTS is where statements with added pattern stmts
   should be pushed to.  Returns the SSA name holding the integer-typed
   replacement value for VAR.  */

static tree
adjust_bool_pattern (tree var, tree out_type, tree trueval,
		     vec<gimple> *stmts)
{
  gimple stmt = SSA_NAME_DEF_STMT (var);
  enum tree_code rhs_code, def_rhs_code;
  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
  location_t loc;
  gimple pattern_stmt, def_stmt;

  rhs1 = gimple_assign_rhs1 (stmt);
  rhs2 = gimple_assign_rhs2 (stmt);
  rhs_code = gimple_assign_rhs_code (stmt);
  loc = gimple_location (stmt);
  switch (rhs_code)
    {
    case SSA_NAME:
    CASE_CONVERT:
      /* Copies and bool conversions become plain copies in the
	 integer type of the transformed operand.  */
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, NULL_TREE);
      break;

    case BIT_NOT_EXPR:
      /* Bool negation becomes XOR with 1 (values are only 0/1).  */
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, build_int_cst (itype, 1));
      break;

    case BIT_AND_EXPR:
      /* Try to optimize x = y & (a < b ? 1 : 0); into
	 x = (a < b ? y : 0);

	 E.g. for:
	   bool a_b, b_b, c_b;
	   TYPE d_T;

	   S1  a_b = x1 CMP1 y1;
	   S2  b_b = x2 CMP2 y2;
	   S3  c_b = a_b & b_b;
	   S4  d_T = (TYPE) c_b;

	 we would normally emit:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	   S3'  c_T = a_T & b_T;
	   S4'  d_T = c_T;

	 but we can save one stmt by using the
	 result of one of the COND_EXPRs in the other COND_EXPR and leave
	 BIT_AND_EXPR stmt out:

	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	   S4'  f_T = c_T;

	 At least when VEC_COND_EXPR is implemented using masks
	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
	 computes the comparison masks and ands it, in one case with
	 all ones vector, in the other case with a vector register.
	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
	 often more expensive.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs2);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  /* Only fold the AND into the COND_EXPR when the widths match,
	     otherwise a cast would be needed anyway.  */
	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
	      /* The recursion pushed DEF_STMT last; replace it with STMT,
		 since the comparison's pattern stmt now also replaces the
		 BIT_AND_EXPR.  */
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs2;
	    }
	  else
	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* Symmetric case: the comparison defines RHS1 instead of RHS2.  */
      def_stmt = SSA_NAME_DEF_STMT (rhs1);
      def_rhs_code = gimple_assign_rhs_code (def_stmt);
      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
	{
	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
	    {
	      gimple tstmt;
	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
	      tstmt = stmts->pop ();
	      gcc_assert (tstmt == def_stmt);
	      stmts->quick_push (stmt);
	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
	      gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
	      return irhs1;
	    }
	  else
	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
	  goto and_ior_xor;
	}
      /* FALLTHRU */
    case BIT_IOR_EXPR:
    case BIT_XOR_EXPR:
      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
    and_ior_xor:
      if (TYPE_PRECISION (TREE_TYPE (irhs1))
	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
	{
	  /* Cast the narrower operand towards the one whose precision is
	     closer to OUT_TYPE, to minimize later conversions.  */
	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
	  int out_prec = TYPE_PRECISION (out_type);
	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
	  else
	    {
	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
	    }
	}
      itype = TREE_TYPE (irhs1);
      pattern_stmt
	= gimple_build_assign_with_ops (rhs_code,
					vect_recog_temp_ssa_var (itype, NULL),
					irhs1, irhs2);
      break;

    default:
      /* Leaf of the recursion: a comparison.  Turn it into
	 cond ? trueval : 0 in an unsigned integer type matching the
	 comparison operand width.  */
      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
	  || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
	  || (TYPE_PRECISION (TREE_TYPE (rhs1))
	      != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
	{
	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
	  itype
	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
	}
      else
	itype = TREE_TYPE (rhs1);
      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
      if (trueval == NULL_TREE)
	trueval = build_int_cst (itype, 1);
      else
	gcc_checking_assert (useless_type_conversion_p (itype,
							TREE_TYPE (trueval)));
      pattern_stmt
	= gimple_build_assign_with_ops (COND_EXPR,
					vect_recog_temp_ssa_var (itype, NULL),
					cond_expr, trueval,
					build_int_cst (itype, 0));
      break;
    }

  /* Record STMT as replaced and link it to its pattern stmt.  */
  stmts->safe_push (stmt);
  gimple_set_location (pattern_stmt, loc);
  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
  return gimple_assign_lhs (pattern_stmt);
}
2502
2503
/* Function vect_recog_bool_pattern

   Try to find pattern like following:

     bool a_b, b_b, c_b, d_b, e_b;
     TYPE f_T;
   loop:
     S1  a_b = x1 CMP1 y1;
     S2  b_b = x2 CMP2 y2;
     S3  c_b = a_b & b_b;
     S4  d_b = x3 CMP3 y3;
     S5  e_b = c_b | d_b;
     S6  f_T = (TYPE) e_b;

   where type 'TYPE' is an integral type.

   Input:

   * LAST_STMT: A stmt at the end from which the pattern
		search begins, i.e. cast of a bool to
		an integer type.

   Output:

   * TYPE_IN: The type of the input arguments to the pattern.

   * TYPE_OUT: The type of the output of this pattern.

   * Return value: A new stmt that will be used to replace the pattern.

	Assuming size of TYPE is the same as size of all comparisons
	(otherwise some casts would be added where needed), the above
	sequence we create related pattern stmts:
	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
	S5'  e_T = c_T | d_T;
	S6'  f_T = e_T;

	Instead of the above S3' we could emit:
	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
	S3'  c_T = a_T | b_T;
	but the above is more efficient.  */

static gimple
vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
			 tree *type_out)
{
  gimple last_stmt = stmts->pop ();
  enum tree_code rhs_code;
  tree var, lhs, rhs, vectype;
  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
  bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
  gimple pattern_stmt;

  if (!is_gimple_assign (last_stmt))
    return NULL;

  var = gimple_assign_rhs1 (last_stmt);
  lhs = gimple_assign_lhs (last_stmt);

  /* VAR must be bool-valued: either 1-bit unsigned or BOOLEAN_TYPE.  */
  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
       || !TYPE_UNSIGNED (TREE_TYPE (var)))
      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
    return NULL;

  rhs_code = gimple_assign_rhs_code (last_stmt);
  if (CONVERT_EXPR_CODE_P (rhs_code))
    {
      /* Case 1: cast of a bool chain to a wider integer.  */
      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
	  || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
	return NULL;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
      if (vectype == NULL_TREE)
	return NULL;

      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	pattern_stmt
	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      else
	pattern_stmt
	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "vect_recog_bool_pattern: detected: ");

      return pattern_stmt;
    }
  else if (rhs_code == SSA_NAME
	   && STMT_VINFO_DATA_REF (stmt_vinfo))
    {
      /* Case 2: a bool value stored to memory; rewrite the bool chain
	 into integer stmts and transfer the data reference to the new
	 store stmt.  */
      stmt_vec_info pattern_stmt_info;
      vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
      gcc_assert (vectype != NULL_TREE);
      if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
	return NULL;
      if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
	return NULL;

      rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
      lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
      if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
	{
	  tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
	  gimple cast_stmt
	    = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs, NULL_TREE);
	  new_pattern_def_seq (stmt_vinfo, cast_stmt);
	  rhs = rhs2;
	}
      pattern_stmt
	= gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
      pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
					     bb_vinfo);
      set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
      /* Copy all data-reference info so the pattern stmt fully stands in
	 for the original store.  */
      STMT_VINFO_DATA_REF (pattern_stmt_info)
	= STMT_VINFO_DATA_REF (stmt_vinfo);
      STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
	= STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
      STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
      STMT_VINFO_DR_OFFSET (pattern_stmt_info)
	= STMT_VINFO_DR_OFFSET (stmt_vinfo);
      STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
      STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
	= STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
      DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
      *type_out = vectype;
      *type_in = vectype;
      stmts->safe_push (last_stmt);
      if (dump_enabled_p ())
	dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
			 "vect_recog_bool_pattern: detected: ");
      return pattern_stmt;
    }
  else
    return NULL;
}
2649
2650
45eea33f 2651/* Mark statements that are involved in a pattern. */
2652
2653static inline void
2654vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
2655 tree pattern_vectype)
2656{
2657 stmt_vec_info pattern_stmt_info, def_stmt_info;
2658 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
2659 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
4c0c783a 2660 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
45eea33f 2661 gimple def_stmt;
2662
45eea33f 2663 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
d6152abc 2664 if (pattern_stmt_info == NULL)
2665 {
4c0c783a 2666 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
2667 bb_vinfo);
d6152abc 2668 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
2669 }
2670 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
45eea33f 2671
2672 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
2673 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
d6152abc 2674 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
45eea33f 2675 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
2676 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
2677 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
18937389 2678 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
2679 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
2680 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
45eea33f 2681 {
18937389 2682 gimple_stmt_iterator si;
2683 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
2684 !gsi_end_p (si); gsi_next (&si))
84557284 2685 {
18937389 2686 def_stmt = gsi_stmt (si);
2687 def_stmt_info = vinfo_for_stmt (def_stmt);
2688 if (def_stmt_info == NULL)
2689 {
4c0c783a 2690 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
2691 bb_vinfo);
18937389 2692 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2693 }
2694 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
2695 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
2696 STMT_VINFO_DEF_TYPE (def_stmt_info)
2697 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
2698 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
2699 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
84557284 2700 }
45eea33f 2701 }
2702}
2703
/* Function vect_pattern_recog_1

   Input:
   PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
        computation pattern.
   STMT: A stmt from which the pattern search should start.

   If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
   expression that computes the same functionality and can be used to
   replace the sequence of stmts that are involved in the pattern.

   Output:
   This function checks if the expression returned by PATTERN_RECOG_FUNC is
   supported in vector form by the target.  We use 'TYPE_IN' to obtain the
   relevant vector type.  If 'TYPE_IN' is already a vector type, then this
   indicates that target support had already been checked by PATTERN_RECOG_FUNC.
   If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
   to the available target pattern.

   This function also does some bookkeeping, as explained in the documentation
   for vect_recog_pattern.  */

static void
vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
		      gimple_stmt_iterator si,
		      vec<gimple> *stmts_to_replace)
{
  gimple stmt = gsi_stmt (si), pattern_stmt;
  stmt_vec_info stmt_info;
  loop_vec_info loop_vinfo;
  tree pattern_vectype;
  tree type_in, type_out;
  enum tree_code code;
  int i;
  gimple next;

  /* The recognizer receives a vec seeded with STMT; it may append more
     stmts it wants replaced.  */
  stmts_to_replace->truncate (0);
  stmts_to_replace->quick_push (stmt);
  pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
  if (!pattern_stmt)
    return;

  /* The last pushed stmt is the "root" of the detected pattern.  */
  stmt = stmts_to_replace->last ();
  stmt_info = vinfo_for_stmt (stmt);
  loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);

  if (VECTOR_MODE_P (TYPE_MODE (type_in)))
    {
      /* No need to check target support (already checked by the pattern
         recognition function).  */
      pattern_vectype = type_out ? type_out : type_in;
    }
  else
    {
      enum machine_mode vec_mode;
      enum insn_code icode;
      optab optab;

      /* Check target support  */
      type_in = get_vectype_for_scalar_type (type_in);
      if (!type_in)
	return;
      if (type_out)
	type_out = get_vectype_for_scalar_type (type_out);
      else
	type_out = type_in;
      if (!type_out)
	return;
      pattern_vectype = type_out;

      if (is_gimple_assign (pattern_stmt))
	code = gimple_assign_rhs_code (pattern_stmt);
      else
	{
	  gcc_assert (is_gimple_call (pattern_stmt));
	  code = CALL_EXPR;
	}

      /* The target must have an insn for CODE on TYPE_IN whose result
	 mode matches TYPE_OUT.  */
      optab = optab_for_tree_code (code, type_in, optab_default);
      vec_mode = TYPE_MODE (type_in);
      if (!optab
	  || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
	  || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
	return;
    }

  /* Found a vectorizable pattern.  */
  if (dump_enabled_p ())
    {
      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                       "pattern recognized: ");
      dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
    }

  /* Mark the stmts that are involved in the pattern. */
  vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);

  /* Patterns cannot be vectorized using SLP, because they change the order of
     computation.  */
  if (loop_vinfo)
    FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
      if (next == stmt)
	LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);

  /* It is possible that additional pattern stmts are created and inserted in
     STMTS_TO_REPLACE.  We create a stmt_info for each of them, and mark the
     relevant statements.  The last element was handled above, hence the
     length () - 1 bound.  */
  for (i = 0; stmts_to_replace->iterate (i, &stmt)
	      && (unsigned) i < (stmts_to_replace->length () - 1);
       i++)
    {
      stmt_info = vinfo_for_stmt (stmt);
      pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
      if (dump_enabled_p ())
	{
	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
                           "additional pattern stmt: ");
	  dump_gimple_stmt (MSG_OPTIMIZED_LOCATIONS, TDF_SLIM, pattern_stmt, 0);
	}

      vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
    }
}
2827
2828
2829/* Function vect_pattern_recog
2830
   Input:
   LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
        computation idioms.  When vectorizing a basic block rather than a
        loop, LOOP_VINFO is NULL and BB_VINFO describes the block to scan.
2834
cfdcf183 2835 Output - for each computation idiom that is detected we create a new stmt
2836 that provides the same functionality and that can be vectorized. We
4a61a337 2837 also record some information in the struct_stmt_info of the relevant
2838 stmts, as explained below:
2839
2840 At the entry to this function we have the following stmts, with the
2841 following initial value in the STMT_VINFO fields:
2842
2843 stmt in_pattern_p related_stmt vec_stmt
2844 S1: a_i = .... - - -
2845 S2: a_2 = ..use(a_i).. - - -
2846 S3: a_1 = ..use(a_2).. - - -
2847 S4: a_0 = ..use(a_1).. - - -
2848 S5: ... = ..use(a_0).. - - -
2849
2850 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
cfdcf183 2851 represented by a single stmt. We then:
2852 - create a new stmt S6 equivalent to the pattern (the stmt is not
2853 inserted into the code)
4a61a337 2854 - fill in the STMT_VINFO fields as follows:
2855
2856 in_pattern_p related_stmt vec_stmt
48e1416a 2857 S1: a_i = .... - - -
4a61a337 2858 S2: a_2 = ..use(a_i).. - - -
2859 S3: a_1 = ..use(a_2).. - - -
4a61a337 2860 S4: a_0 = ..use(a_1).. true S6 -
cfdcf183 2861 '---> S6: a_new = .... - S4 -
4a61a337 2862 S5: ... = ..use(a_0).. - - -
2863
2864 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
cfdcf183 2865 to each other through the RELATED_STMT field).
4a61a337 2866
2867 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
2868 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
2869 remain irrelevant unless used by stmts other than S4.
2870
2871 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
cfdcf183 2872 (because they are marked as irrelevant). It will vectorize S6, and record
8bf58742 2873 a pointer to the new vector stmt VS6 from S6 (as usual).
2874 S4 will be skipped, and S5 will be vectorized as usual:
4a61a337 2875
2876 in_pattern_p related_stmt vec_stmt
2877 S1: a_i = .... - - -
2878 S2: a_2 = ..use(a_i).. - - -
2879 S3: a_1 = ..use(a_2).. - - -
2880 > VS6: va_new = .... - - -
4a61a337 2881 S4: a_0 = ..use(a_1).. true S6 VS6
cfdcf183 2882 '---> S6: a_new = .... - S4 VS6
4a61a337 2883 > VS5: ... = ..vuse(va_new).. - - -
2884 S5: ... = ..use(a_0).. - - -
2885
cfdcf183 2886 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4a61a337 2887 elsewhere), and we'll end up with:
2888
48e1416a 2889 VS6: va_new = ....
8bf58742 2890 VS5: ... = ..vuse(va_new)..
2891
2892 In case of more than one pattern statements, e.g., widen-mult with
2893 intermediate type:
2894
2895 S1 a_t = ;
2896 S2 a_T = (TYPE) a_t;
2897 '--> S3: a_it = (interm_type) a_t;
2898 S4 prod_T = a_T * CONST;
2899 '--> S5: prod_T' = a_it w* CONST;
2900
2901 there may be other users of a_T outside the pattern. In that case S2 will
2902 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
2903 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
2904 be recorded in S3. */
4a61a337 2905
2906void
4c0c783a 2907vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
4a61a337 2908{
4c0c783a 2909 struct loop *loop;
eca8fccf 2910 basic_block *bbs;
4c0c783a 2911 unsigned int nbbs;
75a70cf9 2912 gimple_stmt_iterator si;
4a61a337 2913 unsigned int i, j;
32497628 2914 vect_recog_func_ptr vect_recog_func;
f1f41a6c 2915 vec<gimple> stmts_to_replace;
2916 stmts_to_replace.create (1);
4c0c783a 2917 gimple stmt;
4a61a337 2918
6d8fb6cf 2919 if (dump_enabled_p ())
7bd765d4 2920 dump_printf_loc (MSG_NOTE, vect_location,
2921 "=== vect_pattern_recog ===");
4a61a337 2922
4c0c783a 2923 if (loop_vinfo)
2924 {
2925 loop = LOOP_VINFO_LOOP (loop_vinfo);
2926 bbs = LOOP_VINFO_BBS (loop_vinfo);
2927 nbbs = loop->num_nodes;
2928 }
2929 else
2930 {
eca8fccf 2931 bbs = &BB_VINFO_BB (bb_vinfo);
4c0c783a 2932 nbbs = 1;
4c0c783a 2933 }
2934
4a61a337 2935 /* Scan through the loop stmts, applying the pattern recognition
2936 functions starting at each stmt visited: */
2937 for (i = 0; i < nbbs; i++)
2938 {
2939 basic_block bb = bbs[i];
75a70cf9 2940 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4a61a337 2941 {
4c0c783a 2942 if (bb_vinfo && (stmt = gsi_stmt (si))
2943 && vinfo_for_stmt (stmt)
2944 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
2945 continue;
2946
4a61a337 2947 /* Scan over all generic vect_recog_xxx_pattern functions. */
2948 for (j = 0; j < NUM_PATTERNS; j++)
2949 {
32497628 2950 vect_recog_func = vect_vect_recog_func_ptrs[j];
2951 vect_pattern_recog_1 (vect_recog_func, si,
929a4812 2952 &stmts_to_replace);
4a61a337 2953 }
2954 }
2955 }
929a4812 2956
f1f41a6c 2957 stmts_to_replace.release ();
4a61a337 2958}