]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
* gimple.h (gimple_build_assign_with_ops): Add unary arg overload.
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
4a61a337 1/* Analysis Utilities for Loop Vectorization.
3aea1f79 2 Copyright (C) 2006-2014 Free Software Foundation, Inc.
4a61a337 3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
8c4c00c1 9Software Foundation; either version 3, or (at your option) any later
4a61a337 10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
8c4c00c1 18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
4a61a337 20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
4a61a337 25#include "tree.h"
9ed99284 26#include "stor-layout.h"
4a61a337 27#include "target.h"
94ea8568 28#include "predict.h"
29#include "vec.h"
30#include "hashtab.h"
31#include "hash-set.h"
32#include "machmode.h"
33#include "hard-reg-set.h"
34#include "input.h"
35#include "function.h"
36#include "dominance.h"
4a61a337 37#include "basic-block.h"
ce084dfc 38#include "gimple-pretty-print.h"
bc61cadb 39#include "tree-ssa-alias.h"
40#include "internal-fn.h"
41#include "tree-eh.h"
42#include "gimple-expr.h"
43#include "is-a.h"
073c1fd5 44#include "gimple.h"
a8783bee 45#include "gimplify.h"
dcf1a1ec 46#include "gimple-iterator.h"
073c1fd5 47#include "gimple-ssa.h"
48#include "tree-phinodes.h"
49#include "ssa-iterators.h"
9ed99284 50#include "stringpool.h"
073c1fd5 51#include "tree-ssanames.h"
4a61a337 52#include "cfgloop.h"
53#include "expr.h"
34517c64 54#include "insn-codes.h"
4a61a337 55#include "optabs.h"
56#include "params.h"
57#include "tree-data-ref.h"
58#include "tree-vectorizer.h"
b9ed1410 59#include "recog.h" /* FIXME: for insn_data */
0b205f4c 60#include "diagnostic-core.h"
b9ed1410 61#include "dumpfile.h"
f7715905 62#include "builtins.h"
4a61a337 63
4a61a337 64/* Pattern recognition functions */
f1f41a6c 65static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
0187b74e 66 tree *);
f1f41a6c 67static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
0187b74e 68 tree *);
f1f41a6c 69static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
0187b74e 70 tree *);
a2287001 71static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
72 tree *);
f1f41a6c 73static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
74static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
45eea33f 75 tree *);
f1f41a6c 76static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
6083c152 77 tree *, tree *);
cb4792d2 78static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
f1f41a6c 79static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
40dfedc8 80 tree *, tree *);
f1f41a6c 81static gimple vect_recog_divmod_pattern (vec<gimple> *,
127cb1cd 82 tree *, tree *);
f1f41a6c 83static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
84557284 84 tree *, tree *);
f1f41a6c 85static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
4a61a337 86static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
87 vect_recog_widen_mult_pattern,
88 vect_recog_widen_sum_pattern,
c37bea13 89 vect_recog_dot_prod_pattern,
a2287001 90 vect_recog_sad_pattern,
45eea33f 91 vect_recog_pow_pattern,
6083c152 92 vect_recog_widen_shift_pattern,
da5b41a4 93 vect_recog_over_widening_pattern,
cb4792d2 94 vect_recog_rotate_pattern,
40dfedc8 95 vect_recog_vector_vector_shift_pattern,
127cb1cd 96 vect_recog_divmod_pattern,
50f85e2e 97 vect_recog_mixed_size_cond_pattern,
98 vect_recog_bool_pattern};
4a61a337 99
6d741312 100static inline void
101append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
102{
e0d98d5f 103 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
104 stmt);
6d741312 105}
106
107static inline void
108new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
109{
110 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
111 append_pattern_def_seq (stmt_info, stmt);
112}
113
33f33894 114/* Check whether STMT2 is in the same loop or basic block as STMT1.
115 Which of the two applies depends on whether we're currently doing
116 loop-based or basic-block-based vectorization, as determined by
117 the vinfo_for_stmt for STMT1 (which must be defined).
118
119 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
120 to be defined as well. */
121
122static bool
123vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
124{
125 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
126 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
127 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
128
129 if (!gimple_bb (stmt2))
130 return false;
131
132 if (loop_vinfo)
133 {
134 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
135 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
136 return false;
137 }
138 else
139 {
140 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
141 || gimple_code (stmt2) == GIMPLE_PHI)
142 return false;
143 }
144
145 gcc_assert (vinfo_for_stmt (stmt2));
146 return true;
147}
148
019bbf38 149/* If the LHS of DEF_STMT has a single use, and that statement is
150 in the same loop or basic block, return it. */
151
152static gimple
153vect_single_imm_use (gimple def_stmt)
154{
155 tree lhs = gimple_assign_lhs (def_stmt);
156 use_operand_p use_p;
157 gimple use_stmt;
158
159 if (!single_imm_use (lhs, &use_p, &use_stmt))
160 return NULL;
161
162 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
163 return NULL;
164
165 return use_stmt;
166}
167
087903db 168/* Check whether NAME, an ssa-name used in USE_STMT,
a2287001 169 is a result of a type promotion, such that:
4a61a337 170 DEF_STMT: NAME = NOP (name0)
0eee81bc 171 If CHECK_SIGN is TRUE, check that either both types are signed or both are
172 unsigned. */
4a61a337 173
174static bool
087903db 175type_conversion_p (tree name, gimple use_stmt, bool check_sign,
176 tree *orig_type, gimple *def_stmt, bool *promotion)
4a61a337 177{
178 tree dummy;
75a70cf9 179 gimple dummy_gimple;
4a61a337 180 loop_vec_info loop_vinfo;
181 stmt_vec_info stmt_vinfo;
4a61a337 182 tree type = TREE_TYPE (name);
183 tree oprnd0;
184 enum vect_def_type dt;
185 tree def;
4c0c783a 186 bb_vec_info bb_vinfo;
4a61a337 187
188 stmt_vinfo = vinfo_for_stmt (use_stmt);
189 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4c0c783a 190 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
191 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
192 &def, &dt))
4a61a337 193 return false;
194
f083cd24 195 if (dt != vect_internal_def
196 && dt != vect_external_def && dt != vect_constant_def)
4a61a337 197 return false;
198
087903db 199 if (!*def_stmt)
4a61a337 200 return false;
201
75a70cf9 202 if (!is_gimple_assign (*def_stmt))
4a61a337 203 return false;
204
087903db 205 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
4a61a337 206 return false;
207
75a70cf9 208 oprnd0 = gimple_assign_rhs1 (*def_stmt);
4a61a337 209
087903db 210 *orig_type = TREE_TYPE (oprnd0);
211 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
212 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
213 return false;
214
215 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
216 *promotion = true;
087903db 217 else
a2287001 218 *promotion = false;
4a61a337 219
bed8b93b 220 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
4c0c783a 221 bb_vinfo, &dummy_gimple, &dummy, &dt))
4a61a337 222 return false;
223
4a61a337 224 return true;
225}
226
75a70cf9 227/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
228 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
229
230static tree
231vect_recog_temp_ssa_var (tree type, gimple stmt)
232{
03d37e4e 233 return make_temp_ssa_name (type, stmt, "patt");
75a70cf9 234}
4a61a337 235
236/* Function vect_recog_dot_prod_pattern
237
238 Try to find the following pattern:
239
240 type x_t, y_t;
241 TYPE1 prod;
242 TYPE2 sum = init;
243 loop:
244 sum_0 = phi <init, sum_1>
245 S1 x_t = ...
246 S2 y_t = ...
247 S3 x_T = (TYPE1) x_t;
248 S4 y_T = (TYPE1) y_t;
249 S5 prod = x_T * y_T;
250 [S6 prod = (TYPE2) prod; #optional]
251 S7 sum_1 = prod + sum_0;
252
48e1416a 253 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
254 same size of 'TYPE1' or bigger. This is a special case of a reduction
4a61a337 255 computation.
48e1416a 256
4a61a337 257 Input:
258
0187b74e 259 * STMTS: Contains a stmt from which the pattern search begins. In the
260 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
261 will be detected.
4a61a337 262
263 Output:
264
265 * TYPE_IN: The type of the input arguments to the pattern.
266
267 * TYPE_OUT: The type of the output of this pattern.
268
269 * Return value: A new stmt that will be used to replace the sequence of
270 stmts that constitute the pattern. In this case it will be:
271 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
221e9a92 272
273 Note: The dot-prod idiom is a widening reduction pattern that is
274 vectorized without preserving all the intermediate results. It
275 produces only N/2 (widened) results (by summing up pairs of
276 intermediate results) rather than all N results. Therefore, we
277 cannot allow this pattern when we want to get all the results and in
278 the correct order (as is the case when this computation is in an
279 inner-loop nested in an outer-loop that us being vectorized). */
4a61a337 280
75a70cf9 281static gimple
f1f41a6c 282vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
0187b74e 283 tree *type_out)
4a61a337 284{
f1f41a6c 285 gimple stmt, last_stmt = (*stmts)[0];
4a61a337 286 tree oprnd0, oprnd1;
287 tree oprnd00, oprnd01;
0187b74e 288 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
4a61a337 289 tree type, half_type;
75a70cf9 290 gimple pattern_stmt;
4a61a337 291 tree prod_type;
221e9a92 292 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4c0c783a 293 struct loop *loop;
c86930b0 294 tree var;
087903db 295 bool promotion;
4a61a337 296
4c0c783a 297 if (!loop_info)
298 return NULL;
299
300 loop = LOOP_VINFO_LOOP (loop_info);
301
0187b74e 302 if (!is_gimple_assign (last_stmt))
4a61a337 303 return NULL;
304
0187b74e 305 type = gimple_expr_type (last_stmt);
4a61a337 306
48e1416a 307 /* Look for the following pattern
4a61a337 308 DX = (TYPE1) X;
309 DY = (TYPE1) Y;
48e1416a 310 DPROD = DX * DY;
4a61a337 311 DDPROD = (TYPE2) DPROD;
312 sum_1 = DDPROD + sum_0;
48e1416a 313 In which
4a61a337 314 - DX is double the size of X
315 - DY is double the size of Y
316 - DX, DY, DPROD all have the same type
317 - sum is the same size of DPROD or bigger
318 - sum has been recognized as a reduction variable.
319
320 This is equivalent to:
321 DPROD = X w* Y; #widen mult
322 sum_1 = DPROD w+ sum_0; #widen summation
323 or
324 DPROD = X w* Y; #widen mult
325 sum_1 = DPROD + sum_0; #summation
326 */
327
328 /* Starting from LAST_STMT, follow the defs of its uses in search
329 of the above pattern. */
330
0187b74e 331 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
4a61a337 332 return NULL;
333
334 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
335 {
336 /* Has been detected as widening-summation? */
337
338 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
75a70cf9 339 type = gimple_expr_type (stmt);
340 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
4a61a337 341 return NULL;
75a70cf9 342 oprnd0 = gimple_assign_rhs1 (stmt);
343 oprnd1 = gimple_assign_rhs2 (stmt);
4a61a337 344 half_type = TREE_TYPE (oprnd0);
345 }
346 else
347 {
75a70cf9 348 gimple def_stmt;
4a61a337 349
350 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
351 return NULL;
0187b74e 352 oprnd0 = gimple_assign_rhs1 (last_stmt);
353 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 354 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
355 || !types_compatible_p (TREE_TYPE (oprnd1), type))
4a61a337 356 return NULL;
0187b74e 357 stmt = last_stmt;
4a61a337 358
087903db 359 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
360 &promotion)
361 && promotion)
4a61a337 362 {
363 stmt = def_stmt;
75a70cf9 364 oprnd0 = gimple_assign_rhs1 (stmt);
4a61a337 365 }
366 else
367 half_type = type;
368 }
369
0187b74e 370 /* So far so good. Since last_stmt was detected as a (summation) reduction,
4a61a337 371 we know that oprnd1 is the reduction variable (defined by a loop-header
372 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
373 Left to check that oprnd0 is defined by a (widen_)mult_expr */
1991698f 374 if (TREE_CODE (oprnd0) != SSA_NAME)
375 return NULL;
4a61a337 376
377 prod_type = half_type;
378 stmt = SSA_NAME_DEF_STMT (oprnd0);
4af22cd9 379
380 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
4ba74662 381 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
4af22cd9 382 return NULL;
383
48e1416a 384 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
e3e3ee4b 385 inside the loop (in case we are analyzing an outer-loop). */
75a70cf9 386 if (!is_gimple_assign (stmt))
48e1416a 387 return NULL;
4a61a337 388 stmt_vinfo = vinfo_for_stmt (stmt);
389 gcc_assert (stmt_vinfo);
f083cd24 390 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
23d9a167 391 return NULL;
75a70cf9 392 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
4a61a337 393 return NULL;
394 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
395 {
396 /* Has been detected as a widening multiplication? */
397
398 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
75a70cf9 399 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
4a61a337 400 return NULL;
401 stmt_vinfo = vinfo_for_stmt (stmt);
402 gcc_assert (stmt_vinfo);
f083cd24 403 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
75a70cf9 404 oprnd00 = gimple_assign_rhs1 (stmt);
405 oprnd01 = gimple_assign_rhs2 (stmt);
a16d0316 406 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
407 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
4a61a337 408 }
409 else
410 {
411 tree half_type0, half_type1;
75a70cf9 412 gimple def_stmt;
4a61a337 413 tree oprnd0, oprnd1;
414
75a70cf9 415 oprnd0 = gimple_assign_rhs1 (stmt);
416 oprnd1 = gimple_assign_rhs2 (stmt);
1ea6a73c 417 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
418 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
4a61a337 419 return NULL;
087903db 420 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
421 &promotion)
422 || !promotion)
4a61a337 423 return NULL;
75a70cf9 424 oprnd00 = gimple_assign_rhs1 (def_stmt);
f498d0cb 425 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
087903db 426 &promotion)
427 || !promotion)
4a61a337 428 return NULL;
75a70cf9 429 oprnd01 = gimple_assign_rhs1 (def_stmt);
1ea6a73c 430 if (!types_compatible_p (half_type0, half_type1))
4a61a337 431 return NULL;
432 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
433 return NULL;
434 }
435
436 half_type = TREE_TYPE (oprnd00);
437 *type_in = half_type;
438 *type_out = type;
48e1416a 439
4a61a337 440 /* Pattern detected. Create a stmt to be used to replace the pattern: */
75a70cf9 441 var = vect_recog_temp_ssa_var (type, NULL);
446e85eb 442 pattern_stmt = gimple_build_assign_with_ops (DOT_PROD_EXPR, var,
443 oprnd00, oprnd01, oprnd1);
48e1416a 444
6d8fb6cf 445 if (dump_enabled_p ())
4a61a337 446 {
b055bc88 447 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 448 "vect_recog_dot_prod_pattern: detected: ");
b055bc88 449 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
78bb46f5 450 dump_printf (MSG_NOTE, "\n");
4a61a337 451 }
221e9a92 452
453 /* We don't allow changing the order of the computation in the inner-loop
454 when doing outer-loop vectorization. */
0187b74e 455 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
221e9a92 456
75a70cf9 457 return pattern_stmt;
4a61a337 458}
48e1416a 459
0187b74e 460
a2287001 461/* Function vect_recog_sad_pattern
462
463 Try to find the following Sum of Absolute Difference (SAD) pattern:
464
465 type x_t, y_t;
466 signed TYPE1 diff, abs_diff;
467 TYPE2 sum = init;
468 loop:
469 sum_0 = phi <init, sum_1>
470 S1 x_t = ...
471 S2 y_t = ...
472 S3 x_T = (TYPE1) x_t;
473 S4 y_T = (TYPE1) y_t;
474 S5 diff = x_T - y_T;
475 S6 abs_diff = ABS_EXPR <diff>;
476 [S7 abs_diff = (TYPE2) abs_diff; #optional]
477 S8 sum_1 = abs_diff + sum_0;
478
479 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
480 same size of 'TYPE1' or bigger. This is a special case of a reduction
481 computation.
482
483 Input:
484
485 * STMTS: Contains a stmt from which the pattern search begins. In the
486 example, when this function is called with S8, the pattern
487 {S3,S4,S5,S6,S7,S8} will be detected.
488
489 Output:
490
491 * TYPE_IN: The type of the input arguments to the pattern.
492
493 * TYPE_OUT: The type of the output of this pattern.
494
495 * Return value: A new stmt that will be used to replace the sequence of
496 stmts that constitute the pattern. In this case it will be:
497 SAD_EXPR <x_t, y_t, sum_0>
498 */
499
500static gimple
501vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
502 tree *type_out)
503{
504 gimple last_stmt = (*stmts)[0];
505 tree sad_oprnd0, sad_oprnd1;
506 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
507 tree half_type;
508 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
509 struct loop *loop;
510 bool promotion;
511
512 if (!loop_info)
513 return NULL;
514
515 loop = LOOP_VINFO_LOOP (loop_info);
516
517 if (!is_gimple_assign (last_stmt))
518 return NULL;
519
520 tree sum_type = gimple_expr_type (last_stmt);
521
522 /* Look for the following pattern
523 DX = (TYPE1) X;
524 DY = (TYPE1) Y;
525 DDIFF = DX - DY;
526 DAD = ABS_EXPR <DDIFF>;
527 DDPROD = (TYPE2) DPROD;
528 sum_1 = DAD + sum_0;
529 In which
530 - DX is at least double the size of X
531 - DY is at least double the size of Y
532 - DX, DY, DDIFF, DAD all have the same type
533 - sum is the same size of DAD or bigger
534 - sum has been recognized as a reduction variable.
535
536 This is equivalent to:
537 DDIFF = X w- Y; #widen sub
538 DAD = ABS_EXPR <DDIFF>;
539 sum_1 = DAD w+ sum_0; #widen summation
540 or
541 DDIFF = X w- Y; #widen sub
542 DAD = ABS_EXPR <DDIFF>;
543 sum_1 = DAD + sum_0; #summation
544 */
545
546 /* Starting from LAST_STMT, follow the defs of its uses in search
547 of the above pattern. */
548
549 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
550 return NULL;
551
552 tree plus_oprnd0, plus_oprnd1;
553
554 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
555 {
556 /* Has been detected as widening-summation? */
557
558 gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
559 sum_type = gimple_expr_type (stmt);
560 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
561 return NULL;
562 plus_oprnd0 = gimple_assign_rhs1 (stmt);
563 plus_oprnd1 = gimple_assign_rhs2 (stmt);
564 half_type = TREE_TYPE (plus_oprnd0);
565 }
566 else
567 {
568 gimple def_stmt;
569
570 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
571 return NULL;
572 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
573 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
574 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
575 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
576 return NULL;
577
578 /* The type conversion could be promotion, demotion,
579 or just signed -> unsigned. */
580 if (type_conversion_p (plus_oprnd0, last_stmt, false,
581 &half_type, &def_stmt, &promotion))
582 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
583 else
584 half_type = sum_type;
585 }
586
587 /* So far so good. Since last_stmt was detected as a (summation) reduction,
588 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
589 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
590 Then check that plus_oprnd0 is defined by an abs_expr. */
591
592 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
593 return NULL;
594
595 tree abs_type = half_type;
596 gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
597
598 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
599 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
600 return NULL;
601
602 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
603 inside the loop (in case we are analyzing an outer-loop). */
604 if (!is_gimple_assign (abs_stmt))
605 return NULL;
606
607 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
608 gcc_assert (abs_stmt_vinfo);
609 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
610 return NULL;
611 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
612 return NULL;
613
614 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
615 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
616 return NULL;
617 if (TYPE_UNSIGNED (abs_type))
618 return NULL;
619
620 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
621
622 if (TREE_CODE (abs_oprnd) != SSA_NAME)
623 return NULL;
624
625 gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
626
627 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
628 if (!gimple_bb (diff_stmt)
629 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
630 return NULL;
631
632 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
633 inside the loop (in case we are analyzing an outer-loop). */
634 if (!is_gimple_assign (diff_stmt))
635 return NULL;
636
637 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
638 gcc_assert (diff_stmt_vinfo);
639 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
640 return NULL;
641 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
642 return NULL;
643
644 tree half_type0, half_type1;
645 gimple def_stmt;
646
647 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
648 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
649
650 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
651 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
652 return NULL;
653 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
654 &half_type0, &def_stmt, &promotion)
655 || !promotion)
656 return NULL;
657 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
658
659 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
660 &half_type1, &def_stmt, &promotion)
661 || !promotion)
662 return NULL;
663 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
664
665 if (!types_compatible_p (half_type0, half_type1))
666 return NULL;
667 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
668 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
669 return NULL;
670
671 *type_in = TREE_TYPE (sad_oprnd0);
672 *type_out = sum_type;
673
674 /* Pattern detected. Create a stmt to be used to replace the pattern: */
675 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
676 gimple pattern_stmt = gimple_build_assign_with_ops
677 (SAD_EXPR, var, sad_oprnd0, sad_oprnd1, plus_oprnd1);
678
679 if (dump_enabled_p ())
680 {
681 dump_printf_loc (MSG_NOTE, vect_location,
682 "vect_recog_sad_pattern: detected: ");
683 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
684 dump_printf (MSG_NOTE, "\n");
685 }
686
687 /* We don't allow changing the order of the computation in the inner-loop
688 when doing outer-loop vectorization. */
689 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
690
691 return pattern_stmt;
692}
693
694
6083c152 695/* Handle widening operation by a constant. At the moment we support MULT_EXPR
696 and LSHIFT_EXPR.
697
698 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
699 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
0187b74e 700
701 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
6083c152 702 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
703 that satisfies the above restrictions, we can perform a widening opeartion
704 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
705 with a_it = (interm_type) a_t; */
0187b74e 706
707static bool
6083c152 708vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
709 tree const_oprnd, tree *oprnd,
f1f41a6c 710 vec<gimple> *stmts, tree type,
6083c152 711 tree *half_type, gimple def_stmt)
0187b74e 712{
03d37e4e 713 tree new_type, new_oprnd;
0187b74e 714 gimple new_stmt;
715
6083c152 716 if (code != MULT_EXPR && code != LSHIFT_EXPR)
717 return false;
718
719 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
720 || (code == LSHIFT_EXPR
721 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
722 != 1))
723 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
0187b74e 724 {
725 /* CONST_OPRND is a constant of HALF_TYPE. */
726 *oprnd = gimple_assign_rhs1 (def_stmt);
727 return true;
728 }
729
33f33894 730 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
731 return false;
732
733 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
0187b74e 734 return false;
735
6083c152 736 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
0187b74e 737 a type 2 times bigger than HALF_TYPE. */
738 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
739 TYPE_UNSIGNED (type));
6083c152 740 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
741 || (code == LSHIFT_EXPR
742 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
0187b74e 743 return false;
744
6083c152 745 /* Use NEW_TYPE for widening operation. */
0187b74e 746 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
747 {
748 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
749 /* Check if the already created pattern stmt is what we need. */
750 if (!is_gimple_assign (new_stmt)
d09ef31a 751 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
0187b74e 752 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != new_type)
753 return false;
754
f1f41a6c 755 stmts->safe_push (def_stmt);
0187b74e 756 *oprnd = gimple_assign_lhs (new_stmt);
757 }
758 else
759 {
760 /* Create a_T = (NEW_TYPE) a_t; */
761 *oprnd = gimple_assign_rhs1 (def_stmt);
03d37e4e 762 new_oprnd = make_ssa_name (new_type, NULL);
806413d2 763 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd, *oprnd);
0187b74e 764 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
f1f41a6c 765 stmts->safe_push (def_stmt);
0187b74e 766 *oprnd = new_oprnd;
767 }
768
769 *half_type = new_type;
770 return true;
771}
772
773
4a61a337 774/* Function vect_recog_widen_mult_pattern
775
776 Try to find the following pattern:
777
c9156338 778 type1 a_t;
779 type2 b_t;
4a61a337 780 TYPE a_T, b_T, prod_T;
781
782 S1 a_t = ;
783 S2 b_t = ;
784 S3 a_T = (TYPE) a_t;
785 S4 b_T = (TYPE) b_t;
786 S5 prod_T = a_T * b_T;
787
c9156338 788 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
4a61a337 789
c2c4377d 790 Also detect unsigned cases:
0eee81bc 791
c9156338 792 unsigned type1 a_t;
793 unsigned type2 b_t;
0eee81bc 794 unsigned TYPE u_prod_T;
795 TYPE a_T, b_T, prod_T;
796
797 S1 a_t = ;
798 S2 b_t = ;
799 S3 a_T = (TYPE) a_t;
800 S4 b_T = (TYPE) b_t;
801 S5 prod_T = a_T * b_T;
802 S6 u_prod_T = (unsigned TYPE) prod_T;
803
804 and multiplication by constants:
805
806 type a_t;
807 TYPE a_T, prod_T;
808
809 S1 a_t = ;
810 S3 a_T = (TYPE) a_t;
811 S5 prod_T = a_T * CONST;
812
0187b74e 813 A special case of multiplication by constants is when 'TYPE' is 4 times
814 bigger than 'type', but CONST fits an intermediate type 2 times smaller
815 than 'TYPE'. In that case we create an additional pattern stmt for S3
816 to create a variable of the intermediate type, and perform widen-mult
817 on the intermediate type as well:
818
819 type a_t;
820 interm_type a_it;
821 TYPE a_T, prod_T, prod_T';
822
823 S1 a_t = ;
824 S3 a_T = (TYPE) a_t;
825 '--> a_it = (interm_type) a_t;
826 S5 prod_T = a_T * CONST;
827 '--> prod_T' = a_it w* CONST;
4a61a337 828
0187b74e 829 Input/Output:
830
831 * STMTS: Contains a stmt from which the pattern search begins. In the
832 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
833 is detected. In case of unsigned widen-mult, the original stmt (S5) is
834 replaced with S6 in STMTS. In case of multiplication by a constant
835 of an intermediate type (the last case above), STMTS also contains S3
836 (inserted before S5).
4a61a337 837
838 Output:
839
840 * TYPE_IN: The type of the input arguments to the pattern.
841
0eee81bc 842 * TYPE_OUT: The type of the output of this pattern.
4a61a337 843
844 * Return value: A new stmt that will be used to replace the sequence of
0eee81bc 845 stmts that constitute the pattern. In this case it will be:
4a61a337 846 WIDEN_MULT <a_t, b_t>
c9156338 847 If the result of WIDEN_MULT needs to be converted to a larger type, the
848 returned stmt will be this type conversion stmt.
4a61a337 849*/
850
75a70cf9 851static gimple
f1f41a6c 852vect_recog_widen_mult_pattern (vec<gimple> *stmts,
0187b74e 853 tree *type_in, tree *type_out)
4a61a337 854{
f1f41a6c 855 gimple last_stmt = stmts->pop ();
75a70cf9 856 gimple def_stmt0, def_stmt1;
c6c91d61 857 tree oprnd0, oprnd1;
858 tree type, half_type0, half_type1;
c9156338 859 gimple new_stmt = NULL, pattern_stmt = NULL;
860 tree vectype, vecitype;
75a70cf9 861 tree var;
c6c91d61 862 enum tree_code dummy_code;
862bb3cd 863 int dummy_int;
f1f41a6c 864 vec<tree> dummy_vec;
6083c152 865 bool op1_ok;
087903db 866 bool promotion;
c6c91d61 867
0187b74e 868 if (!is_gimple_assign (last_stmt))
c6c91d61 869 return NULL;
870
0187b74e 871 type = gimple_expr_type (last_stmt);
c6c91d61 872
873 /* Starting from LAST_STMT, follow the defs of its uses in search
874 of the above pattern. */
875
0187b74e 876 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
c6c91d61 877 return NULL;
878
0187b74e 879 oprnd0 = gimple_assign_rhs1 (last_stmt);
880 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 881 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
882 || !types_compatible_p (TREE_TYPE (oprnd1), type))
c6c91d61 883 return NULL;
884
0eee81bc 885 /* Check argument 0. */
087903db 886 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
887 &promotion)
888 || !promotion)
889 return NULL;
0eee81bc 890 /* Check argument 1. */
087903db 891 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
892 &def_stmt1, &promotion);
c6c91d61 893
087903db 894 if (op1_ok && promotion)
0eee81bc 895 {
896 oprnd0 = gimple_assign_rhs1 (def_stmt0);
897 oprnd1 = gimple_assign_rhs1 (def_stmt1);
898 }
6083c152 899 else
0eee81bc 900 {
0187b74e 901 if (TREE_CODE (oprnd1) == INTEGER_CST
0eee81bc 902 && TREE_CODE (half_type0) == INTEGER_TYPE
6083c152 903 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
904 &oprnd0, stmts, type,
905 &half_type0, def_stmt0))
d82cd73d 906 {
907 half_type1 = half_type0;
908 oprnd1 = fold_convert (half_type1, oprnd1);
909 }
0eee81bc 910 else
911 return NULL;
912 }
913
c9156338 914 /* If the two arguments have different sizes, convert the one with
915 the smaller type into the larger type. */
916 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
917 {
918 tree* oprnd = NULL;
919 gimple def_stmt = NULL;
920
921 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
922 {
923 def_stmt = def_stmt0;
924 half_type0 = half_type1;
925 oprnd = &oprnd0;
926 }
927 else
928 {
929 def_stmt = def_stmt1;
930 half_type1 = half_type0;
931 oprnd = &oprnd1;
932 }
933
934 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
935 tree new_oprnd = make_ssa_name (half_type0, NULL);
936 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
806413d2 937 old_oprnd);
c9156338 938 *oprnd = new_oprnd;
939 }
940
0eee81bc 941 /* Handle unsigned case. Look for
942 S6 u_prod_T = (unsigned TYPE) prod_T;
943 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
944 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
945 {
019bbf38 946 gimple use_stmt;
947 tree use_lhs;
0eee81bc 948 tree use_type;
949
950 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
951 return NULL;
952
019bbf38 953 use_stmt = vect_single_imm_use (last_stmt);
954 if (!use_stmt || !is_gimple_assign (use_stmt)
d09ef31a 955 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
0eee81bc 956 return NULL;
957
958 use_lhs = gimple_assign_lhs (use_stmt);
959 use_type = TREE_TYPE (use_lhs);
960 if (!INTEGRAL_TYPE_P (use_type)
961 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
962 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
963 return NULL;
964
965 type = use_type;
0187b74e 966 last_stmt = use_stmt;
0eee81bc 967 }
c6c91d61 968
1ea6a73c 969 if (!types_compatible_p (half_type0, half_type1))
c6c91d61 970 return NULL;
971
c9156338 972 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
973 to get an intermediate result of type ITYPE. In this case we need
974 to build a statement to convert this intermediate result to type TYPE. */
975 tree itype = type;
976 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
977 itype = build_nonstandard_integer_type
978 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
979 TYPE_UNSIGNED (type));
980
c6c91d61 981 /* Pattern detected. */
6d8fb6cf 982 if (dump_enabled_p ())
b055bc88 983 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 984 "vect_recog_widen_mult_pattern: detected:\n");
c6c91d61 985
986 /* Check target support */
987 vectype = get_vectype_for_scalar_type (half_type0);
c9156338 988 vecitype = get_vectype_for_scalar_type (itype);
f031fa03 989 if (!vectype
c9156338 990 || !vecitype
0187b74e 991 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
c9156338 992 vecitype, vectype,
087dde2d 993 &dummy_code, &dummy_code,
994 &dummy_int, &dummy_vec))
c6c91d61 995 return NULL;
996
997 *type_in = vectype;
c9156338 998 *type_out = get_vectype_for_scalar_type (type);
c6c91d61 999
1000 /* Pattern supported. Create a stmt to be used to replace the pattern: */
c9156338 1001 var = vect_recog_temp_ssa_var (itype, NULL);
75a70cf9 1002 pattern_stmt = gimple_build_assign_with_ops (WIDEN_MULT_EXPR, var, oprnd0,
1003 oprnd1);
75a70cf9 1004
c9156338 1005 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1006 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1007 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1008 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1009
1010 /* If the original two operands have different sizes, we may need to convert
1011 the smaller one into the larget type. If this is the case, at this point
1012 the new stmt is already built. */
1013 if (new_stmt)
1014 {
1015 append_pattern_def_seq (stmt_vinfo, new_stmt);
1016 stmt_vec_info new_stmt_info
1017 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
1018 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1019 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1020 }
1021
1022 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1023 the result of the widen-mult operation into type TYPE. */
1024 if (itype != type)
1025 {
1026 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1027 stmt_vec_info pattern_stmt_info
1028 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
1029 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1030 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
1031 pattern_stmt
1032 = gimple_build_assign_with_ops (NOP_EXPR,
1033 vect_recog_temp_ssa_var (type, NULL),
806413d2 1034 gimple_assign_lhs (pattern_stmt));
c9156338 1035 }
1036
6d8fb6cf 1037 if (dump_enabled_p ())
7bd765d4 1038 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
75a70cf9 1039
f1f41a6c 1040 stmts->safe_push (last_stmt);
75a70cf9 1041 return pattern_stmt;
4a61a337 1042}
1043
1044
c37bea13 1045/* Function vect_recog_pow_pattern
1046
1047 Try to find the following pattern:
1048
1049 x = POW (y, N);
1050
1051 with POW being one of pow, powf, powi, powif and N being
1052 either 2 or 0.5.
1053
1054 Input:
1055
1056 * LAST_STMT: A stmt from which the pattern search begins.
1057
1058 Output:
1059
1060 * TYPE_IN: The type of the input arguments to the pattern.
1061
1062 * TYPE_OUT: The type of the output of this pattern.
1063
1064 * Return value: A new stmt that will be used to replace the sequence of
1065 stmts that constitute the pattern. In this case it will be:
75a70cf9 1066 x = x * x
c37bea13 1067 or
75a70cf9 1068 x = sqrt (x)
c37bea13 1069*/
1070
75a70cf9 1071static gimple
f1f41a6c 1072vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
0187b74e 1073 tree *type_out)
c37bea13 1074{
f1f41a6c 1075 gimple last_stmt = (*stmts)[0];
75a70cf9 1076 tree fn, base, exp = NULL;
1077 gimple stmt;
1078 tree var;
c37bea13 1079
0187b74e 1080 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
c37bea13 1081 return NULL;
1082
0187b74e 1083 fn = gimple_call_fndecl (last_stmt);
1bc5f74f 1084 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
1085 return NULL;
1086
c37bea13 1087 switch (DECL_FUNCTION_CODE (fn))
1088 {
1089 case BUILT_IN_POWIF:
1090 case BUILT_IN_POWI:
1091 case BUILT_IN_POWF:
1092 case BUILT_IN_POW:
0187b74e 1093 base = gimple_call_arg (last_stmt, 0);
1094 exp = gimple_call_arg (last_stmt, 1);
c37bea13 1095 if (TREE_CODE (exp) != REAL_CST
1096 && TREE_CODE (exp) != INTEGER_CST)
75a70cf9 1097 return NULL;
c37bea13 1098 break;
1099
75a70cf9 1100 default:
1101 return NULL;
c37bea13 1102 }
1103
1104 /* We now have a pow or powi builtin function call with a constant
1105 exponent. */
1106
c37bea13 1107 *type_out = NULL_TREE;
1108
1109 /* Catch squaring. */
e913b5cd 1110 if ((tree_fits_shwi_p (exp)
1111 && tree_to_shwi (exp) == 2)
c37bea13 1112 || (TREE_CODE (exp) == REAL_CST
1113 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
4957645d 1114 {
1115 *type_in = TREE_TYPE (base);
75a70cf9 1116
1117 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
1118 stmt = gimple_build_assign_with_ops (MULT_EXPR, var, base, base);
75a70cf9 1119 return stmt;
4957645d 1120 }
c37bea13 1121
1122 /* Catch square root. */
1123 if (TREE_CODE (exp) == REAL_CST
1124 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
1125 {
1126 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
4957645d 1127 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1128 if (*type_in)
1129 {
75a70cf9 1130 gimple stmt = gimple_build_call (newfn, 1, base);
1131 if (vectorizable_function (stmt, *type_in, *type_in)
1132 != NULL_TREE)
1133 {
1134 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
48e1416a 1135 gimple_call_set_lhs (stmt, var);
75a70cf9 1136 return stmt;
1137 }
4957645d 1138 }
c37bea13 1139 }
1140
75a70cf9 1141 return NULL;
c37bea13 1142}
1143
1144
4a61a337 1145/* Function vect_recog_widen_sum_pattern
1146
1147 Try to find the following pattern:
1148
48e1416a 1149 type x_t;
4a61a337 1150 TYPE x_T, sum = init;
1151 loop:
1152 sum_0 = phi <init, sum_1>
1153 S1 x_t = *p;
1154 S2 x_T = (TYPE) x_t;
1155 S3 sum_1 = x_T + sum_0;
1156
48e1416a 1157 where type 'TYPE' is at least double the size of type 'type', i.e - we're
4a61a337 1158 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
9ca2c29a 1159 a special case of a reduction computation.
4a61a337 1160
1161 Input:
1162
1163 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1164 when this function is called with S3, the pattern {S2,S3} will be detected.
48e1416a 1165
4a61a337 1166 Output:
48e1416a 1167
4a61a337 1168 * TYPE_IN: The type of the input arguments to the pattern.
1169
1170 * TYPE_OUT: The type of the output of this pattern.
1171
1172 * Return value: A new stmt that will be used to replace the sequence of
1173 stmts that constitute the pattern. In this case it will be:
1174 WIDEN_SUM <x_t, sum_0>
221e9a92 1175
48e1416a 1176 Note: The widening-sum idiom is a widening reduction pattern that is
221e9a92 1177 vectorized without preserving all the intermediate results. It
48e1416a 1178 produces only N/2 (widened) results (by summing up pairs of
1179 intermediate results) rather than all N results. Therefore, we
1180 cannot allow this pattern when we want to get all the results and in
1181 the correct order (as is the case when this computation is in an
221e9a92 1182 inner-loop nested in an outer-loop that us being vectorized). */
4a61a337 1183
75a70cf9 1184static gimple
f1f41a6c 1185vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
0187b74e 1186 tree *type_out)
4a61a337 1187{
f1f41a6c 1188 gimple stmt, last_stmt = (*stmts)[0];
4a61a337 1189 tree oprnd0, oprnd1;
0187b74e 1190 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
4a61a337 1191 tree type, half_type;
75a70cf9 1192 gimple pattern_stmt;
221e9a92 1193 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4c0c783a 1194 struct loop *loop;
75a70cf9 1195 tree var;
087903db 1196 bool promotion;
4a61a337 1197
4c0c783a 1198 if (!loop_info)
1199 return NULL;
1200
1201 loop = LOOP_VINFO_LOOP (loop_info);
1202
0187b74e 1203 if (!is_gimple_assign (last_stmt))
4a61a337 1204 return NULL;
1205
0187b74e 1206 type = gimple_expr_type (last_stmt);
4a61a337 1207
1208 /* Look for the following pattern
1209 DX = (TYPE) X;
1210 sum_1 = DX + sum_0;
1211 In which DX is at least double the size of X, and sum_1 has been
1212 recognized as a reduction variable.
1213 */
1214
1215 /* Starting from LAST_STMT, follow the defs of its uses in search
1216 of the above pattern. */
1217
0187b74e 1218 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
4a61a337 1219 return NULL;
1220
1221 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_reduction_def)
1222 return NULL;
1223
0187b74e 1224 oprnd0 = gimple_assign_rhs1 (last_stmt);
1225 oprnd1 = gimple_assign_rhs2 (last_stmt);
1ea6a73c 1226 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1227 || !types_compatible_p (TREE_TYPE (oprnd1), type))
4a61a337 1228 return NULL;
1229
0187b74e 1230 /* So far so good. Since last_stmt was detected as a (summation) reduction,
4a61a337 1231 we know that oprnd1 is the reduction variable (defined by a loop-header
1232 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1233 Left to check that oprnd0 is defined by a cast from type 'type' to type
1234 'TYPE'. */
1235
087903db 1236 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1237 &promotion)
1238 || !promotion)
1239 return NULL;
4a61a337 1240
75a70cf9 1241 oprnd0 = gimple_assign_rhs1 (stmt);
4a61a337 1242 *type_in = half_type;
1243 *type_out = type;
1244
1245 /* Pattern detected. Create a stmt to be used to replace the pattern: */
75a70cf9 1246 var = vect_recog_temp_ssa_var (type, NULL);
1247 pattern_stmt = gimple_build_assign_with_ops (WIDEN_SUM_EXPR, var,
1248 oprnd0, oprnd1);
75a70cf9 1249
6d8fb6cf 1250 if (dump_enabled_p ())
4a61a337 1251 {
b055bc88 1252 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 1253 "vect_recog_widen_sum_pattern: detected: ");
b055bc88 1254 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
78bb46f5 1255 dump_printf (MSG_NOTE, "\n");
4a61a337 1256 }
221e9a92 1257
1258 /* We don't allow changing the order of the computation in the inner-loop
1259 when doing outer-loop vectorization. */
0187b74e 1260 gcc_assert (!nested_in_vect_loop_p (loop, last_stmt));
221e9a92 1261
75a70cf9 1262 return pattern_stmt;
4a61a337 1263}
1264
1265
45eea33f 1266/* Return TRUE if the operation in STMT can be performed on a smaller type.
1267
1268 Input:
1269 STMT - a statement to check.
1270 DEF - we support operations with two operands, one of which is constant.
1271 The other operand can be defined by a demotion operation, or by a
1272 previous statement in a sequence of over-promoted operations. In the
1273 later case DEF is used to replace that operand. (It is defined by a
1274 pattern statement we created for the previous statement in the
1275 sequence).
1276
1277 Input/output:
1278 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1279 NULL, it's the type of DEF.
1280 STMTS - additional pattern statements. If a pattern statement (type
1281 conversion) is created in this function, its original statement is
1282 added to STMTS.
1283
1284 Output:
1285 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1286 operands to use in the new pattern statement for STMT (will be created
1287 in vect_recog_over_widening_pattern ()).
1288 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1289 statements for STMT: the first one is a type promotion and the second
1290 one is the operation itself. We return the type promotion statement
18937389 1291 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
45eea33f 1292 the second pattern statement. */
1293
1294static bool
1295vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1296 tree *op0, tree *op1, gimple *new_def_stmt,
f1f41a6c 1297 vec<gimple> *stmts)
45eea33f 1298{
1299 enum tree_code code;
1300 tree const_oprnd, oprnd;
03d37e4e 1301 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
45eea33f 1302 gimple def_stmt, new_stmt;
1303 bool first = false;
087903db 1304 bool promotion;
4c0c783a 1305
bc4577c4 1306 *op0 = NULL_TREE;
1307 *op1 = NULL_TREE;
45eea33f 1308 *new_def_stmt = NULL;
1309
1310 if (!is_gimple_assign (stmt))
1311 return false;
1312
1313 code = gimple_assign_rhs_code (stmt);
1314 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1315 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1316 return false;
1317
1318 oprnd = gimple_assign_rhs1 (stmt);
1319 const_oprnd = gimple_assign_rhs2 (stmt);
1320 type = gimple_expr_type (stmt);
1321
1322 if (TREE_CODE (oprnd) != SSA_NAME
1323 || TREE_CODE (const_oprnd) != INTEGER_CST)
1324 return false;
1325
897c6e08 1326 /* If oprnd has other uses besides that in stmt we cannot mark it
1327 as being part of a pattern only. */
1328 if (!has_single_use (oprnd))
1329 return false;
1330
45eea33f 1331 /* If we are in the middle of a sequence, we use DEF from a previous
1332 statement. Otherwise, OPRND has to be a result of type promotion. */
1333 if (*new_type)
1334 {
1335 half_type = *new_type;
1336 oprnd = def;
1337 }
1338 else
1339 {
1340 first = true;
087903db 1341 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
33f33894 1342 &promotion)
1343 || !promotion
1344 || !vect_same_loop_or_bb_p (stmt, def_stmt))
45eea33f 1345 return false;
1346 }
1347
1348 /* Can we perform the operation on a smaller type? */
1349 switch (code)
1350 {
1351 case BIT_IOR_EXPR:
1352 case BIT_XOR_EXPR:
1353 case BIT_AND_EXPR:
1354 if (!int_fits_type_p (const_oprnd, half_type))
1355 {
1356 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1357 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1358 return false;
1359
1360 interm_type = build_nonstandard_integer_type (
1361 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1362 if (!int_fits_type_p (const_oprnd, interm_type))
1363 return false;
1364 }
1365
1366 break;
1367
1368 case LSHIFT_EXPR:
1369 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1370 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1371 return false;
1372
1373 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1374 (e.g., if the original value was char, the shift amount is at most 8
1375 if we want to use short). */
1376 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1377 return false;
1378
1379 interm_type = build_nonstandard_integer_type (
1380 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1381
1382 if (!vect_supportable_shift (code, interm_type))
1383 return false;
1384
1385 break;
1386
1387 case RSHIFT_EXPR:
1388 if (vect_supportable_shift (code, half_type))
1389 break;
1390
1391 /* Try intermediate type - HALF_TYPE is not supported. */
1392 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1393 return false;
1394
1395 interm_type = build_nonstandard_integer_type (
1396 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1397
1398 if (!vect_supportable_shift (code, interm_type))
1399 return false;
1400
1401 break;
1402
1403 default:
1404 gcc_unreachable ();
1405 }
1406
1407 /* There are four possible cases:
1408 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1409 the first statement in the sequence)
1410 a. The original, HALF_TYPE, is not enough - we replace the promotion
1411 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1412 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1413 promotion.
1414 2. OPRND is defined by a pattern statement we created.
1415 a. Its type is not sufficient for the operation, we create a new stmt:
1416 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1417 this statement in NEW_DEF_STMT, and it is later put in
18937389 1418 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
45eea33f 1419 b. OPRND is good to use in the new statement. */
1420 if (first)
1421 {
1422 if (interm_type)
1423 {
1424 /* Replace the original type conversion HALF_TYPE->TYPE with
1425 HALF_TYPE->INTERM_TYPE. */
1426 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1427 {
1428 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1429 /* Check if the already created pattern stmt is what we need. */
1430 if (!is_gimple_assign (new_stmt)
d09ef31a 1431 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
45eea33f 1432 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1433 return false;
1434
f1f41a6c 1435 stmts->safe_push (def_stmt);
45eea33f 1436 oprnd = gimple_assign_lhs (new_stmt);
1437 }
1438 else
1439 {
1440 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1441 oprnd = gimple_assign_rhs1 (def_stmt);
03d37e4e 1442 new_oprnd = make_ssa_name (interm_type, NULL);
45eea33f 1443 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
806413d2 1444 oprnd);
45eea33f 1445 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
f1f41a6c 1446 stmts->safe_push (def_stmt);
45eea33f 1447 oprnd = new_oprnd;
1448 }
1449 }
1450 else
1451 {
1452 /* Retrieve the operand before the type promotion. */
1453 oprnd = gimple_assign_rhs1 (def_stmt);
1454 }
1455 }
1456 else
1457 {
1458 if (interm_type)
1459 {
1460 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
03d37e4e 1461 new_oprnd = make_ssa_name (interm_type, NULL);
45eea33f 1462 new_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
806413d2 1463 oprnd);
45eea33f 1464 oprnd = new_oprnd;
1465 *new_def_stmt = new_stmt;
1466 }
1467
1468 /* Otherwise, OPRND is already set. */
1469 }
1470
1471 if (interm_type)
1472 *new_type = interm_type;
1473 else
1474 *new_type = half_type;
1475
1476 *op0 = oprnd;
1477 *op1 = fold_convert (*new_type, const_oprnd);
1478
1479 return true;
1480}
1481
1482
1483/* Try to find a statement or a sequence of statements that can be performed
1484 on a smaller type:
1485
1486 type x_t;
1487 TYPE x_T, res0_T, res1_T;
1488 loop:
1489 S1 x_t = *p;
1490 S2 x_T = (TYPE) x_t;
1491 S3 res0_T = op (x_T, C0);
1492 S4 res1_T = op (res0_T, C1);
1493 S5 ... = () res1_T; - type demotion
1494
1495 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1496 constants.
1497 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1498 be 'type' or some intermediate type. For now, we expect S5 to be a type
50f85e2e 1499 demotion operation. We also check that S3 and S4 have only one use. */
45eea33f 1500
45eea33f 1501static gimple
f1f41a6c 1502vect_recog_over_widening_pattern (vec<gimple> *stmts,
45eea33f 1503 tree *type_in, tree *type_out)
1504{
f1f41a6c 1505 gimple stmt = stmts->pop ();
45eea33f 1506 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
019bbf38 1507 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
03d37e4e 1508 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
45eea33f 1509 bool first;
6a5f222a 1510 tree type = NULL;
45eea33f 1511
1512 first = true;
1513 while (1)
1514 {
1515 if (!vinfo_for_stmt (stmt)
1516 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1517 return NULL;
1518
1519 new_def_stmt = NULL;
1520 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1521 &op0, &op1, &new_def_stmt,
1522 stmts))
1523 {
1524 if (first)
1525 return NULL;
1526 else
1527 break;
1528 }
1529
1530 /* STMT can be performed on a smaller type. Check its uses. */
019bbf38 1531 use_stmt = vect_single_imm_use (stmt);
1532 if (!use_stmt || !is_gimple_assign (use_stmt))
45eea33f 1533 return NULL;
1534
1535 /* Create pattern statement for STMT. */
1536 vectype = get_vectype_for_scalar_type (new_type);
1537 if (!vectype)
1538 return NULL;
1539
1540 /* We want to collect all the statements for which we create pattern
1541 statetments, except for the case when the last statement in the
1542 sequence doesn't have a corresponding pattern statement. In such
1543 case we associate the last pattern statement with the last statement
6083c152 1544 in the sequence. Therefore, we only add the original statement to
45eea33f 1545 the list if we know that it is not the last. */
1546 if (prev_stmt)
f1f41a6c 1547 stmts->safe_push (prev_stmt);
45eea33f 1548
1549 var = vect_recog_temp_ssa_var (new_type, NULL);
f6bc1dff 1550 pattern_stmt
1551 = gimple_build_assign_with_ops (gimple_assign_rhs_code (stmt), var,
1552 op0, op1);
45eea33f 1553 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
6d741312 1554 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
45eea33f 1555
6d8fb6cf 1556 if (dump_enabled_p ())
45eea33f 1557 {
b055bc88 1558 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 1559 "created pattern stmt: ");
b055bc88 1560 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
78bb46f5 1561 dump_printf (MSG_NOTE, "\n");
45eea33f 1562 }
1563
6a5f222a 1564 type = gimple_expr_type (stmt);
45eea33f 1565 prev_stmt = stmt;
1566 stmt = use_stmt;
1567
1568 first = false;
1569 }
1570
1571 /* We got a sequence. We expect it to end with a type demotion operation.
1572 Otherwise, we quit (for now). There are three possible cases: the
1573 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1574 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1575 NEW_TYPE differs (we create a new conversion statement). */
1576 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1577 {
1578 use_lhs = gimple_assign_lhs (use_stmt);
1579 use_type = TREE_TYPE (use_lhs);
6175d24a 1580 /* Support only type demotion or signedess change. */
45eea33f 1581 if (!INTEGRAL_TYPE_P (use_type)
6175d24a 1582 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
45eea33f 1583 return NULL;
1584
6175d24a 1585 /* Check that NEW_TYPE is not bigger than the conversion result. */
1586 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1587 return NULL;
1588
45eea33f 1589 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1590 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1591 {
1592 /* Create NEW_TYPE->USE_TYPE conversion. */
03d37e4e 1593 new_oprnd = make_ssa_name (use_type, NULL);
45eea33f 1594 pattern_stmt = gimple_build_assign_with_ops (NOP_EXPR, new_oprnd,
806413d2 1595 var);
45eea33f 1596 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1597
1598 *type_in = get_vectype_for_scalar_type (new_type);
1599 *type_out = get_vectype_for_scalar_type (use_type);
1600
1601 /* We created a pattern statement for the last statement in the
1602 sequence, so we don't need to associate it with the pattern
1603 statement created for PREV_STMT. Therefore, we add PREV_STMT
1604 to the list in order to mark it later in vect_pattern_recog_1. */
1605 if (prev_stmt)
f1f41a6c 1606 stmts->safe_push (prev_stmt);
45eea33f 1607 }
1608 else
1609 {
1610 if (prev_stmt)
18937389 1611 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1612 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
45eea33f 1613
1614 *type_in = vectype;
1615 *type_out = NULL_TREE;
1616 }
1617
f1f41a6c 1618 stmts->safe_push (use_stmt);
45eea33f 1619 }
1620 else
1621 /* TODO: support general case, create a conversion to the correct type. */
1622 return NULL;
1623
1624 /* Pattern detected. */
6d8fb6cf 1625 if (dump_enabled_p ())
45eea33f 1626 {
b055bc88 1627 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 1628 "vect_recog_over_widening_pattern: detected: ");
b055bc88 1629 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
78bb46f5 1630 dump_printf (MSG_NOTE, "\n");
45eea33f 1631 }
1632
1633 return pattern_stmt;
1634}
1635
6083c152 1636/* Detect widening shift pattern:
1637
1638 type a_t;
1639 TYPE a_T, res_T;
1640
1641 S1 a_t = ;
1642 S2 a_T = (TYPE) a_t;
1643 S3 res_T = a_T << CONST;
1644
1645 where type 'TYPE' is at least double the size of type 'type'.
1646
da5b41a4 1647 Also detect cases where the shift result is immediately converted
1648 to another type 'result_type' that is no larger in size than 'TYPE'.
1649 In those cases we perform a widen-shift that directly results in
1650 'result_type', to avoid a possible over-widening situation:
6083c152 1651
da5b41a4 1652 type a_t;
6083c152 1653 TYPE a_T, res_T;
da5b41a4 1654 result_type res_result;
6083c152 1655
1656 S1 a_t = ;
1657 S2 a_T = (TYPE) a_t;
1658 S3 res_T = a_T << CONST;
da5b41a4 1659 S4 res_result = (result_type) res_T;
1660 '--> res_result' = a_t w<< CONST;
6083c152 1661
1662 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1663 create an additional pattern stmt for S2 to create a variable of an
1664 intermediate type, and perform widen-shift on the intermediate type:
1665
1666 type a_t;
1667 interm_type a_it;
1668 TYPE a_T, res_T, res_T';
1669
1670 S1 a_t = ;
1671 S2 a_T = (TYPE) a_t;
1672 '--> a_it = (interm_type) a_t;
1673 S3 res_T = a_T << CONST;
1674 '--> res_T' = a_it <<* CONST;
1675
1676 Input/Output:
1677
1678 * STMTS: Contains a stmt from which the pattern search begins.
1679 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1680 in STMTS. When an intermediate type is used and a pattern statement is
1681 created for S2, we also put S2 here (before S3).
1682
1683 Output:
1684
1685 * TYPE_IN: The type of the input arguments to the pattern.
1686
1687 * TYPE_OUT: The type of the output of this pattern.
1688
1689 * Return value: A new stmt that will be used to replace the sequence of
1690 stmts that constitute the pattern. In this case it will be:
1691 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1692
1693static gimple
f1f41a6c 1694vect_recog_widen_shift_pattern (vec<gimple> *stmts,
6083c152 1695 tree *type_in, tree *type_out)
1696{
f1f41a6c 1697 gimple last_stmt = stmts->pop ();
6083c152 1698 gimple def_stmt0;
1699 tree oprnd0, oprnd1;
1700 tree type, half_type0;
da5b41a4 1701 gimple pattern_stmt;
6083c152 1702 tree vectype, vectype_out = NULL_TREE;
6083c152 1703 tree var;
1704 enum tree_code dummy_code;
1705 int dummy_int;
f1f41a6c 1706 vec<tree> dummy_vec;
da5b41a4 1707 gimple use_stmt;
087903db 1708 bool promotion;
6083c152 1709
1710 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1711 return NULL;
1712
6083c152 1713 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
da5b41a4 1714 return NULL;
6083c152 1715
1716 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1717 return NULL;
1718
1719 oprnd0 = gimple_assign_rhs1 (last_stmt);
1720 oprnd1 = gimple_assign_rhs2 (last_stmt);
1721 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1722 return NULL;
1723
1724 /* Check operand 0: it has to be defined by a type promotion. */
087903db 1725 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
1726 &promotion)
1727 || !promotion)
1728 return NULL;
6083c152 1729
1730 /* Check operand 1: has to be positive. We check that it fits the type
1731 in vect_handle_widen_op_by_const (). */
1732 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1733 return NULL;
1734
1735 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1736 type = gimple_expr_type (last_stmt);
1737
da5b41a4 1738 /* Check for subsequent conversion to another type. */
1739 use_stmt = vect_single_imm_use (last_stmt);
1740 if (use_stmt && is_gimple_assign (use_stmt)
1741 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1742 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1743 {
1744 tree use_lhs = gimple_assign_lhs (use_stmt);
1745 tree use_type = TREE_TYPE (use_lhs);
1746
1747 if (INTEGRAL_TYPE_P (use_type)
1748 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1749 {
1750 last_stmt = use_stmt;
1751 type = use_type;
1752 }
1753 }
1754
6083c152 1755 /* Check if this a widening operation. */
1756 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
1757 &oprnd0, stmts,
1758 type, &half_type0, def_stmt0))
1759 return NULL;
1760
6083c152 1761 /* Pattern detected. */
6d8fb6cf 1762 if (dump_enabled_p ())
b055bc88 1763 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 1764 "vect_recog_widen_shift_pattern: detected:\n");
6083c152 1765
1766 /* Check target support. */
1767 vectype = get_vectype_for_scalar_type (half_type0);
1768 vectype_out = get_vectype_for_scalar_type (type);
1769
1770 if (!vectype
1771 || !vectype_out
1772 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1773 vectype_out, vectype,
087dde2d 1774 &dummy_code, &dummy_code,
1775 &dummy_int, &dummy_vec))
6083c152 1776 return NULL;
1777
1778 *type_in = vectype;
1779 *type_out = vectype_out;
1780
1781 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1782 var = vect_recog_temp_ssa_var (type, NULL);
1783 pattern_stmt =
1784 gimple_build_assign_with_ops (WIDEN_LSHIFT_EXPR, var, oprnd0, oprnd1);
1785
6d8fb6cf 1786 if (dump_enabled_p ())
7bd765d4 1787 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
cb4792d2 1788
1789 stmts->safe_push (last_stmt);
1790 return pattern_stmt;
1791}
1792
1793/* Detect a rotate pattern wouldn't be otherwise vectorized:
1794
1795 type a_t, b_t, c_t;
1796
1797 S0 a_t = b_t r<< c_t;
1798
1799 Input/Output:
1800
1801 * STMTS: Contains a stmt from which the pattern search begins,
1802 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1803 with a sequence:
1804
1805 S1 d_t = -c_t;
1806 S2 e_t = d_t & (B - 1);
1807 S3 f_t = b_t << c_t;
1808 S4 g_t = b_t >> e_t;
1809 S0 a_t = f_t | g_t;
1810
1811 where B is element bitsize of type.
1812
1813 Output:
1814
1815 * TYPE_IN: The type of the input arguments to the pattern.
1816
1817 * TYPE_OUT: The type of the output of this pattern.
1818
1819 * Return value: A new stmt that will be used to replace the rotate
1820 S0 stmt. */
1821
1822static gimple
1823vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1824{
1825 gimple last_stmt = stmts->pop ();
1826 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1827 gimple pattern_stmt, def_stmt;
1828 enum tree_code rhs_code;
1829 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1830 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1831 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1832 enum vect_def_type dt;
1833 optab optab1, optab2;
0a015fc7 1834 edge ext_def = NULL;
cb4792d2 1835
1836 if (!is_gimple_assign (last_stmt))
1837 return NULL;
1838
1839 rhs_code = gimple_assign_rhs_code (last_stmt);
1840 switch (rhs_code)
1841 {
1842 case LROTATE_EXPR:
1843 case RROTATE_EXPR:
1844 break;
1845 default:
1846 return NULL;
1847 }
1848
1849 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1850 return NULL;
1851
1852 lhs = gimple_assign_lhs (last_stmt);
1853 oprnd0 = gimple_assign_rhs1 (last_stmt);
1854 type = TREE_TYPE (oprnd0);
1855 oprnd1 = gimple_assign_rhs2 (last_stmt);
1856 if (TREE_CODE (oprnd0) != SSA_NAME
1857 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1858 || !INTEGRAL_TYPE_P (type)
1859 || !TYPE_UNSIGNED (type))
1860 return NULL;
1861
1862 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1863 &def, &dt))
1864 return NULL;
1865
1866 if (dt != vect_internal_def
1867 && dt != vect_constant_def
1868 && dt != vect_external_def)
1869 return NULL;
1870
1871 vectype = get_vectype_for_scalar_type (type);
1872 if (vectype == NULL_TREE)
1873 return NULL;
1874
1875 /* If vector/vector or vector/scalar rotate is supported by the target,
1876 don't do anything here. */
1877 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1878 if (optab1
1879 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1880 return NULL;
1881
1882 if (bb_vinfo != NULL || dt != vect_internal_def)
1883 {
1884 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1885 if (optab2
1886 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1887 return NULL;
1888 }
1889
1890 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1891 don't do anything here either. */
1892 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1893 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1894 if (!optab1
1895 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1896 || !optab2
1897 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1898 {
1899 if (bb_vinfo == NULL && dt == vect_internal_def)
1900 return NULL;
1901 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1902 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1903 if (!optab1
1904 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1905 || !optab2
1906 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1907 return NULL;
1908 }
1909
1910 *type_in = vectype;
1911 *type_out = vectype;
1912 if (*type_in == NULL_TREE)
1913 return NULL;
1914
0a015fc7 1915 if (dt == vect_external_def
1916 && TREE_CODE (oprnd1) == SSA_NAME
1917 && loop_vinfo)
1918 {
1919 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1920 ext_def = loop_preheader_edge (loop);
1921 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1922 {
1923 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1924 if (bb == NULL
1925 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1926 ext_def = NULL;
1927 }
1928 }
1929
cb4792d2 1930 def = NULL_TREE;
1931 if (TREE_CODE (oprnd1) == INTEGER_CST
1932 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1933 def = oprnd1;
1934 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1935 {
1936 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1937 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1938 && TYPE_PRECISION (TREE_TYPE (rhs1))
1939 == TYPE_PRECISION (type))
1940 def = rhs1;
1941 }
1942
1943 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1944 if (def == NULL_TREE)
1945 {
1946 def = vect_recog_temp_ssa_var (type, NULL);
806413d2 1947 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1);
0a015fc7 1948 if (ext_def)
1949 {
1950 basic_block new_bb
1951 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1952 gcc_assert (!new_bb);
1953 }
1954 else
1955 append_pattern_def_seq (stmt_vinfo, def_stmt);
cb4792d2 1956 }
1957 stype = TREE_TYPE (def);
1958
1959 if (TREE_CODE (def) == INTEGER_CST)
1960 {
e913b5cd 1961 if (!tree_fits_uhwi_p (def)
aa59f000 1962 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
cb4792d2 1963 || integer_zerop (def))
1964 return NULL;
1965 def2 = build_int_cst (stype,
1966 GET_MODE_PRECISION (TYPE_MODE (type))
e913b5cd 1967 - tree_to_uhwi (def));
cb4792d2 1968 }
1969 else
1970 {
1971 tree vecstype = get_vectype_for_scalar_type (stype);
1972 stmt_vec_info def_stmt_vinfo;
1973
1974 if (vecstype == NULL_TREE)
1975 return NULL;
1976 def2 = vect_recog_temp_ssa_var (stype, NULL);
806413d2 1977 def_stmt = gimple_build_assign_with_ops (NEGATE_EXPR, def2, def);
0a015fc7 1978 if (ext_def)
1979 {
1980 basic_block new_bb
1981 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1982 gcc_assert (!new_bb);
1983 }
1984 else
1985 {
1986 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1987 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1988 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1989 append_pattern_def_seq (stmt_vinfo, def_stmt);
1990 }
cb4792d2 1991
1992 def2 = vect_recog_temp_ssa_var (stype, NULL);
1993 tree mask
1994 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
1995 def_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, def2,
1996 gimple_assign_lhs (def_stmt),
1997 mask);
0a015fc7 1998 if (ext_def)
1999 {
2000 basic_block new_bb
2001 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
2002 gcc_assert (!new_bb);
2003 }
2004 else
2005 {
2006 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2007 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2008 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2009 append_pattern_def_seq (stmt_vinfo, def_stmt);
2010 }
cb4792d2 2011 }
2012
2013 var1 = vect_recog_temp_ssa_var (type, NULL);
2014 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2015 ? LSHIFT_EXPR : RSHIFT_EXPR,
2016 var1, oprnd0, def);
2017 append_pattern_def_seq (stmt_vinfo, def_stmt);
2018
2019 var2 = vect_recog_temp_ssa_var (type, NULL);
2020 def_stmt = gimple_build_assign_with_ops (rhs_code == LROTATE_EXPR
2021 ? RSHIFT_EXPR : LSHIFT_EXPR,
2022 var2, oprnd0, def2);
2023 append_pattern_def_seq (stmt_vinfo, def_stmt);
2024
2025 /* Pattern detected. */
2026 if (dump_enabled_p ())
2027 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2028 "vect_recog_rotate_pattern: detected:\n");
cb4792d2 2029
2030 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2031 var = vect_recog_temp_ssa_var (type, NULL);
2032 pattern_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, var, var1, var2);
2033
2034 if (dump_enabled_p ())
2035 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
6083c152 2036
f1f41a6c 2037 stmts->safe_push (last_stmt);
6083c152 2038 return pattern_stmt;
2039}
45eea33f 2040
40dfedc8 2041/* Detect a vector by vector shift pattern that wouldn't be otherwise
2042 vectorized:
2043
2044 type a_t;
2045 TYPE b_T, res_T;
2046
2047 S1 a_t = ;
2048 S2 b_T = ;
2049 S3 res_T = b_T op a_t;
2050
2051 where type 'TYPE' is a type with different size than 'type',
2052 and op is <<, >> or rotate.
2053
2054 Also detect cases:
2055
2056 type a_t;
2057 TYPE b_T, c_T, res_T;
2058
2059 S0 c_T = ;
2060 S1 a_t = (type) c_T;
2061 S2 b_T = ;
2062 S3 res_T = b_T op a_t;
2063
2064 Input/Output:
2065
2066 * STMTS: Contains a stmt from which the pattern search begins,
2067 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2068 with a shift/rotate which has same type on both operands, in the
2069 second case just b_T op c_T, in the first case with added cast
18937389 2070 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
40dfedc8 2071
2072 Output:
2073
2074 * TYPE_IN: The type of the input arguments to the pattern.
2075
2076 * TYPE_OUT: The type of the output of this pattern.
2077
2078 * Return value: A new stmt that will be used to replace the shift/rotate
2079 S3 stmt. */
2080
2081static gimple
f1f41a6c 2082vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
40dfedc8 2083 tree *type_in, tree *type_out)
2084{
f1f41a6c 2085 gimple last_stmt = stmts->pop ();
40dfedc8 2086 tree oprnd0, oprnd1, lhs, var;
2087 gimple pattern_stmt, def_stmt;
2088 enum tree_code rhs_code;
2089 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2090 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4c0c783a 2091 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
40dfedc8 2092 enum vect_def_type dt;
2093 tree def;
2094
2095 if (!is_gimple_assign (last_stmt))
2096 return NULL;
2097
2098 rhs_code = gimple_assign_rhs_code (last_stmt);
2099 switch (rhs_code)
2100 {
2101 case LSHIFT_EXPR:
2102 case RSHIFT_EXPR:
2103 case LROTATE_EXPR:
2104 case RROTATE_EXPR:
2105 break;
2106 default:
2107 return NULL;
2108 }
2109
2110 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2111 return NULL;
2112
2113 lhs = gimple_assign_lhs (last_stmt);
2114 oprnd0 = gimple_assign_rhs1 (last_stmt);
2115 oprnd1 = gimple_assign_rhs2 (last_stmt);
2116 if (TREE_CODE (oprnd0) != SSA_NAME
2117 || TREE_CODE (oprnd1) != SSA_NAME
2118 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2119 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2120 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2121 || TYPE_PRECISION (TREE_TYPE (lhs))
2122 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2123 return NULL;
2124
4c0c783a 2125 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
bed8b93b 2126 &def, &dt))
40dfedc8 2127 return NULL;
2128
2129 if (dt != vect_internal_def)
2130 return NULL;
2131
2132 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2133 *type_out = *type_in;
2134 if (*type_in == NULL_TREE)
2135 return NULL;
2136
2137 def = NULL_TREE;
2138 if (gimple_assign_cast_p (def_stmt))
2139 {
2140 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2141 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2142 && TYPE_PRECISION (TREE_TYPE (rhs1))
2143 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2144 def = rhs1;
2145 }
2146
2147 if (def == NULL_TREE)
2148 {
2149 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
806413d2 2150 def_stmt = gimple_build_assign_with_ops (NOP_EXPR, def, oprnd1);
6d741312 2151 new_pattern_def_seq (stmt_vinfo, def_stmt);
40dfedc8 2152 }
2153
2154 /* Pattern detected. */
6d8fb6cf 2155 if (dump_enabled_p ())
b055bc88 2156 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2157 "vect_recog_vector_vector_shift_pattern: detected:\n");
40dfedc8 2158
2159 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2160 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
2161 pattern_stmt = gimple_build_assign_with_ops (rhs_code, var, oprnd0, def);
2162
6d8fb6cf 2163 if (dump_enabled_p ())
7bd765d4 2164 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
40dfedc8 2165
f1f41a6c 2166 stmts->safe_push (last_stmt);
40dfedc8 2167 return pattern_stmt;
2168}
2169
127cb1cd 2170/* Detect a signed division by a constant that wouldn't be
18937389 2171 otherwise vectorized:
2172
2173 type a_t, b_t;
2174
2175 S1 a_t = b_t / N;
2176
127cb1cd 2177 where type 'type' is an integral type and N is a constant.
18937389 2178
127cb1cd 2179 Similarly handle modulo by a constant:
18937389 2180
2181 S4 a_t = b_t % N;
2182
2183 Input/Output:
2184
2185 * STMTS: Contains a stmt from which the pattern search begins,
127cb1cd 2186 i.e. the division stmt. S1 is replaced by if N is a power
2187 of two constant and type is signed:
18937389 2188 S3 y_t = b_t < 0 ? N - 1 : 0;
2189 S2 x_t = b_t + y_t;
2190 S1' a_t = x_t >> log2 (N);
2191
127cb1cd 2192 S4 is replaced if N is a power of two constant and
2193 type is signed by (where *_T temporaries have unsigned type):
18937389 2194 S9 y_T = b_t < 0 ? -1U : 0U;
2195 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2196 S7 z_t = (type) z_T;
2197 S6 w_t = b_t + z_t;
2198 S5 x_t = w_t & (N - 1);
2199 S4' a_t = x_t - z_t;
2200
2201 Output:
2202
2203 * TYPE_IN: The type of the input arguments to the pattern.
2204
2205 * TYPE_OUT: The type of the output of this pattern.
2206
2207 * Return value: A new stmt that will be used to replace the division
2208 S1 or modulo S4 stmt. */
2209
2210static gimple
f1f41a6c 2211vect_recog_divmod_pattern (vec<gimple> *stmts,
127cb1cd 2212 tree *type_in, tree *type_out)
18937389 2213{
f1f41a6c 2214 gimple last_stmt = stmts->pop ();
3af51fe9 2215 tree oprnd0, oprnd1, vectype, itype, cond;
18937389 2216 gimple pattern_stmt, def_stmt;
2217 enum tree_code rhs_code;
2218 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2219 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
127cb1cd 2220 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
18937389 2221 optab optab;
ebf4f764 2222 tree q;
127cb1cd 2223 int dummy_int, prec;
127cb1cd 2224 stmt_vec_info def_stmt_vinfo;
18937389 2225
2226 if (!is_gimple_assign (last_stmt))
2227 return NULL;
2228
2229 rhs_code = gimple_assign_rhs_code (last_stmt);
2230 switch (rhs_code)
2231 {
2232 case TRUNC_DIV_EXPR:
2233 case TRUNC_MOD_EXPR:
2234 break;
2235 default:
2236 return NULL;
2237 }
2238
2239 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2240 return NULL;
2241
2242 oprnd0 = gimple_assign_rhs1 (last_stmt);
2243 oprnd1 = gimple_assign_rhs2 (last_stmt);
2244 itype = TREE_TYPE (oprnd0);
2245 if (TREE_CODE (oprnd0) != SSA_NAME
2246 || TREE_CODE (oprnd1) != INTEGER_CST
2247 || TREE_CODE (itype) != INTEGER_TYPE
127cb1cd 2248 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
18937389 2249 return NULL;
2250
2251 vectype = get_vectype_for_scalar_type (itype);
2252 if (vectype == NULL_TREE)
2253 return NULL;
2254
2255 /* If the target can handle vectorized division or modulo natively,
2256 don't attempt to optimize this. */
2257 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
6cdd383a 2258 if (optab != unknown_optab)
18937389 2259 {
3754d046 2260 machine_mode vec_mode = TYPE_MODE (vectype);
18937389 2261 int icode = (int) optab_handler (optab, vec_mode);
82e76116 2262 if (icode != CODE_FOR_nothing)
18937389 2263 return NULL;
2264 }
2265
127cb1cd 2266 prec = TYPE_PRECISION (itype);
2267 if (integer_pow2p (oprnd1))
18937389 2268 {
127cb1cd 2269 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2270 return NULL;
18937389 2271
127cb1cd 2272 /* Pattern detected. */
6d8fb6cf 2273 if (dump_enabled_p ())
b055bc88 2274 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2275 "vect_recog_divmod_pattern: detected:\n");
127cb1cd 2276
2277 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2278 build_int_cst (itype, 0));
2279 if (rhs_code == TRUNC_DIV_EXPR)
2280 {
2281 tree var = vect_recog_temp_ssa_var (itype, NULL);
2282 tree shift;
2283 def_stmt
446e85eb 2284 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2285 fold_build2 (MINUS_EXPR, itype,
2286 oprnd1,
2287 build_int_cst (itype,
2288 1)),
2289 build_int_cst (itype, 0));
127cb1cd 2290 new_pattern_def_seq (stmt_vinfo, def_stmt);
2291 var = vect_recog_temp_ssa_var (itype, NULL);
2292 def_stmt
2293 = gimple_build_assign_with_ops (PLUS_EXPR, var, oprnd0,
2294 gimple_assign_lhs (def_stmt));
2295 append_pattern_def_seq (stmt_vinfo, def_stmt);
2296
2297 shift = build_int_cst (itype, tree_log2 (oprnd1));
2298 pattern_stmt
2299 = gimple_build_assign_with_ops (RSHIFT_EXPR,
2300 vect_recog_temp_ssa_var (itype,
2301 NULL),
2302 var, shift);
2303 }
2304 else
2305 {
2306 tree signmask;
2307 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2308 if (compare_tree_int (oprnd1, 2) == 0)
2309 {
2310 signmask = vect_recog_temp_ssa_var (itype, NULL);
2311 def_stmt
446e85eb 2312 = gimple_build_assign_with_ops (COND_EXPR, signmask, cond,
2313 build_int_cst (itype, 1),
2314 build_int_cst (itype, 0));
127cb1cd 2315 append_pattern_def_seq (stmt_vinfo, def_stmt);
2316 }
2317 else
2318 {
2319 tree utype
2320 = build_nonstandard_integer_type (prec, 1);
2321 tree vecutype = get_vectype_for_scalar_type (utype);
2322 tree shift
2323 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2324 - tree_log2 (oprnd1));
2325 tree var = vect_recog_temp_ssa_var (utype, NULL);
2326
2327 def_stmt
446e85eb 2328 = gimple_build_assign_with_ops (COND_EXPR, var, cond,
2329 build_int_cst (utype, -1),
2330 build_int_cst (utype, 0));
127cb1cd 2331 def_stmt_vinfo
2332 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2333 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2334 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2335 append_pattern_def_seq (stmt_vinfo, def_stmt);
2336 var = vect_recog_temp_ssa_var (utype, NULL);
2337 def_stmt
2338 = gimple_build_assign_with_ops (RSHIFT_EXPR, var,
2339 gimple_assign_lhs (def_stmt),
2340 shift);
2341 def_stmt_vinfo
2342 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2343 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2344 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2345 append_pattern_def_seq (stmt_vinfo, def_stmt);
2346 signmask = vect_recog_temp_ssa_var (itype, NULL);
2347 def_stmt
806413d2 2348 = gimple_build_assign_with_ops (NOP_EXPR, signmask, var);
127cb1cd 2349 append_pattern_def_seq (stmt_vinfo, def_stmt);
2350 }
2351 def_stmt
2352 = gimple_build_assign_with_ops (PLUS_EXPR,
2353 vect_recog_temp_ssa_var (itype,
2354 NULL),
2355 oprnd0, signmask);
2356 append_pattern_def_seq (stmt_vinfo, def_stmt);
2357 def_stmt
2358 = gimple_build_assign_with_ops (BIT_AND_EXPR,
2359 vect_recog_temp_ssa_var (itype,
2360 NULL),
2361 gimple_assign_lhs (def_stmt),
2362 fold_build2 (MINUS_EXPR, itype,
2363 oprnd1,
2364 build_int_cst (itype,
2365 1)));
2366 append_pattern_def_seq (stmt_vinfo, def_stmt);
2367
2368 pattern_stmt
2369 = gimple_build_assign_with_ops (MINUS_EXPR,
2370 vect_recog_temp_ssa_var (itype,
2371 NULL),
2372 gimple_assign_lhs (def_stmt),
2373 signmask);
2374 }
2375
6d8fb6cf 2376 if (dump_enabled_p ())
7bd765d4 2377 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2378 0);
127cb1cd 2379
f1f41a6c 2380 stmts->safe_push (last_stmt);
127cb1cd 2381
2382 *type_in = vectype;
2383 *type_out = vectype;
2384 return pattern_stmt;
18937389 2385 }
127cb1cd 2386
20448fd9 2387 if (prec > HOST_BITS_PER_WIDE_INT
2388 || integer_zerop (oprnd1))
127cb1cd 2389 return NULL;
2390
ebf4f764 2391 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2392 return NULL;
127cb1cd 2393
2394 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2395
2396 if (TYPE_UNSIGNED (itype))
18937389 2397 {
127cb1cd 2398 unsigned HOST_WIDE_INT mh, ml;
2399 int pre_shift, post_shift;
f9ae6f95 2400 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2401 & GET_MODE_MASK (TYPE_MODE (itype)));
3af51fe9 2402 tree t1, t2, t3, t4;
127cb1cd 2403
2404 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2405 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2406 return NULL;
2407
2408 /* Find a suitable multiplier and right shift count
2409 instead of multiplying with D. */
2410 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2411
2412 /* If the suggested multiplier is more than SIZE bits, we can do better
2413 for even divisors, using an initial right shift. */
2414 if (mh != 0 && (d & 1) == 0)
18937389 2415 {
127cb1cd 2416 pre_shift = floor_log2 (d & -d);
2417 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2418 &ml, &post_shift, &dummy_int);
2419 gcc_assert (!mh);
2420 }
2421 else
2422 pre_shift = 0;
2423
2424 if (mh != 0)
2425 {
2426 if (post_shift - 1 >= prec)
2427 return NULL;
2428
3af51fe9 2429 /* t1 = oprnd0 h* ml;
2430 t2 = oprnd0 - t1;
2431 t3 = t2 >> 1;
2432 t4 = t1 + t3;
2433 q = t4 >> (post_shift - 1); */
2434 t1 = vect_recog_temp_ssa_var (itype, NULL);
18937389 2435 def_stmt
3af51fe9 2436 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
127cb1cd 2437 build_int_cst (itype, ml));
2438 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 2439
3af51fe9 2440 t2 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 2441 def_stmt
3af51fe9 2442 = gimple_build_assign_with_ops (MINUS_EXPR, t2, oprnd0, t1);
6d741312 2443 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 2444
2445 t3 = vect_recog_temp_ssa_var (itype, NULL);
2446 def_stmt
3af51fe9 2447 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
127cb1cd 2448 integer_one_node);
2449 append_pattern_def_seq (stmt_vinfo, def_stmt);
2450
3af51fe9 2451 t4 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 2452 def_stmt
3af51fe9 2453 = gimple_build_assign_with_ops (PLUS_EXPR, t4, t1, t3);
127cb1cd 2454
2455 if (post_shift != 1)
2456 {
2457 append_pattern_def_seq (stmt_vinfo, def_stmt);
2458
3af51fe9 2459 q = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 2460 pattern_stmt
3af51fe9 2461 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t4,
127cb1cd 2462 build_int_cst (itype,
2463 post_shift
2464 - 1));
2465 }
2466 else
2467 {
3af51fe9 2468 q = t4;
127cb1cd 2469 pattern_stmt = def_stmt;
2470 }
18937389 2471 }
2472 else
2473 {
127cb1cd 2474 if (pre_shift >= prec || post_shift >= prec)
2475 return NULL;
2476
2477 /* t1 = oprnd0 >> pre_shift;
3af51fe9 2478 t2 = t1 h* ml;
2479 q = t2 >> post_shift; */
127cb1cd 2480 if (pre_shift)
2481 {
2482 t1 = vect_recog_temp_ssa_var (itype, NULL);
2483 def_stmt
2484 = gimple_build_assign_with_ops (RSHIFT_EXPR, t1, oprnd0,
2485 build_int_cst (NULL,
2486 pre_shift));
2487 append_pattern_def_seq (stmt_vinfo, def_stmt);
2488 }
2489 else
2490 t1 = oprnd0;
18937389 2491
3af51fe9 2492 t2 = vect_recog_temp_ssa_var (itype, NULL);
18937389 2493 def_stmt
3af51fe9 2494 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t2, t1,
127cb1cd 2495 build_int_cst (itype, ml));
127cb1cd 2496
3af51fe9 2497 if (post_shift)
2498 {
2499 append_pattern_def_seq (stmt_vinfo, def_stmt);
127cb1cd 2500
3af51fe9 2501 q = vect_recog_temp_ssa_var (itype, NULL);
2502 def_stmt
2503 = gimple_build_assign_with_ops (RSHIFT_EXPR, q, t2,
2504 build_int_cst (itype,
2505 post_shift));
2506 }
2507 else
2508 q = t2;
2509
2510 pattern_stmt = def_stmt;
127cb1cd 2511 }
2512 }
2513 else
2514 {
2515 unsigned HOST_WIDE_INT ml;
60420e1c 2516 int post_shift;
f9ae6f95 2517 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
127cb1cd 2518 unsigned HOST_WIDE_INT abs_d;
2519 bool add = false;
3af51fe9 2520 tree t1, t2, t3, t4;
127cb1cd 2521
2522 /* Give up for -1. */
2523 if (d == -1)
2524 return NULL;
2525
127cb1cd 2526 /* Since d might be INT_MIN, we have to cast to
2527 unsigned HOST_WIDE_INT before negating to avoid
2528 undefined signed overflow. */
2529 abs_d = (d >= 0
2530 ? (unsigned HOST_WIDE_INT) d
2531 : - (unsigned HOST_WIDE_INT) d);
2532
2533 /* n rem d = n rem -d */
2534 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2535 {
2536 d = abs_d;
2537 oprnd1 = build_int_cst (itype, abs_d);
2538 }
2539 else if (HOST_BITS_PER_WIDE_INT >= prec
2540 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2541 /* This case is not handled correctly below. */
2542 return NULL;
2543
60420e1c 2544 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
127cb1cd 2545 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2546 {
2547 add = true;
2548 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2549 }
2550 if (post_shift >= prec)
2551 return NULL;
2552
de6af029 2553 /* t1 = oprnd0 h* ml; */
3af51fe9 2554 t1 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 2555 def_stmt
3af51fe9 2556 = gimple_build_assign_with_ops (MULT_HIGHPART_EXPR, t1, oprnd0,
127cb1cd 2557 build_int_cst (itype, ml));
127cb1cd 2558
2559 if (add)
2560 {
3af51fe9 2561 /* t2 = t1 + oprnd0; */
de6af029 2562 append_pattern_def_seq (stmt_vinfo, def_stmt);
3af51fe9 2563 t2 = vect_recog_temp_ssa_var (itype, NULL);
127cb1cd 2564 def_stmt
3af51fe9 2565 = gimple_build_assign_with_ops (PLUS_EXPR, t2, t1, oprnd0);
127cb1cd 2566 }
2567 else
3af51fe9 2568 t2 = t1;
127cb1cd 2569
3af51fe9 2570 if (post_shift)
127cb1cd 2571 {
3af51fe9 2572 /* t3 = t2 >> post_shift; */
de6af029 2573 append_pattern_def_seq (stmt_vinfo, def_stmt);
3af51fe9 2574 t3 = vect_recog_temp_ssa_var (itype, NULL);
18937389 2575 def_stmt
3af51fe9 2576 = gimple_build_assign_with_ops (RSHIFT_EXPR, t3, t2,
127cb1cd 2577 build_int_cst (itype, post_shift));
18937389 2578 }
127cb1cd 2579 else
3af51fe9 2580 t3 = t2;
127cb1cd 2581
9c1be15e 2582 wide_int oprnd0_min, oprnd0_max;
de6af029 2583 int msb = 1;
2584 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2585 {
9c1be15e 2586 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
de6af029 2587 msb = 0;
9c1be15e 2588 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
de6af029 2589 msb = -1;
2590 }
127cb1cd 2591
de6af029 2592 if (msb == 0 && d >= 0)
2593 {
2594 /* q = t3; */
2595 q = t3;
2596 pattern_stmt = def_stmt;
2597 }
2598 else
2599 {
2600 /* t4 = oprnd0 >> (prec - 1);
2601 or if we know from VRP that oprnd0 >= 0
2602 t4 = 0;
2603 or if we know from VRP that oprnd0 < 0
2604 t4 = -1; */
2605 append_pattern_def_seq (stmt_vinfo, def_stmt);
2606 t4 = vect_recog_temp_ssa_var (itype, NULL);
2607 if (msb != 1)
2608 def_stmt
2609 = gimple_build_assign_with_ops (INTEGER_CST,
806413d2 2610 t4, build_int_cst (itype, msb));
de6af029 2611 else
2612 def_stmt
2613 = gimple_build_assign_with_ops (RSHIFT_EXPR, t4, oprnd0,
2614 build_int_cst (itype, prec - 1));
2615 append_pattern_def_seq (stmt_vinfo, def_stmt);
2616
2617 /* q = t3 - t4; or q = t4 - t3; */
2618 q = vect_recog_temp_ssa_var (itype, NULL);
2619 pattern_stmt
2620 = gimple_build_assign_with_ops (MINUS_EXPR, q, d < 0 ? t4 : t3,
2621 d < 0 ? t3 : t4);
2622 }
127cb1cd 2623 }
2624
2625 if (rhs_code == TRUNC_MOD_EXPR)
2626 {
2627 tree r, t1;
2628
2629 /* We divided. Now finish by:
2630 t1 = q * oprnd1;
2631 r = oprnd0 - t1; */
2632 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2633
2634 t1 = vect_recog_temp_ssa_var (itype, NULL);
18937389 2635 def_stmt
127cb1cd 2636 = gimple_build_assign_with_ops (MULT_EXPR, t1, q, oprnd1);
6d741312 2637 append_pattern_def_seq (stmt_vinfo, def_stmt);
18937389 2638
127cb1cd 2639 r = vect_recog_temp_ssa_var (itype, NULL);
18937389 2640 pattern_stmt
127cb1cd 2641 = gimple_build_assign_with_ops (MINUS_EXPR, r, oprnd0, t1);
18937389 2642 }
2643
127cb1cd 2644 /* Pattern detected. */
6d8fb6cf 2645 if (dump_enabled_p ())
7bd765d4 2646 {
b055bc88 2647 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 2648 "vect_recog_divmod_pattern: detected: ");
b055bc88 2649 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
78bb46f5 2650 dump_printf (MSG_NOTE, "\n");
7bd765d4 2651 }
18937389 2652
f1f41a6c 2653 stmts->safe_push (last_stmt);
18937389 2654
2655 *type_in = vectype;
2656 *type_out = vectype;
2657 return pattern_stmt;
2658}
2659
84557284 2660/* Function vect_recog_mixed_size_cond_pattern
2661
2662 Try to find the following pattern:
2663
2664 type x_t, y_t;
2665 TYPE a_T, b_T, c_T;
2666 loop:
2667 S1 a_T = x_t CMP y_t ? b_T : c_T;
2668
2669 where type 'TYPE' is an integral type which has different size
087903db 2670 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
84557284 2671 than 'type', the constants need to fit into an integer type
087903db 2672 with the same width as 'type') or results of conversion from 'type'.
84557284 2673
2674 Input:
2675
2676 * LAST_STMT: A stmt from which the pattern search begins.
2677
2678 Output:
2679
2680 * TYPE_IN: The type of the input arguments to the pattern.
2681
2682 * TYPE_OUT: The type of the output of this pattern.
2683
2684 * Return value: A new stmt that will be used to replace the pattern.
2685 Additionally a def_stmt is added.
2686
2687 a_it = x_t CMP y_t ? b_it : c_it;
2688 a_T = (TYPE) a_it; */
2689
2690static gimple
f1f41a6c 2691vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
84557284 2692 tree *type_out)
2693{
f1f41a6c 2694 gimple last_stmt = (*stmts)[0];
84557284 2695 tree cond_expr, then_clause, else_clause;
2696 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
087903db 2697 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
3754d046 2698 machine_mode cmpmode;
84557284 2699 gimple pattern_stmt, def_stmt;
2700 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4c0c783a 2701 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
087903db 2702 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2703 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2704 bool promotion;
2705 tree comp_scalar_type;
84557284 2706
2707 if (!is_gimple_assign (last_stmt)
2708 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2709 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2710 return NULL;
2711
2712 cond_expr = gimple_assign_rhs1 (last_stmt);
2713 then_clause = gimple_assign_rhs2 (last_stmt);
2714 else_clause = gimple_assign_rhs3 (last_stmt);
2715
f39dd90c 2716 if (!COMPARISON_CLASS_P (cond_expr))
2717 return NULL;
2718
087903db 2719 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2720 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
f39dd90c 2721 if (comp_vectype == NULL_TREE)
84557284 2722 return NULL;
2723
2724 type = gimple_expr_type (last_stmt);
087903db 2725 if (types_compatible_p (type, comp_scalar_type)
2726 || ((TREE_CODE (then_clause) != INTEGER_CST
2727 || TREE_CODE (else_clause) != INTEGER_CST)
2728 && !INTEGRAL_TYPE_P (comp_scalar_type))
2729 || !INTEGRAL_TYPE_P (type))
2730 return NULL;
2731
2732 if ((TREE_CODE (then_clause) != INTEGER_CST
2733 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2734 &def_stmt0, &promotion))
2735 || (TREE_CODE (else_clause) != INTEGER_CST
2736 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2737 &def_stmt1, &promotion)))
2738 return NULL;
2739
2740 if (orig_type0 && orig_type1
2741 && !types_compatible_p (orig_type0, orig_type1))
2742 return NULL;
2743
2744 if (orig_type0)
2745 {
2746 if (!types_compatible_p (orig_type0, comp_scalar_type))
2747 return NULL;
2748 then_clause = gimple_assign_rhs1 (def_stmt0);
2749 itype = orig_type0;
2750 }
2751
2752 if (orig_type1)
2753 {
2754 if (!types_compatible_p (orig_type1, comp_scalar_type))
2755 return NULL;
2756 else_clause = gimple_assign_rhs1 (def_stmt1);
2757 itype = orig_type1;
2758 }
2759
84557284 2760 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2761
2762 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2763 return NULL;
2764
2765 vectype = get_vectype_for_scalar_type (type);
2766 if (vectype == NULL_TREE)
2767 return NULL;
2768
2769 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2770 return NULL;
2771
087903db 2772 if (itype == NULL_TREE)
2773 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2774 TYPE_UNSIGNED (type));
2775
84557284 2776 if (itype == NULL_TREE
2777 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2778 return NULL;
2779
2780 vecitype = get_vectype_for_scalar_type (itype);
2781 if (vecitype == NULL_TREE)
2782 return NULL;
2783
2784 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2785 return NULL;
2786
2787 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2788 {
087903db 2789 if ((TREE_CODE (then_clause) == INTEGER_CST
2790 && !int_fits_type_p (then_clause, itype))
2791 || (TREE_CODE (else_clause) == INTEGER_CST
2792 && !int_fits_type_p (else_clause, itype)))
84557284 2793 return NULL;
2794 }
2795
2796 def_stmt
446e85eb 2797 = gimple_build_assign_with_ops (COND_EXPR,
2798 vect_recog_temp_ssa_var (itype, NULL),
2799 unshare_expr (cond_expr),
2800 fold_convert (itype, then_clause),
2801 fold_convert (itype, else_clause));
84557284 2802 pattern_stmt
2803 = gimple_build_assign_with_ops (NOP_EXPR,
2804 vect_recog_temp_ssa_var (type, NULL),
806413d2 2805 gimple_assign_lhs (def_stmt));
84557284 2806
6d741312 2807 new_pattern_def_seq (stmt_vinfo, def_stmt);
4c0c783a 2808 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
84557284 2809 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2810 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2811 *type_in = vecitype;
2812 *type_out = vectype;
2813
6d8fb6cf 2814 if (dump_enabled_p ())
b055bc88 2815 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 2816 "vect_recog_mixed_size_cond_pattern: detected:\n");
4c0c783a 2817
84557284 2818 return pattern_stmt;
2819}
2820
2821
50f85e2e 2822/* Helper function of vect_recog_bool_pattern. Called recursively, return
2823 true if bool VAR can be optimized that way. */
2824
2825static bool
4c0c783a 2826check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
50f85e2e 2827{
2828 gimple def_stmt;
2829 enum vect_def_type dt;
2830 tree def, rhs1;
2831 enum tree_code rhs_code;
2832
4c0c783a 2833 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2834 &dt))
50f85e2e 2835 return false;
2836
2837 if (dt != vect_internal_def)
2838 return false;
2839
2840 if (!is_gimple_assign (def_stmt))
2841 return false;
2842
2843 if (!has_single_use (def))
2844 return false;
2845
2846 rhs1 = gimple_assign_rhs1 (def_stmt);
2847 rhs_code = gimple_assign_rhs_code (def_stmt);
2848 switch (rhs_code)
2849 {
2850 case SSA_NAME:
4c0c783a 2851 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
50f85e2e 2852
2853 CASE_CONVERT:
2854 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2855 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2856 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2857 return false;
4c0c783a 2858 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
50f85e2e 2859
2860 case BIT_NOT_EXPR:
4c0c783a 2861 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
50f85e2e 2862
2863 case BIT_AND_EXPR:
2864 case BIT_IOR_EXPR:
2865 case BIT_XOR_EXPR:
4c0c783a 2866 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
50f85e2e 2867 return false;
4c0c783a 2868 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2869 bb_vinfo);
50f85e2e 2870
2871 default:
2872 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2873 {
2874 tree vecitype, comp_vectype;
2875
3a542b98 2876 /* If the comparison can throw, then is_gimple_condexpr will be
2877 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2878 if (stmt_could_throw_p (def_stmt))
2879 return false;
2880
50f85e2e 2881 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2882 if (comp_vectype == NULL_TREE)
2883 return false;
2884
2885 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2886 {
3754d046 2887 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
50f85e2e 2888 tree itype
d6152abc 2889 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
50f85e2e 2890 vecitype = get_vectype_for_scalar_type (itype);
2891 if (vecitype == NULL_TREE)
2892 return false;
2893 }
2894 else
2895 vecitype = comp_vectype;
2896 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2897 }
2898 return false;
2899 }
2900}
2901
2902
2903/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2904 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
18937389 2905 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
50f85e2e 2906
2907static tree
2908adjust_bool_pattern_cast (tree type, tree var)
2909{
2910 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2911 gimple cast_stmt, pattern_stmt;
2912
18937389 2913 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
50f85e2e 2914 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
6d741312 2915 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
50f85e2e 2916 cast_stmt
2917 = gimple_build_assign_with_ops (NOP_EXPR,
2918 vect_recog_temp_ssa_var (type, NULL),
806413d2 2919 gimple_assign_lhs (pattern_stmt));
50f85e2e 2920 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2921 return gimple_assign_lhs (cast_stmt);
2922}
2923
2924
2925/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2926 recursively. VAR is an SSA_NAME that should be transformed from bool
2927 to a wider integer type, OUT_TYPE is the desired final integer type of
2928 the whole pattern, TRUEVAL should be NULL unless optimizing
2929 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2930 in the then_clause, STMTS is where statements with added pattern stmts
2931 should be pushed to. */
2932
2933static tree
2934adjust_bool_pattern (tree var, tree out_type, tree trueval,
f1f41a6c 2935 vec<gimple> *stmts)
50f85e2e 2936{
2937 gimple stmt = SSA_NAME_DEF_STMT (var);
2938 enum tree_code rhs_code, def_rhs_code;
2939 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2940 location_t loc;
2941 gimple pattern_stmt, def_stmt;
2942
2943 rhs1 = gimple_assign_rhs1 (stmt);
2944 rhs2 = gimple_assign_rhs2 (stmt);
2945 rhs_code = gimple_assign_rhs_code (stmt);
2946 loc = gimple_location (stmt);
2947 switch (rhs_code)
2948 {
2949 case SSA_NAME:
2950 CASE_CONVERT:
2951 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2952 itype = TREE_TYPE (irhs1);
2953 pattern_stmt
2954 = gimple_build_assign_with_ops (SSA_NAME,
2955 vect_recog_temp_ssa_var (itype, NULL),
806413d2 2956 irhs1);
50f85e2e 2957 break;
2958
2959 case BIT_NOT_EXPR:
2960 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2961 itype = TREE_TYPE (irhs1);
2962 pattern_stmt
2963 = gimple_build_assign_with_ops (BIT_XOR_EXPR,
2964 vect_recog_temp_ssa_var (itype, NULL),
2965 irhs1, build_int_cst (itype, 1));
2966 break;
2967
2968 case BIT_AND_EXPR:
2969 /* Try to optimize x = y & (a < b ? 1 : 0); into
2970 x = (a < b ? y : 0);
2971
2972 E.g. for:
2973 bool a_b, b_b, c_b;
2974 TYPE d_T;
2975
2976 S1 a_b = x1 CMP1 y1;
2977 S2 b_b = x2 CMP2 y2;
2978 S3 c_b = a_b & b_b;
2979 S4 d_T = (TYPE) c_b;
2980
2981 we would normally emit:
2982
2983 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2984 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2985 S3' c_T = a_T & b_T;
2986 S4' d_T = c_T;
2987
2988 but we can save one stmt by using the
2989 result of one of the COND_EXPRs in the other COND_EXPR and leave
2990 BIT_AND_EXPR stmt out:
2991
2992 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2993 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2994 S4' f_T = c_T;
2995
2996 At least when VEC_COND_EXPR is implemented using masks
2997 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2998 computes the comparison masks and ands it, in one case with
2999 all ones vector, in the other case with a vector register.
3000 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3001 often more expensive. */
3002 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3003 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3004 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3005 {
3006 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3007 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3008 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3009 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3010 {
3011 gimple tstmt;
3012 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3013 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
f1f41a6c 3014 tstmt = stmts->pop ();
50f85e2e 3015 gcc_assert (tstmt == def_stmt);
f1f41a6c 3016 stmts->quick_push (stmt);
50f85e2e 3017 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3018 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
18937389 3019 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
50f85e2e 3020 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3021 return irhs2;
3022 }
3023 else
3024 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3025 goto and_ior_xor;
3026 }
3027 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3028 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3029 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3030 {
3031 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3032 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3033 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3034 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3035 {
3036 gimple tstmt;
3037 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3038 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
f1f41a6c 3039 tstmt = stmts->pop ();
50f85e2e 3040 gcc_assert (tstmt == def_stmt);
f1f41a6c 3041 stmts->quick_push (stmt);
50f85e2e 3042 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3043 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
18937389 3044 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
50f85e2e 3045 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3046 return irhs1;
3047 }
3048 else
3049 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3050 goto and_ior_xor;
3051 }
3052 /* FALLTHRU */
3053 case BIT_IOR_EXPR:
3054 case BIT_XOR_EXPR:
3055 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3056 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3057 and_ior_xor:
3058 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3059 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3060 {
3061 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3062 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3063 int out_prec = TYPE_PRECISION (out_type);
3064 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3065 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3066 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3067 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3068 else
3069 {
3070 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3071 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3072 }
3073 }
3074 itype = TREE_TYPE (irhs1);
3075 pattern_stmt
3076 = gimple_build_assign_with_ops (rhs_code,
3077 vect_recog_temp_ssa_var (itype, NULL),
3078 irhs1, irhs2);
3079 break;
3080
3081 default:
3082 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3083 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
78fb8a4f 3084 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3085 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3086 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
50f85e2e 3087 {
3754d046 3088 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
50f85e2e 3089 itype
d6152abc 3090 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
50f85e2e 3091 }
3092 else
3093 itype = TREE_TYPE (rhs1);
3094 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3095 if (trueval == NULL_TREE)
3096 trueval = build_int_cst (itype, 1);
3097 else
3098 gcc_checking_assert (useless_type_conversion_p (itype,
3099 TREE_TYPE (trueval)));
3100 pattern_stmt
446e85eb 3101 = gimple_build_assign_with_ops (COND_EXPR,
3102 vect_recog_temp_ssa_var (itype, NULL),
3103 cond_expr, trueval,
3104 build_int_cst (itype, 0));
50f85e2e 3105 break;
3106 }
3107
f1f41a6c 3108 stmts->safe_push (stmt);
50f85e2e 3109 gimple_set_location (pattern_stmt, loc);
3110 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3111 return gimple_assign_lhs (pattern_stmt);
3112}
3113
3114
3115/* Function vect_recog_bool_pattern
3116
3117 Try to find pattern like following:
3118
3119 bool a_b, b_b, c_b, d_b, e_b;
3120 TYPE f_T;
3121 loop:
3122 S1 a_b = x1 CMP1 y1;
3123 S2 b_b = x2 CMP2 y2;
3124 S3 c_b = a_b & b_b;
3125 S4 d_b = x3 CMP3 y3;
3126 S5 e_b = c_b | d_b;
3127 S6 f_T = (TYPE) e_b;
3128
60711c82 3129 where type 'TYPE' is an integral type. Or a similar pattern
3130 ending in
3131
3132 S6 f_Y = e_b ? r_Y : s_Y;
3133
3134 as results from if-conversion of a complex condition.
50f85e2e 3135
3136 Input:
3137
3138 * LAST_STMT: A stmt at the end from which the pattern
3139 search begins, i.e. cast of a bool to
3140 an integer type.
3141
3142 Output:
3143
3144 * TYPE_IN: The type of the input arguments to the pattern.
3145
3146 * TYPE_OUT: The type of the output of this pattern.
3147
3148 * Return value: A new stmt that will be used to replace the pattern.
3149
3150 Assuming size of TYPE is the same as size of all comparisons
3151 (otherwise some casts would be added where needed), the above
3152 sequence we create related pattern stmts:
3153 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3154 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3155 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3156 S5' e_T = c_T | d_T;
3157 S6' f_T = e_T;
3158
3159 Instead of the above S3' we could emit:
3160 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3161 S3' c_T = a_T | b_T;
3162 but the above is more efficient. */
3163
3164static gimple
f1f41a6c 3165vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
50f85e2e 3166 tree *type_out)
3167{
f1f41a6c 3168 gimple last_stmt = stmts->pop ();
50f85e2e 3169 enum tree_code rhs_code;
3170 tree var, lhs, rhs, vectype;
3171 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3172 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
4c0c783a 3173 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
50f85e2e 3174 gimple pattern_stmt;
3175
3176 if (!is_gimple_assign (last_stmt))
3177 return NULL;
3178
3179 var = gimple_assign_rhs1 (last_stmt);
3180 lhs = gimple_assign_lhs (last_stmt);
3181
3182 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3183 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3184 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3185 return NULL;
3186
3187 rhs_code = gimple_assign_rhs_code (last_stmt);
3188 if (CONVERT_EXPR_CODE_P (rhs_code))
3189 {
3b515af5 3190 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3191 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
50f85e2e 3192 return NULL;
3193 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3194 if (vectype == NULL_TREE)
3195 return NULL;
3196
4c0c783a 3197 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
50f85e2e 3198 return NULL;
3199
3200 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3201 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3202 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3203 pattern_stmt
806413d2 3204 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs);
50f85e2e 3205 else
3206 pattern_stmt
806413d2 3207 = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs);
50f85e2e 3208 *type_out = vectype;
3209 *type_in = vectype;
f1f41a6c 3210 stmts->safe_push (last_stmt);
6d8fb6cf 3211 if (dump_enabled_p ())
b055bc88 3212 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3213 "vect_recog_bool_pattern: detected:\n");
4c0c783a 3214
60711c82 3215 return pattern_stmt;
3216 }
3217 else if (rhs_code == COND_EXPR
3218 && TREE_CODE (var) == SSA_NAME)
3219 {
3220 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3221 if (vectype == NULL_TREE)
3222 return NULL;
3223
3224 /* Build a scalar type for the boolean result that when
3225 vectorized matches the vector type of the result in
3226 size and number of elements. */
3227 unsigned prec
3228 = wi::udiv_trunc (TYPE_SIZE (vectype),
3229 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3230 tree type
3231 = build_nonstandard_integer_type (prec,
3232 TYPE_UNSIGNED (TREE_TYPE (var)));
3233 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3234 return NULL;
3235
3236 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3237 return NULL;
3238
3239 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3240 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3241 pattern_stmt
3242 = gimple_build_assign_with_ops (COND_EXPR, lhs,
3243 build2 (NE_EXPR, boolean_type_node,
3244 rhs, build_int_cst (type, 0)),
3245 gimple_assign_rhs2 (last_stmt),
3246 gimple_assign_rhs3 (last_stmt));
3247 *type_out = vectype;
3248 *type_in = vectype;
3249 stmts->safe_push (last_stmt);
3250 if (dump_enabled_p ())
3251 dump_printf_loc (MSG_NOTE, vect_location,
3252 "vect_recog_bool_pattern: detected:\n");
3253
50f85e2e 3254 return pattern_stmt;
3255 }
d6152abc 3256 else if (rhs_code == SSA_NAME
3257 && STMT_VINFO_DATA_REF (stmt_vinfo))
3258 {
3259 stmt_vec_info pattern_stmt_info;
3260 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3261 gcc_assert (vectype != NULL_TREE);
16c8a484 3262 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3263 return NULL;
4c0c783a 3264 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
d6152abc 3265 return NULL;
3266
3267 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3268 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3269 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3270 {
3271 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3272 gimple cast_stmt
806413d2 3273 = gimple_build_assign_with_ops (NOP_EXPR, rhs2, rhs);
6d741312 3274 new_pattern_def_seq (stmt_vinfo, cast_stmt);
d6152abc 3275 rhs = rhs2;
3276 }
3277 pattern_stmt
806413d2 3278 = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs);
4c0c783a 3279 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3280 bb_vinfo);
d6152abc 3281 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3282 STMT_VINFO_DATA_REF (pattern_stmt_info)
3283 = STMT_VINFO_DATA_REF (stmt_vinfo);
3284 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3285 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3286 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3287 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3288 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3289 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3290 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3291 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3b515af5 3292 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
d6152abc 3293 *type_out = vectype;
3294 *type_in = vectype;
f1f41a6c 3295 stmts->safe_push (last_stmt);
6d8fb6cf 3296 if (dump_enabled_p ())
b055bc88 3297 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3298 "vect_recog_bool_pattern: detected:\n");
d6152abc 3299 return pattern_stmt;
3300 }
50f85e2e 3301 else
3302 return NULL;
3303}
3304
3305
45eea33f 3306/* Mark statements that are involved in a pattern. */
3307
3308static inline void
3309vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3310 tree pattern_vectype)
3311{
3312 stmt_vec_info pattern_stmt_info, def_stmt_info;
3313 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
4c0c783a 3315 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
45eea33f 3316 gimple def_stmt;
3317
45eea33f 3318 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
d6152abc 3319 if (pattern_stmt_info == NULL)
3320 {
4c0c783a 3321 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3322 bb_vinfo);
d6152abc 3323 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3324 }
3325 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
45eea33f 3326
3327 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3328 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
d6152abc 3329 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
45eea33f 3330 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3331 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3332 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
18937389 3333 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3334 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3335 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
45eea33f 3336 {
18937389 3337 gimple_stmt_iterator si;
3338 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3339 !gsi_end_p (si); gsi_next (&si))
84557284 3340 {
18937389 3341 def_stmt = gsi_stmt (si);
3342 def_stmt_info = vinfo_for_stmt (def_stmt);
3343 if (def_stmt_info == NULL)
3344 {
4c0c783a 3345 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3346 bb_vinfo);
18937389 3347 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3348 }
3349 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3350 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
a16d0316 3351 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
18937389 3352 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3353 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
84557284 3354 }
45eea33f 3355 }
3356}
3357
48e1416a 3358/* Function vect_pattern_recog_1
4a61a337 3359
3360 Input:
3361 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3362 computation pattern.
3363 STMT: A stmt from which the pattern search should start.
3364
3365 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
48e1416a 3366 expression that computes the same functionality and can be used to
3367 replace the sequence of stmts that are involved in the pattern.
4a61a337 3368
3369 Output:
48e1416a 3370 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3371 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3372 relevant vector type. If 'TYPE_IN' is already a vector type, then this
4a61a337 3373 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3374 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3375 to the available target pattern.
3376
48e1416a 3377 This function also does some bookkeeping, as explained in the documentation
4a61a337 3378 for vect_recog_pattern. */
3379
3380static void
32497628 3381vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3382 gimple_stmt_iterator si,
f1f41a6c 3383 vec<gimple> *stmts_to_replace)
4a61a337 3384{
75a70cf9 3385 gimple stmt = gsi_stmt (si), pattern_stmt;
0eee81bc 3386 stmt_vec_info stmt_info;
0eee81bc 3387 loop_vec_info loop_vinfo;
4a61a337 3388 tree pattern_vectype;
3389 tree type_in, type_out;
4a61a337 3390 enum tree_code code;
eefa05c8 3391 int i;
3392 gimple next;
4a61a337 3393
f1f41a6c 3394 stmts_to_replace->truncate (0);
3395 stmts_to_replace->quick_push (stmt);
929a4812 3396 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
75a70cf9 3397 if (!pattern_stmt)
48e1416a 3398 return;
3399
f1f41a6c 3400 stmt = stmts_to_replace->last ();
0eee81bc 3401 stmt_info = vinfo_for_stmt (stmt);
3402 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3403
48e1416a 3404 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3405 {
3406 /* No need to check target support (already checked by the pattern
3407 recognition function). */
b334cbba 3408 pattern_vectype = type_out ? type_out : type_in;
4a61a337 3409 }
3410 else
3411 {
3754d046 3412 machine_mode vec_mode;
4a61a337 3413 enum insn_code icode;
3414 optab optab;
3415
3416 /* Check target support */
b334cbba 3417 type_in = get_vectype_for_scalar_type (type_in);
3418 if (!type_in)
3419 return;
3420 if (type_out)
3421 type_out = get_vectype_for_scalar_type (type_out);
3422 else
3423 type_out = type_in;
ed67497f 3424 if (!type_out)
3425 return;
b334cbba 3426 pattern_vectype = type_out;
f031fa03 3427
75a70cf9 3428 if (is_gimple_assign (pattern_stmt))
3429 code = gimple_assign_rhs_code (pattern_stmt);
3430 else
3431 {
3432 gcc_assert (is_gimple_call (pattern_stmt));
3433 code = CALL_EXPR;
3434 }
3435
b334cbba 3436 optab = optab_for_tree_code (code, type_in, optab_default);
3437 vec_mode = TYPE_MODE (type_in);
4a61a337 3438 if (!optab
d6bf3b14 3439 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b334cbba 3440 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
4a61a337 3441 return;
3442 }
3443
3444 /* Found a vectorizable pattern. */
6d8fb6cf 3445 if (dump_enabled_p ())
4a61a337 3446 {
b055bc88 3447 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 3448 "pattern recognized: ");
b055bc88 3449 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
78bb46f5 3450 dump_printf (MSG_NOTE, "\n");
4a61a337 3451 }
48e1416a 3452
75a70cf9 3453 /* Mark the stmts that are involved in the pattern. */
45eea33f 3454 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
4a61a337 3455
eefa05c8 3456 /* Patterns cannot be vectorized using SLP, because they change the order of
3457 computation. */
4c0c783a 3458 if (loop_vinfo)
f1f41a6c 3459 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
4c0c783a 3460 if (next == stmt)
f1f41a6c 3461 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
0187b74e 3462
45eea33f 3463 /* It is possible that additional pattern stmts are created and inserted in
3464 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3465 relevant statements. */
f1f41a6c 3466 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3467 && (unsigned) i < (stmts_to_replace->length () - 1);
0187b74e 3468 i++)
3469 {
3470 stmt_info = vinfo_for_stmt (stmt);
3471 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
6d8fb6cf 3472 if (dump_enabled_p ())
0187b74e 3473 {
b055bc88 3474 dump_printf_loc (MSG_NOTE, vect_location,
7bd765d4 3475 "additional pattern stmt: ");
b055bc88 3476 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
78bb46f5 3477 dump_printf (MSG_NOTE, "\n");
0187b74e 3478 }
3479
45eea33f 3480 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
0187b74e 3481 }
4a61a337 3482}
3483
3484
3485/* Function vect_pattern_recog
3486
3487 Input:
3488 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3489 computation idioms.
3490
cfdcf183 3491 Output - for each computation idiom that is detected we create a new stmt
3492 that provides the same functionality and that can be vectorized. We
4a61a337 3493 also record some information in the struct_stmt_info of the relevant
3494 stmts, as explained below:
3495
3496 At the entry to this function we have the following stmts, with the
3497 following initial value in the STMT_VINFO fields:
3498
3499 stmt in_pattern_p related_stmt vec_stmt
3500 S1: a_i = .... - - -
3501 S2: a_2 = ..use(a_i).. - - -
3502 S3: a_1 = ..use(a_2).. - - -
3503 S4: a_0 = ..use(a_1).. - - -
3504 S5: ... = ..use(a_0).. - - -
3505
3506 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
cfdcf183 3507 represented by a single stmt. We then:
3508 - create a new stmt S6 equivalent to the pattern (the stmt is not
3509 inserted into the code)
4a61a337 3510 - fill in the STMT_VINFO fields as follows:
3511
3512 in_pattern_p related_stmt vec_stmt
48e1416a 3513 S1: a_i = .... - - -
4a61a337 3514 S2: a_2 = ..use(a_i).. - - -
3515 S3: a_1 = ..use(a_2).. - - -
4a61a337 3516 S4: a_0 = ..use(a_1).. true S6 -
cfdcf183 3517 '---> S6: a_new = .... - S4 -
4a61a337 3518 S5: ... = ..use(a_0).. - - -
3519
3520 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
cfdcf183 3521 to each other through the RELATED_STMT field).
4a61a337 3522
3523 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3524 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3525 remain irrelevant unless used by stmts other than S4.
3526
3527 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
cfdcf183 3528 (because they are marked as irrelevant). It will vectorize S6, and record
8bf58742 3529 a pointer to the new vector stmt VS6 from S6 (as usual).
3530 S4 will be skipped, and S5 will be vectorized as usual:
4a61a337 3531
3532 in_pattern_p related_stmt vec_stmt
3533 S1: a_i = .... - - -
3534 S2: a_2 = ..use(a_i).. - - -
3535 S3: a_1 = ..use(a_2).. - - -
3536 > VS6: va_new = .... - - -
4a61a337 3537 S4: a_0 = ..use(a_1).. true S6 VS6
cfdcf183 3538 '---> S6: a_new = .... - S4 VS6
4a61a337 3539 > VS5: ... = ..vuse(va_new).. - - -
3540 S5: ... = ..use(a_0).. - - -
3541
cfdcf183 3542 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
4a61a337 3543 elsewhere), and we'll end up with:
3544
48e1416a 3545 VS6: va_new = ....
8bf58742 3546 VS5: ... = ..vuse(va_new)..
3547
3548 In case of more than one pattern statements, e.g., widen-mult with
3549 intermediate type:
3550
3551 S1 a_t = ;
3552 S2 a_T = (TYPE) a_t;
3553 '--> S3: a_it = (interm_type) a_t;
3554 S4 prod_T = a_T * CONST;
3555 '--> S5: prod_T' = a_it w* CONST;
3556
3557 there may be other users of a_T outside the pattern. In that case S2 will
3558 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3559 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3560 be recorded in S3. */
4a61a337 3561
3562void
4c0c783a 3563vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
4a61a337 3564{
4c0c783a 3565 struct loop *loop;
eca8fccf 3566 basic_block *bbs;
4c0c783a 3567 unsigned int nbbs;
75a70cf9 3568 gimple_stmt_iterator si;
4a61a337 3569 unsigned int i, j;
32497628 3570 vect_recog_func_ptr vect_recog_func;
4997014d 3571 auto_vec<gimple, 1> stmts_to_replace;
4c0c783a 3572 gimple stmt;
4a61a337 3573
6d8fb6cf 3574 if (dump_enabled_p ())
7bd765d4 3575 dump_printf_loc (MSG_NOTE, vect_location,
78bb46f5 3576 "=== vect_pattern_recog ===\n");
4a61a337 3577
4c0c783a 3578 if (loop_vinfo)
3579 {
3580 loop = LOOP_VINFO_LOOP (loop_vinfo);
3581 bbs = LOOP_VINFO_BBS (loop_vinfo);
3582 nbbs = loop->num_nodes;
3583 }
3584 else
3585 {
eca8fccf 3586 bbs = &BB_VINFO_BB (bb_vinfo);
4c0c783a 3587 nbbs = 1;
4c0c783a 3588 }
3589
4a61a337 3590 /* Scan through the loop stmts, applying the pattern recognition
3591 functions starting at each stmt visited: */
3592 for (i = 0; i < nbbs; i++)
3593 {
3594 basic_block bb = bbs[i];
75a70cf9 3595 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
4a61a337 3596 {
4c0c783a 3597 if (bb_vinfo && (stmt = gsi_stmt (si))
3598 && vinfo_for_stmt (stmt)
3599 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3600 continue;
3601
4a61a337 3602 /* Scan over all generic vect_recog_xxx_pattern functions. */
3603 for (j = 0; j < NUM_PATTERNS; j++)
3604 {
32497628 3605 vect_recog_func = vect_vect_recog_func_ptrs[j];
3606 vect_pattern_recog_1 (vect_recog_func, si,
929a4812 3607 &stmts_to_replace);
4a61a337 3608 }
3609 }
3610 }
3611}