]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
Daily bump.
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
20f06221
DN
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
20f06221
DN
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20f06221
DN
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
24#include "tm.h"
40e23961
MC
25#include "alias.h"
26#include "symtab.h"
20f06221 27#include "tree.h"
40e23961 28#include "fold-const.h"
d8a2d370 29#include "stor-layout.h"
20f06221 30#include "target.h"
60393bbc 31#include "predict.h"
60393bbc 32#include "hard-reg-set.h"
60393bbc
AM
33#include "function.h"
34#include "dominance.h"
20f06221 35#include "basic-block.h"
cf835838 36#include "gimple-pretty-print.h"
2fb9a547
AM
37#include "tree-ssa-alias.h"
38#include "internal-fn.h"
39#include "tree-eh.h"
40#include "gimple-expr.h"
18f429e2 41#include "gimple.h"
45b0be94 42#include "gimplify.h"
5be5c238 43#include "gimple-iterator.h"
442b4905
AM
44#include "gimple-ssa.h"
45#include "tree-phinodes.h"
46#include "ssa-iterators.h"
d8a2d370 47#include "stringpool.h"
442b4905 48#include "tree-ssanames.h"
20f06221 49#include "cfgloop.h"
36566b39
PK
50#include "rtl.h"
51#include "flags.h"
36566b39
PK
52#include "insn-config.h"
53#include "expmed.h"
54#include "dojump.h"
55#include "explow.h"
56#include "calls.h"
57#include "emit-rtl.h"
58#include "varasm.h"
59#include "stmt.h"
20f06221 60#include "expr.h"
b0710fe1 61#include "insn-codes.h"
20f06221
DN
62#include "optabs.h"
63#include "params.h"
64#include "tree-data-ref.h"
65#include "tree-vectorizer.h"
7ee2468b 66#include "recog.h" /* FIXME: for insn_data */
718f9c0f 67#include "diagnostic-core.h"
7ee2468b 68#include "dumpfile.h"
9b2b7279 69#include "builtins.h"
20f06221 70
20f06221 71/* Pattern recognition functions */
9771b263 72static gimple vect_recog_widen_sum_pattern (vec<gimple> *, tree *,
51312233 73 tree *);
9771b263 74static gimple vect_recog_widen_mult_pattern (vec<gimple> *, tree *,
51312233 75 tree *);
9771b263 76static gimple vect_recog_dot_prod_pattern (vec<gimple> *, tree *,
51312233 77 tree *);
79d652a5
CH
78static gimple vect_recog_sad_pattern (vec<gimple> *, tree *,
79 tree *);
9771b263
DN
80static gimple vect_recog_pow_pattern (vec<gimple> *, tree *, tree *);
81static gimple vect_recog_over_widening_pattern (vec<gimple> *, tree *,
1107f3ae 82 tree *);
9771b263 83static gimple vect_recog_widen_shift_pattern (vec<gimple> *,
36ba4aae 84 tree *, tree *);
7e9a3abb 85static gimple vect_recog_rotate_pattern (vec<gimple> *, tree *, tree *);
9771b263 86static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
732a0ad3 87 tree *, tree *);
9771b263 88static gimple vect_recog_divmod_pattern (vec<gimple> *,
079c527f 89 tree *, tree *);
9771b263 90static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
69d2aade 91 tree *, tree *);
9771b263 92static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
20f06221
DN
93static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
94 vect_recog_widen_mult_pattern,
95 vect_recog_widen_sum_pattern,
0b2229b0 96 vect_recog_dot_prod_pattern,
79d652a5 97 vect_recog_sad_pattern,
1107f3ae 98 vect_recog_pow_pattern,
36ba4aae 99 vect_recog_widen_shift_pattern,
33018845 100 vect_recog_over_widening_pattern,
7e9a3abb 101 vect_recog_rotate_pattern,
732a0ad3 102 vect_recog_vector_vector_shift_pattern,
079c527f 103 vect_recog_divmod_pattern,
71c92d17
JJ
104 vect_recog_mixed_size_cond_pattern,
105 vect_recog_bool_pattern};
20f06221 106
083481d8
JJ
107static inline void
108append_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
109{
a1a6c5b2
JJ
110 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
111 stmt);
083481d8
JJ
112}
113
114static inline void
115new_pattern_def_seq (stmt_vec_info stmt_info, gimple stmt)
116{
117 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
118 append_pattern_def_seq (stmt_info, stmt);
119}
120
f71cf56a
UW
121/* Check whether STMT2 is in the same loop or basic block as STMT1.
122 Which of the two applies depends on whether we're currently doing
123 loop-based or basic-block-based vectorization, as determined by
124 the vinfo_for_stmt for STMT1 (which must be defined).
125
126 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
127 to be defined as well. */
128
129static bool
130vect_same_loop_or_bb_p (gimple stmt1, gimple stmt2)
131{
132 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
133 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
134 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
135
136 if (!gimple_bb (stmt2))
137 return false;
138
139 if (loop_vinfo)
140 {
141 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
142 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt2)))
143 return false;
144 }
145 else
146 {
147 if (gimple_bb (stmt2) != BB_VINFO_BB (bb_vinfo)
148 || gimple_code (stmt2) == GIMPLE_PHI)
149 return false;
150 }
151
152 gcc_assert (vinfo_for_stmt (stmt2));
153 return true;
154}
155
9a7a4398
UW
156/* If the LHS of DEF_STMT has a single use, and that statement is
157 in the same loop or basic block, return it. */
158
159static gimple
160vect_single_imm_use (gimple def_stmt)
161{
162 tree lhs = gimple_assign_lhs (def_stmt);
163 use_operand_p use_p;
164 gimple use_stmt;
165
166 if (!single_imm_use (lhs, &use_p, &use_stmt))
167 return NULL;
168
169 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
170 return NULL;
171
172 return use_stmt;
173}
174
bc4fb355 175/* Check whether NAME, an ssa-name used in USE_STMT,
79d652a5 176 is a result of a type promotion, such that:
20f06221 177 DEF_STMT: NAME = NOP (name0)
383d9c83
IR
178 If CHECK_SIGN is TRUE, check that either both types are signed or both are
179 unsigned. */
20f06221
DN
180
181static bool
bc4fb355
IR
182type_conversion_p (tree name, gimple use_stmt, bool check_sign,
183 tree *orig_type, gimple *def_stmt, bool *promotion)
20f06221
DN
184{
185 tree dummy;
726a989a 186 gimple dummy_gimple;
20f06221
DN
187 loop_vec_info loop_vinfo;
188 stmt_vec_info stmt_vinfo;
20f06221
DN
189 tree type = TREE_TYPE (name);
190 tree oprnd0;
191 enum vect_def_type dt;
192 tree def;
f5709183 193 bb_vec_info bb_vinfo;
20f06221
DN
194
195 stmt_vinfo = vinfo_for_stmt (use_stmt);
196 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183
IR
197 bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
198 if (!vect_is_simple_use (name, use_stmt, loop_vinfo, bb_vinfo, def_stmt,
199 &def, &dt))
20f06221
DN
200 return false;
201
8644a673
IR
202 if (dt != vect_internal_def
203 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
204 return false;
205
bc4fb355 206 if (!*def_stmt)
20f06221
DN
207 return false;
208
726a989a 209 if (!is_gimple_assign (*def_stmt))
20f06221
DN
210 return false;
211
bc4fb355 212 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
20f06221
DN
213 return false;
214
726a989a 215 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221 216
bc4fb355
IR
217 *orig_type = TREE_TYPE (oprnd0);
218 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
219 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
220 return false;
221
222 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
223 *promotion = true;
bc4fb355 224 else
79d652a5 225 *promotion = false;
20f06221 226
24ee1384 227 if (!vect_is_simple_use (oprnd0, *def_stmt, loop_vinfo,
f5709183 228 bb_vinfo, &dummy_gimple, &dummy, &dt))
20f06221
DN
229 return false;
230
20f06221
DN
231 return true;
232}
233
726a989a
RB
234/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
235 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
236
237static tree
238vect_recog_temp_ssa_var (tree type, gimple stmt)
239{
83d5977e 240 return make_temp_ssa_name (type, stmt, "patt");
726a989a 241}
20f06221
DN
242
243/* Function vect_recog_dot_prod_pattern
244
245 Try to find the following pattern:
246
247 type x_t, y_t;
248 TYPE1 prod;
249 TYPE2 sum = init;
250 loop:
251 sum_0 = phi <init, sum_1>
252 S1 x_t = ...
253 S2 y_t = ...
254 S3 x_T = (TYPE1) x_t;
255 S4 y_T = (TYPE1) y_t;
256 S5 prod = x_T * y_T;
257 [S6 prod = (TYPE2) prod; #optional]
258 S7 sum_1 = prod + sum_0;
259
b8698a0f
L
260 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
261 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 262 computation.
b8698a0f 263
20f06221
DN
264 Input:
265
51312233
IR
266 * STMTS: Contains a stmt from which the pattern search begins. In the
267 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
268 will be detected.
20f06221
DN
269
270 Output:
271
272 * TYPE_IN: The type of the input arguments to the pattern.
273
274 * TYPE_OUT: The type of the output of this pattern.
275
276 * Return value: A new stmt that will be used to replace the sequence of
277 stmts that constitute the pattern. In this case it will be:
278 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
279
280 Note: The dot-prod idiom is a widening reduction pattern that is
281 vectorized without preserving all the intermediate results. It
282 produces only N/2 (widened) results (by summing up pairs of
283 intermediate results) rather than all N results. Therefore, we
284 cannot allow this pattern when we want to get all the results and in
285 the correct order (as is the case when this computation is in an
286 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 287
726a989a 288static gimple
9771b263 289vect_recog_dot_prod_pattern (vec<gimple> *stmts, tree *type_in,
51312233 290 tree *type_out)
20f06221 291{
9771b263 292 gimple stmt, last_stmt = (*stmts)[0];
20f06221
DN
293 tree oprnd0, oprnd1;
294 tree oprnd00, oprnd01;
51312233 295 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 296 tree type, half_type;
726a989a 297 gimple pattern_stmt;
20f06221 298 tree prod_type;
d29de1bf 299 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 300 struct loop *loop;
f471fe72 301 tree var;
bc4fb355 302 bool promotion;
20f06221 303
f5709183
IR
304 if (!loop_info)
305 return NULL;
306
307 loop = LOOP_VINFO_LOOP (loop_info);
308
328dc477
RB
309 /* We don't allow changing the order of the computation in the inner-loop
310 when doing outer-loop vectorization. */
311 if (loop && nested_in_vect_loop_p (loop, last_stmt))
312 return NULL;
313
51312233 314 if (!is_gimple_assign (last_stmt))
20f06221
DN
315 return NULL;
316
51312233 317 type = gimple_expr_type (last_stmt);
20f06221 318
b8698a0f 319 /* Look for the following pattern
20f06221
DN
320 DX = (TYPE1) X;
321 DY = (TYPE1) Y;
b8698a0f 322 DPROD = DX * DY;
20f06221
DN
323 DDPROD = (TYPE2) DPROD;
324 sum_1 = DDPROD + sum_0;
b8698a0f 325 In which
20f06221
DN
326 - DX is double the size of X
327 - DY is double the size of Y
328 - DX, DY, DPROD all have the same type
329 - sum is the same size of DPROD or bigger
330 - sum has been recognized as a reduction variable.
331
332 This is equivalent to:
333 DPROD = X w* Y; #widen mult
334 sum_1 = DPROD w+ sum_0; #widen summation
335 or
336 DPROD = X w* Y; #widen mult
337 sum_1 = DPROD + sum_0; #summation
338 */
339
340 /* Starting from LAST_STMT, follow the defs of its uses in search
341 of the above pattern. */
342
51312233 343 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
344 return NULL;
345
346 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
347 {
348 /* Has been detected as widening-summation? */
349
350 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
351 type = gimple_expr_type (stmt);
352 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 353 return NULL;
726a989a
RB
354 oprnd0 = gimple_assign_rhs1 (stmt);
355 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
356 half_type = TREE_TYPE (oprnd0);
357 }
358 else
359 {
726a989a 360 gimple def_stmt;
20f06221 361
51312233
IR
362 oprnd0 = gimple_assign_rhs1 (last_stmt);
363 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
364 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
365 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 366 return NULL;
51312233 367 stmt = last_stmt;
20f06221 368
bc4fb355
IR
369 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
370 &promotion)
371 && promotion)
20f06221
DN
372 {
373 stmt = def_stmt;
726a989a 374 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
375 }
376 else
377 half_type = type;
378 }
379
51312233 380 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
381 we know that oprnd1 is the reduction variable (defined by a loop-header
382 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
383 Left to check that oprnd0 is defined by a (widen_)mult_expr */
ba02d3bc
RG
384 if (TREE_CODE (oprnd0) != SSA_NAME)
385 return NULL;
20f06221
DN
386
387 prod_type = half_type;
388 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
389
390 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 391 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
392 return NULL;
393
b8698a0f 394 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 395 inside the loop (in case we are analyzing an outer-loop). */
726a989a 396 if (!is_gimple_assign (stmt))
b8698a0f 397 return NULL;
20f06221
DN
398 stmt_vinfo = vinfo_for_stmt (stmt);
399 gcc_assert (stmt_vinfo);
8644a673 400 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 401 return NULL;
726a989a 402 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
403 return NULL;
404 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
405 {
406 /* Has been detected as a widening multiplication? */
407
408 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 409 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
410 return NULL;
411 stmt_vinfo = vinfo_for_stmt (stmt);
412 gcc_assert (stmt_vinfo);
8644a673 413 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
414 oprnd00 = gimple_assign_rhs1 (stmt);
415 oprnd01 = gimple_assign_rhs2 (stmt);
56f8faae
CH
416 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
417 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
20f06221
DN
418 }
419 else
420 {
421 tree half_type0, half_type1;
726a989a 422 gimple def_stmt;
20f06221
DN
423 tree oprnd0, oprnd1;
424
726a989a
RB
425 oprnd0 = gimple_assign_rhs1 (stmt);
426 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
427 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
428 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 429 return NULL;
bc4fb355
IR
430 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
431 &promotion)
432 || !promotion)
20f06221 433 return NULL;
726a989a 434 oprnd00 = gimple_assign_rhs1 (def_stmt);
181f5f3e 435 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
bc4fb355
IR
436 &promotion)
437 || !promotion)
20f06221 438 return NULL;
726a989a 439 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 440 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
441 return NULL;
442 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
443 return NULL;
444 }
445
446 half_type = TREE_TYPE (oprnd00);
447 *type_in = half_type;
448 *type_out = type;
b8698a0f 449
20f06221 450 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 451 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
452 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
453 oprnd00, oprnd01, oprnd1);
b8698a0f 454
73fbfcad 455 if (dump_enabled_p ())
20f06221 456 {
ccb3ad87 457 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 458 "vect_recog_dot_prod_pattern: detected: ");
ccb3ad87 459 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 460 dump_printf (MSG_NOTE, "\n");
20f06221 461 }
d29de1bf 462
726a989a 463 return pattern_stmt;
20f06221 464}
b8698a0f 465
51312233 466
79d652a5
CH
467/* Function vect_recog_sad_pattern
468
469 Try to find the following Sum of Absolute Difference (SAD) pattern:
470
471 type x_t, y_t;
472 signed TYPE1 diff, abs_diff;
473 TYPE2 sum = init;
474 loop:
475 sum_0 = phi <init, sum_1>
476 S1 x_t = ...
477 S2 y_t = ...
478 S3 x_T = (TYPE1) x_t;
479 S4 y_T = (TYPE1) y_t;
480 S5 diff = x_T - y_T;
481 S6 abs_diff = ABS_EXPR <diff>;
482 [S7 abs_diff = (TYPE2) abs_diff; #optional]
483 S8 sum_1 = abs_diff + sum_0;
484
485 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
486 same size of 'TYPE1' or bigger. This is a special case of a reduction
487 computation.
488
489 Input:
490
491 * STMTS: Contains a stmt from which the pattern search begins. In the
492 example, when this function is called with S8, the pattern
493 {S3,S4,S5,S6,S7,S8} will be detected.
494
495 Output:
496
497 * TYPE_IN: The type of the input arguments to the pattern.
498
499 * TYPE_OUT: The type of the output of this pattern.
500
501 * Return value: A new stmt that will be used to replace the sequence of
502 stmts that constitute the pattern. In this case it will be:
503 SAD_EXPR <x_t, y_t, sum_0>
504 */
505
506static gimple
507vect_recog_sad_pattern (vec<gimple> *stmts, tree *type_in,
508 tree *type_out)
509{
510 gimple last_stmt = (*stmts)[0];
511 tree sad_oprnd0, sad_oprnd1;
512 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
513 tree half_type;
514 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
515 struct loop *loop;
516 bool promotion;
517
518 if (!loop_info)
519 return NULL;
520
521 loop = LOOP_VINFO_LOOP (loop_info);
522
328dc477
RB
523 /* We don't allow changing the order of the computation in the inner-loop
524 when doing outer-loop vectorization. */
525 if (loop && nested_in_vect_loop_p (loop, last_stmt))
526 return NULL;
527
79d652a5
CH
528 if (!is_gimple_assign (last_stmt))
529 return NULL;
530
531 tree sum_type = gimple_expr_type (last_stmt);
532
533 /* Look for the following pattern
534 DX = (TYPE1) X;
535 DY = (TYPE1) Y;
536 DDIFF = DX - DY;
537 DAD = ABS_EXPR <DDIFF>;
538 DDPROD = (TYPE2) DPROD;
539 sum_1 = DAD + sum_0;
540 In which
541 - DX is at least double the size of X
542 - DY is at least double the size of Y
543 - DX, DY, DDIFF, DAD all have the same type
544 - sum is the same size of DAD or bigger
545 - sum has been recognized as a reduction variable.
546
547 This is equivalent to:
548 DDIFF = X w- Y; #widen sub
549 DAD = ABS_EXPR <DDIFF>;
550 sum_1 = DAD w+ sum_0; #widen summation
551 or
552 DDIFF = X w- Y; #widen sub
553 DAD = ABS_EXPR <DDIFF>;
554 sum_1 = DAD + sum_0; #summation
555 */
556
557 /* Starting from LAST_STMT, follow the defs of its uses in search
558 of the above pattern. */
559
560 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
561 return NULL;
562
563 tree plus_oprnd0, plus_oprnd1;
564
565 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
566 {
567 /* Has been detected as widening-summation? */
568
569 gimple stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
570 sum_type = gimple_expr_type (stmt);
571 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
572 return NULL;
573 plus_oprnd0 = gimple_assign_rhs1 (stmt);
574 plus_oprnd1 = gimple_assign_rhs2 (stmt);
575 half_type = TREE_TYPE (plus_oprnd0);
576 }
577 else
578 {
579 gimple def_stmt;
580
79d652a5
CH
581 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
582 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
583 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
584 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
585 return NULL;
586
587 /* The type conversion could be promotion, demotion,
588 or just signed -> unsigned. */
589 if (type_conversion_p (plus_oprnd0, last_stmt, false,
590 &half_type, &def_stmt, &promotion))
591 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
592 else
593 half_type = sum_type;
594 }
595
596 /* So far so good. Since last_stmt was detected as a (summation) reduction,
597 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
598 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
599 Then check that plus_oprnd0 is defined by an abs_expr. */
600
601 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
602 return NULL;
603
604 tree abs_type = half_type;
605 gimple abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
606
607 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
608 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
609 return NULL;
610
611 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
612 inside the loop (in case we are analyzing an outer-loop). */
613 if (!is_gimple_assign (abs_stmt))
614 return NULL;
615
616 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
617 gcc_assert (abs_stmt_vinfo);
618 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
619 return NULL;
620 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
621 return NULL;
622
623 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
624 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
625 return NULL;
626 if (TYPE_UNSIGNED (abs_type))
627 return NULL;
628
629 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
630
631 if (TREE_CODE (abs_oprnd) != SSA_NAME)
632 return NULL;
633
634 gimple diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
635
636 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
637 if (!gimple_bb (diff_stmt)
638 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
639 return NULL;
640
641 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
642 inside the loop (in case we are analyzing an outer-loop). */
643 if (!is_gimple_assign (diff_stmt))
644 return NULL;
645
646 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
647 gcc_assert (diff_stmt_vinfo);
648 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
649 return NULL;
650 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
651 return NULL;
652
653 tree half_type0, half_type1;
654 gimple def_stmt;
655
656 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
657 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
658
659 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
660 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
661 return NULL;
662 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
663 &half_type0, &def_stmt, &promotion)
664 || !promotion)
665 return NULL;
666 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
667
668 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
669 &half_type1, &def_stmt, &promotion)
670 || !promotion)
671 return NULL;
672 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
673
674 if (!types_compatible_p (half_type0, half_type1))
675 return NULL;
676 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
677 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
678 return NULL;
679
680 *type_in = TREE_TYPE (sad_oprnd0);
681 *type_out = sum_type;
682
683 /* Pattern detected. Create a stmt to be used to replace the pattern: */
684 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
0d0e4a03
JJ
685 gimple pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
686 sad_oprnd1, plus_oprnd1);
79d652a5
CH
687
688 if (dump_enabled_p ())
689 {
690 dump_printf_loc (MSG_NOTE, vect_location,
691 "vect_recog_sad_pattern: detected: ");
692 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
693 dump_printf (MSG_NOTE, "\n");
694 }
695
79d652a5
CH
696 return pattern_stmt;
697}
698
699
36ba4aae
IR
700/* Handle widening operation by a constant. At the moment we support MULT_EXPR
701 and LSHIFT_EXPR.
702
703 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
704 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
51312233
IR
705
706 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
36ba4aae
IR
707 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
708 that satisfies the above restrictions, we can perform a widening opeartion
709 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
566d377a 710 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
51312233
IR
711
712static bool
36ba4aae
IR
713vect_handle_widen_op_by_const (gimple stmt, enum tree_code code,
714 tree const_oprnd, tree *oprnd,
566d377a 715 gimple *wstmt, tree type,
36ba4aae 716 tree *half_type, gimple def_stmt)
51312233 717{
83d5977e 718 tree new_type, new_oprnd;
51312233 719
36ba4aae
IR
720 if (code != MULT_EXPR && code != LSHIFT_EXPR)
721 return false;
722
723 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
724 || (code == LSHIFT_EXPR
725 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
726 != 1))
727 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
51312233
IR
728 {
729 /* CONST_OPRND is a constant of HALF_TYPE. */
730 *oprnd = gimple_assign_rhs1 (def_stmt);
731 return true;
732 }
733
f71cf56a
UW
734 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
735 return false;
736
737 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
51312233
IR
738 return false;
739
36ba4aae 740 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
51312233
IR
741 a type 2 times bigger than HALF_TYPE. */
742 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
743 TYPE_UNSIGNED (type));
36ba4aae
IR
744 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
745 || (code == LSHIFT_EXPR
746 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
51312233
IR
747 return false;
748
566d377a
RB
749 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
750 *oprnd = gimple_assign_rhs1 (def_stmt);
751 new_oprnd = make_ssa_name (new_type);
752 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
753 *oprnd = new_oprnd;
51312233
IR
754
755 *half_type = new_type;
756 return true;
757}
758
759
20f06221
DN
760/* Function vect_recog_widen_mult_pattern
761
762 Try to find the following pattern:
763
d367387c
CH
764 type1 a_t;
765 type2 b_t;
20f06221
DN
766 TYPE a_T, b_T, prod_T;
767
768 S1 a_t = ;
769 S2 b_t = ;
770 S3 a_T = (TYPE) a_t;
771 S4 b_T = (TYPE) b_t;
772 S5 prod_T = a_T * b_T;
773
d367387c 774 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
20f06221 775
d47657bd 776 Also detect unsigned cases:
383d9c83 777
d367387c
CH
778 unsigned type1 a_t;
779 unsigned type2 b_t;
383d9c83
IR
780 unsigned TYPE u_prod_T;
781 TYPE a_T, b_T, prod_T;
782
783 S1 a_t = ;
784 S2 b_t = ;
785 S3 a_T = (TYPE) a_t;
786 S4 b_T = (TYPE) b_t;
787 S5 prod_T = a_T * b_T;
788 S6 u_prod_T = (unsigned TYPE) prod_T;
789
790 and multiplication by constants:
791
792 type a_t;
793 TYPE a_T, prod_T;
794
795 S1 a_t = ;
796 S3 a_T = (TYPE) a_t;
797 S5 prod_T = a_T * CONST;
798
51312233
IR
799 A special case of multiplication by constants is when 'TYPE' is 4 times
800 bigger than 'type', but CONST fits an intermediate type 2 times smaller
801 than 'TYPE'. In that case we create an additional pattern stmt for S3
802 to create a variable of the intermediate type, and perform widen-mult
803 on the intermediate type as well:
804
805 type a_t;
806 interm_type a_it;
807 TYPE a_T, prod_T, prod_T';
808
809 S1 a_t = ;
810 S3 a_T = (TYPE) a_t;
811 '--> a_it = (interm_type) a_t;
812 S5 prod_T = a_T * CONST;
813 '--> prod_T' = a_it w* CONST;
20f06221 814
51312233
IR
815 Input/Output:
816
817 * STMTS: Contains a stmt from which the pattern search begins. In the
818 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
819 is detected. In case of unsigned widen-mult, the original stmt (S5) is
820 replaced with S6 in STMTS. In case of multiplication by a constant
821 of an intermediate type (the last case above), STMTS also contains S3
822 (inserted before S5).
20f06221
DN
823
824 Output:
825
826 * TYPE_IN: The type of the input arguments to the pattern.
827
383d9c83 828 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
829
830 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 831 stmts that constitute the pattern. In this case it will be:
20f06221 832 WIDEN_MULT <a_t, b_t>
d367387c
CH
833 If the result of WIDEN_MULT needs to be converted to a larger type, the
834 returned stmt will be this type conversion stmt.
20f06221
DN
835*/
836
726a989a 837static gimple
9771b263 838vect_recog_widen_mult_pattern (vec<gimple> *stmts,
51312233 839 tree *type_in, tree *type_out)
20f06221 840{
9771b263 841 gimple last_stmt = stmts->pop ();
726a989a 842 gimple def_stmt0, def_stmt1;
89d67cca
DN
843 tree oprnd0, oprnd1;
844 tree type, half_type0, half_type1;
d367387c
CH
845 gimple new_stmt = NULL, pattern_stmt = NULL;
846 tree vectype, vecitype;
726a989a 847 tree var;
89d67cca 848 enum tree_code dummy_code;
5d593372 849 int dummy_int;
9771b263 850 vec<tree> dummy_vec;
36ba4aae 851 bool op1_ok;
bc4fb355 852 bool promotion;
89d67cca 853
51312233 854 if (!is_gimple_assign (last_stmt))
89d67cca
DN
855 return NULL;
856
51312233 857 type = gimple_expr_type (last_stmt);
89d67cca
DN
858
859 /* Starting from LAST_STMT, follow the defs of its uses in search
860 of the above pattern. */
861
51312233 862 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
89d67cca
DN
863 return NULL;
864
51312233
IR
865 oprnd0 = gimple_assign_rhs1 (last_stmt);
866 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
867 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
868 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
869 return NULL;
870
383d9c83 871 /* Check argument 0. */
bc4fb355
IR
872 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
873 &promotion)
874 || !promotion)
875 return NULL;
383d9c83 876 /* Check argument 1. */
bc4fb355
IR
877 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
878 &def_stmt1, &promotion);
89d67cca 879
bc4fb355 880 if (op1_ok && promotion)
383d9c83
IR
881 {
882 oprnd0 = gimple_assign_rhs1 (def_stmt0);
883 oprnd1 = gimple_assign_rhs1 (def_stmt1);
884 }
36ba4aae 885 else
383d9c83 886 {
51312233 887 if (TREE_CODE (oprnd1) == INTEGER_CST
383d9c83 888 && TREE_CODE (half_type0) == INTEGER_TYPE
36ba4aae 889 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
566d377a 890 &oprnd0, &new_stmt, type,
36ba4aae 891 &half_type0, def_stmt0))
bfdeda2c
JJ
892 {
893 half_type1 = half_type0;
894 oprnd1 = fold_convert (half_type1, oprnd1);
895 }
383d9c83
IR
896 else
897 return NULL;
898 }
899
d367387c
CH
900 /* If the two arguments have different sizes, convert the one with
901 the smaller type into the larger type. */
902 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
903 {
566d377a
RB
904 /* If we already used up the single-stmt slot give up. */
905 if (new_stmt)
906 return NULL;
907
d367387c
CH
908 tree* oprnd = NULL;
909 gimple def_stmt = NULL;
910
911 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
912 {
913 def_stmt = def_stmt0;
914 half_type0 = half_type1;
915 oprnd = &oprnd0;
916 }
917 else
918 {
919 def_stmt = def_stmt1;
920 half_type1 = half_type0;
921 oprnd = &oprnd1;
922 }
923
924 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 925 tree new_oprnd = make_ssa_name (half_type0);
0d0e4a03 926 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
d367387c
CH
927 *oprnd = new_oprnd;
928 }
929
383d9c83
IR
930 /* Handle unsigned case. Look for
931 S6 u_prod_T = (unsigned TYPE) prod_T;
932 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
933 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
934 {
9a7a4398
UW
935 gimple use_stmt;
936 tree use_lhs;
383d9c83
IR
937 tree use_type;
938
939 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
940 return NULL;
941
9a7a4398
UW
942 use_stmt = vect_single_imm_use (last_stmt);
943 if (!use_stmt || !is_gimple_assign (use_stmt)
625a9766 944 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
383d9c83
IR
945 return NULL;
946
947 use_lhs = gimple_assign_lhs (use_stmt);
948 use_type = TREE_TYPE (use_lhs);
949 if (!INTEGRAL_TYPE_P (use_type)
950 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
951 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
952 return NULL;
953
954 type = use_type;
51312233 955 last_stmt = use_stmt;
383d9c83 956 }
89d67cca 957
9600efe1 958 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
959 return NULL;
960
d367387c
CH
961 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
962 to get an intermediate result of type ITYPE. In this case we need
963 to build a statement to convert this intermediate result to type TYPE. */
964 tree itype = type;
965 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
966 itype = build_nonstandard_integer_type
967 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
968 TYPE_UNSIGNED (type));
969
89d67cca 970 /* Pattern detected. */
73fbfcad 971 if (dump_enabled_p ())
ccb3ad87 972 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 973 "vect_recog_widen_mult_pattern: detected:\n");
89d67cca
DN
974
975 /* Check target support */
976 vectype = get_vectype_for_scalar_type (half_type0);
d367387c 977 vecitype = get_vectype_for_scalar_type (itype);
03d3e953 978 if (!vectype
d367387c 979 || !vecitype
51312233 980 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
d367387c 981 vecitype, vectype,
a86ec597
RH
982 &dummy_code, &dummy_code,
983 &dummy_int, &dummy_vec))
89d67cca
DN
984 return NULL;
985
986 *type_in = vectype;
d367387c 987 *type_out = get_vectype_for_scalar_type (type);
89d67cca
DN
988
989 /* Pattern supported. Create a stmt to be used to replace the pattern: */
d367387c 990 var = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 991 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
726a989a 992
d367387c
CH
993 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
994 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
995 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
996 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
997
998 /* If the original two operands have different sizes, we may need to convert
999 the smaller one into the larget type. If this is the case, at this point
1000 the new stmt is already built. */
1001 if (new_stmt)
1002 {
1003 append_pattern_def_seq (stmt_vinfo, new_stmt);
1004 stmt_vec_info new_stmt_info
1005 = new_stmt_vec_info (new_stmt, loop_vinfo, bb_vinfo);
1006 set_vinfo_for_stmt (new_stmt, new_stmt_info);
1007 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1008 }
1009
1010 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
1011 the result of the widen-mult operation into type TYPE. */
1012 if (itype != type)
1013 {
1014 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
1015 stmt_vec_info pattern_stmt_info
1016 = new_stmt_vec_info (pattern_stmt, loop_vinfo, bb_vinfo);
1017 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
1018 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
0d0e4a03
JJ
1019 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
1020 NOP_EXPR,
1021 gimple_assign_lhs (pattern_stmt));
d367387c
CH
1022 }
1023
73fbfcad 1024 if (dump_enabled_p ())
78c60e3d 1025 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
726a989a 1026
9771b263 1027 stmts->safe_push (last_stmt);
726a989a 1028 return pattern_stmt;
20f06221
DN
1029}
1030
1031
0b2229b0
RG
1032/* Function vect_recog_pow_pattern
1033
1034 Try to find the following pattern:
1035
1036 x = POW (y, N);
1037
1038 with POW being one of pow, powf, powi, powif and N being
1039 either 2 or 0.5.
1040
1041 Input:
1042
1043 * LAST_STMT: A stmt from which the pattern search begins.
1044
1045 Output:
1046
1047 * TYPE_IN: The type of the input arguments to the pattern.
1048
1049 * TYPE_OUT: The type of the output of this pattern.
1050
1051 * Return value: A new stmt that will be used to replace the sequence of
1052 stmts that constitute the pattern. In this case it will be:
726a989a 1053 x = x * x
0b2229b0 1054 or
726a989a 1055 x = sqrt (x)
0b2229b0
RG
1056*/
1057
726a989a 1058static gimple
9771b263 1059vect_recog_pow_pattern (vec<gimple> *stmts, tree *type_in,
51312233 1060 tree *type_out)
0b2229b0 1061{
9771b263 1062 gimple last_stmt = (*stmts)[0];
726a989a
RB
1063 tree fn, base, exp = NULL;
1064 gimple stmt;
1065 tree var;
0b2229b0 1066
51312233 1067 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
0b2229b0
RG
1068 return NULL;
1069
51312233 1070 fn = gimple_call_fndecl (last_stmt);
52bd463c
RG
1071 if (fn == NULL_TREE || DECL_BUILT_IN_CLASS (fn) != BUILT_IN_NORMAL)
1072 return NULL;
1073
0b2229b0
RG
1074 switch (DECL_FUNCTION_CODE (fn))
1075 {
1076 case BUILT_IN_POWIF:
1077 case BUILT_IN_POWI:
1078 case BUILT_IN_POWF:
1079 case BUILT_IN_POW:
51312233
IR
1080 base = gimple_call_arg (last_stmt, 0);
1081 exp = gimple_call_arg (last_stmt, 1);
0b2229b0
RG
1082 if (TREE_CODE (exp) != REAL_CST
1083 && TREE_CODE (exp) != INTEGER_CST)
726a989a 1084 return NULL;
0b2229b0
RG
1085 break;
1086
726a989a
RB
1087 default:
1088 return NULL;
0b2229b0
RG
1089 }
1090
1091 /* We now have a pow or powi builtin function call with a constant
1092 exponent. */
1093
0b2229b0
RG
1094 *type_out = NULL_TREE;
1095
1096 /* Catch squaring. */
9541ffee 1097 if ((tree_fits_shwi_p (exp)
9439e9a1 1098 && tree_to_shwi (exp) == 2)
0b2229b0
RG
1099 || (TREE_CODE (exp) == REAL_CST
1100 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconst2)))
c6b1b49b
RG
1101 {
1102 *type_in = TREE_TYPE (base);
726a989a
RB
1103
1104 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
0d0e4a03 1105 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
726a989a 1106 return stmt;
c6b1b49b 1107 }
0b2229b0
RG
1108
1109 /* Catch square root. */
1110 if (TREE_CODE (exp) == REAL_CST
1111 && REAL_VALUES_EQUAL (TREE_REAL_CST (exp), dconsthalf))
1112 {
1113 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
c6b1b49b
RG
1114 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1115 if (*type_in)
1116 {
538dd0b7 1117 gcall *stmt = gimple_build_call (newfn, 1, base);
726a989a
RB
1118 if (vectorizable_function (stmt, *type_in, *type_in)
1119 != NULL_TREE)
1120 {
1121 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
b8698a0f 1122 gimple_call_set_lhs (stmt, var);
726a989a
RB
1123 return stmt;
1124 }
c6b1b49b 1125 }
0b2229b0
RG
1126 }
1127
726a989a 1128 return NULL;
0b2229b0
RG
1129}
1130
1131
20f06221
DN
1132/* Function vect_recog_widen_sum_pattern
1133
1134 Try to find the following pattern:
1135
b8698a0f 1136 type x_t;
20f06221
DN
1137 TYPE x_T, sum = init;
1138 loop:
1139 sum_0 = phi <init, sum_1>
1140 S1 x_t = *p;
1141 S2 x_T = (TYPE) x_t;
1142 S3 sum_1 = x_T + sum_0;
1143
b8698a0f 1144 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 1145 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 1146 a special case of a reduction computation.
20f06221
DN
1147
1148 Input:
1149
1150 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1151 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 1152
20f06221 1153 Output:
b8698a0f 1154
20f06221
DN
1155 * TYPE_IN: The type of the input arguments to the pattern.
1156
1157 * TYPE_OUT: The type of the output of this pattern.
1158
1159 * Return value: A new stmt that will be used to replace the sequence of
1160 stmts that constitute the pattern. In this case it will be:
1161 WIDEN_SUM <x_t, sum_0>
d29de1bf 1162
b8698a0f 1163 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 1164 vectorized without preserving all the intermediate results. It
b8698a0f
L
1165 produces only N/2 (widened) results (by summing up pairs of
1166 intermediate results) rather than all N results. Therefore, we
1167 cannot allow this pattern when we want to get all the results and in
1168 the correct order (as is the case when this computation is in an
d29de1bf 1169 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 1170
726a989a 1171static gimple
9771b263 1172vect_recog_widen_sum_pattern (vec<gimple> *stmts, tree *type_in,
51312233 1173 tree *type_out)
20f06221 1174{
9771b263 1175 gimple stmt, last_stmt = (*stmts)[0];
20f06221 1176 tree oprnd0, oprnd1;
51312233 1177 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 1178 tree type, half_type;
726a989a 1179 gimple pattern_stmt;
d29de1bf 1180 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 1181 struct loop *loop;
726a989a 1182 tree var;
bc4fb355 1183 bool promotion;
20f06221 1184
f5709183
IR
1185 if (!loop_info)
1186 return NULL;
1187
1188 loop = LOOP_VINFO_LOOP (loop_info);
1189
328dc477
RB
1190 /* We don't allow changing the order of the computation in the inner-loop
1191 when doing outer-loop vectorization. */
1192 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1193 return NULL;
1194
51312233 1195 if (!is_gimple_assign (last_stmt))
20f06221
DN
1196 return NULL;
1197
51312233 1198 type = gimple_expr_type (last_stmt);
20f06221
DN
1199
1200 /* Look for the following pattern
1201 DX = (TYPE) X;
1202 sum_1 = DX + sum_0;
1203 In which DX is at least double the size of X, and sum_1 has been
1204 recognized as a reduction variable.
1205 */
1206
1207 /* Starting from LAST_STMT, follow the defs of its uses in search
1208 of the above pattern. */
1209
51312233 1210 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
1211 return NULL;
1212
51312233
IR
1213 oprnd0 = gimple_assign_rhs1 (last_stmt);
1214 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
1215 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1216 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
1217 return NULL;
1218
51312233 1219 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
1220 we know that oprnd1 is the reduction variable (defined by a loop-header
1221 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1222 Left to check that oprnd0 is defined by a cast from type 'type' to type
1223 'TYPE'. */
1224
bc4fb355
IR
1225 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1226 &promotion)
1227 || !promotion)
1228 return NULL;
20f06221 1229
726a989a 1230 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
1231 *type_in = half_type;
1232 *type_out = type;
1233
1234 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 1235 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1236 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
726a989a 1237
73fbfcad 1238 if (dump_enabled_p ())
20f06221 1239 {
ccb3ad87 1240 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1241 "vect_recog_widen_sum_pattern: detected: ");
ccb3ad87 1242 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1243 dump_printf (MSG_NOTE, "\n");
20f06221 1244 }
d29de1bf 1245
726a989a 1246 return pattern_stmt;
20f06221
DN
1247}
1248
1249
1107f3ae
IR
1250/* Return TRUE if the operation in STMT can be performed on a smaller type.
1251
1252 Input:
1253 STMT - a statement to check.
1254 DEF - we support operations with two operands, one of which is constant.
1255 The other operand can be defined by a demotion operation, or by a
1256 previous statement in a sequence of over-promoted operations. In the
1257 later case DEF is used to replace that operand. (It is defined by a
1258 pattern statement we created for the previous statement in the
1259 sequence).
1260
1261 Input/output:
1262 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1263 NULL, it's the type of DEF.
1264 STMTS - additional pattern statements. If a pattern statement (type
1265 conversion) is created in this function, its original statement is
1266 added to STMTS.
1267
1268 Output:
1269 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1270 operands to use in the new pattern statement for STMT (will be created
1271 in vect_recog_over_widening_pattern ()).
1272 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1273 statements for STMT: the first one is a type promotion and the second
1274 one is the operation itself. We return the type promotion statement
363477c0 1275 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1107f3ae
IR
1276 the second pattern statement. */
1277
1278static bool
1279vect_operation_fits_smaller_type (gimple stmt, tree def, tree *new_type,
1280 tree *op0, tree *op1, gimple *new_def_stmt,
9771b263 1281 vec<gimple> *stmts)
1107f3ae
IR
1282{
1283 enum tree_code code;
1284 tree const_oprnd, oprnd;
83d5977e 1285 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
1107f3ae
IR
1286 gimple def_stmt, new_stmt;
1287 bool first = false;
bc4fb355 1288 bool promotion;
f5709183 1289
d6e1acf6
JJ
1290 *op0 = NULL_TREE;
1291 *op1 = NULL_TREE;
1107f3ae
IR
1292 *new_def_stmt = NULL;
1293
1294 if (!is_gimple_assign (stmt))
1295 return false;
1296
1297 code = gimple_assign_rhs_code (stmt);
1298 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1299 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1300 return false;
1301
1302 oprnd = gimple_assign_rhs1 (stmt);
1303 const_oprnd = gimple_assign_rhs2 (stmt);
1304 type = gimple_expr_type (stmt);
1305
1306 if (TREE_CODE (oprnd) != SSA_NAME
1307 || TREE_CODE (const_oprnd) != INTEGER_CST)
1308 return false;
1309
9ef7adc0
RG
1310 /* If oprnd has other uses besides that in stmt we cannot mark it
1311 as being part of a pattern only. */
1312 if (!has_single_use (oprnd))
1313 return false;
1314
1107f3ae
IR
1315 /* If we are in the middle of a sequence, we use DEF from a previous
1316 statement. Otherwise, OPRND has to be a result of type promotion. */
1317 if (*new_type)
1318 {
1319 half_type = *new_type;
1320 oprnd = def;
1321 }
1322 else
1323 {
1324 first = true;
bc4fb355 1325 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
f71cf56a
UW
1326 &promotion)
1327 || !promotion
1328 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1107f3ae
IR
1329 return false;
1330 }
1331
1332 /* Can we perform the operation on a smaller type? */
1333 switch (code)
1334 {
1335 case BIT_IOR_EXPR:
1336 case BIT_XOR_EXPR:
1337 case BIT_AND_EXPR:
1338 if (!int_fits_type_p (const_oprnd, half_type))
1339 {
1340 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1341 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1342 return false;
1343
1344 interm_type = build_nonstandard_integer_type (
1345 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1346 if (!int_fits_type_p (const_oprnd, interm_type))
1347 return false;
1348 }
1349
1350 break;
1351
1352 case LSHIFT_EXPR:
1353 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1354 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1355 return false;
1356
1357 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1358 (e.g., if the original value was char, the shift amount is at most 8
1359 if we want to use short). */
1360 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1361 return false;
1362
1363 interm_type = build_nonstandard_integer_type (
1364 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1365
1366 if (!vect_supportable_shift (code, interm_type))
1367 return false;
1368
1369 break;
1370
1371 case RSHIFT_EXPR:
1372 if (vect_supportable_shift (code, half_type))
1373 break;
1374
1375 /* Try intermediate type - HALF_TYPE is not supported. */
1376 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1377 return false;
1378
1379 interm_type = build_nonstandard_integer_type (
1380 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1381
1382 if (!vect_supportable_shift (code, interm_type))
1383 return false;
1384
1385 break;
1386
1387 default:
1388 gcc_unreachable ();
1389 }
1390
1391 /* There are four possible cases:
1392 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1393 the first statement in the sequence)
1394 a. The original, HALF_TYPE, is not enough - we replace the promotion
1395 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1396 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1397 promotion.
1398 2. OPRND is defined by a pattern statement we created.
1399 a. Its type is not sufficient for the operation, we create a new stmt:
1400 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1401 this statement in NEW_DEF_STMT, and it is later put in
363477c0 1402 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1107f3ae
IR
1403 b. OPRND is good to use in the new statement. */
1404 if (first)
1405 {
1406 if (interm_type)
1407 {
1408 /* Replace the original type conversion HALF_TYPE->TYPE with
1409 HALF_TYPE->INTERM_TYPE. */
1410 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1411 {
1412 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1413 /* Check if the already created pattern stmt is what we need. */
1414 if (!is_gimple_assign (new_stmt)
625a9766 1415 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1107f3ae
IR
1416 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1417 return false;
1418
9771b263 1419 stmts->safe_push (def_stmt);
1107f3ae
IR
1420 oprnd = gimple_assign_lhs (new_stmt);
1421 }
1422 else
1423 {
1424 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1425 oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 1426 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1427 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae 1428 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
9771b263 1429 stmts->safe_push (def_stmt);
1107f3ae
IR
1430 oprnd = new_oprnd;
1431 }
1432 }
1433 else
1434 {
1435 /* Retrieve the operand before the type promotion. */
1436 oprnd = gimple_assign_rhs1 (def_stmt);
1437 }
1438 }
1439 else
1440 {
1441 if (interm_type)
1442 {
1443 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
b731b390 1444 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1445 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae
IR
1446 oprnd = new_oprnd;
1447 *new_def_stmt = new_stmt;
1448 }
1449
1450 /* Otherwise, OPRND is already set. */
1451 }
1452
1453 if (interm_type)
1454 *new_type = interm_type;
1455 else
1456 *new_type = half_type;
1457
1458 *op0 = oprnd;
1459 *op1 = fold_convert (*new_type, const_oprnd);
1460
1461 return true;
1462}
1463
1464
1465/* Try to find a statement or a sequence of statements that can be performed
1466 on a smaller type:
1467
1468 type x_t;
1469 TYPE x_T, res0_T, res1_T;
1470 loop:
1471 S1 x_t = *p;
1472 S2 x_T = (TYPE) x_t;
1473 S3 res0_T = op (x_T, C0);
1474 S4 res1_T = op (res0_T, C1);
1475 S5 ... = () res1_T; - type demotion
1476
1477 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1478 constants.
1479 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1480 be 'type' or some intermediate type. For now, we expect S5 to be a type
71c92d17 1481 demotion operation. We also check that S3 and S4 have only one use. */
1107f3ae 1482
1107f3ae 1483static gimple
9771b263 1484vect_recog_over_widening_pattern (vec<gimple> *stmts,
1107f3ae
IR
1485 tree *type_in, tree *type_out)
1486{
9771b263 1487 gimple stmt = stmts->pop ();
1107f3ae 1488 gimple pattern_stmt = NULL, new_def_stmt, prev_stmt = NULL, use_stmt = NULL;
9a7a4398 1489 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
83d5977e 1490 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1107f3ae 1491 bool first;
b2a1a74d 1492 tree type = NULL;
1107f3ae
IR
1493
1494 first = true;
1495 while (1)
1496 {
1497 if (!vinfo_for_stmt (stmt)
1498 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1499 return NULL;
1500
1501 new_def_stmt = NULL;
1502 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1503 &op0, &op1, &new_def_stmt,
1504 stmts))
1505 {
1506 if (first)
1507 return NULL;
1508 else
1509 break;
1510 }
1511
1512 /* STMT can be performed on a smaller type. Check its uses. */
9a7a4398
UW
1513 use_stmt = vect_single_imm_use (stmt);
1514 if (!use_stmt || !is_gimple_assign (use_stmt))
1107f3ae
IR
1515 return NULL;
1516
1517 /* Create pattern statement for STMT. */
1518 vectype = get_vectype_for_scalar_type (new_type);
1519 if (!vectype)
1520 return NULL;
1521
1522 /* We want to collect all the statements for which we create pattern
1523 statetments, except for the case when the last statement in the
1524 sequence doesn't have a corresponding pattern statement. In such
1525 case we associate the last pattern statement with the last statement
36ba4aae 1526 in the sequence. Therefore, we only add the original statement to
1107f3ae
IR
1527 the list if we know that it is not the last. */
1528 if (prev_stmt)
9771b263 1529 stmts->safe_push (prev_stmt);
1107f3ae
IR
1530
1531 var = vect_recog_temp_ssa_var (new_type, NULL);
62371b92 1532 pattern_stmt
0d0e4a03 1533 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1107f3ae 1534 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
083481d8 1535 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1107f3ae 1536
73fbfcad 1537 if (dump_enabled_p ())
1107f3ae 1538 {
ccb3ad87 1539 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1540 "created pattern stmt: ");
ccb3ad87 1541 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1542 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1543 }
1544
b2a1a74d 1545 type = gimple_expr_type (stmt);
1107f3ae
IR
1546 prev_stmt = stmt;
1547 stmt = use_stmt;
1548
1549 first = false;
1550 }
1551
1552 /* We got a sequence. We expect it to end with a type demotion operation.
1553 Otherwise, we quit (for now). There are three possible cases: the
1554 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1555 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1556 NEW_TYPE differs (we create a new conversion statement). */
1557 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1558 {
1559 use_lhs = gimple_assign_lhs (use_stmt);
1560 use_type = TREE_TYPE (use_lhs);
82db3d43 1561 /* Support only type demotion or signedess change. */
1107f3ae 1562 if (!INTEGRAL_TYPE_P (use_type)
82db3d43 1563 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1107f3ae
IR
1564 return NULL;
1565
82db3d43
IR
1566 /* Check that NEW_TYPE is not bigger than the conversion result. */
1567 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1568 return NULL;
1569
1107f3ae
IR
1570 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1571 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1572 {
1573 /* Create NEW_TYPE->USE_TYPE conversion. */
b731b390 1574 new_oprnd = make_ssa_name (use_type);
0d0e4a03 1575 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1107f3ae
IR
1576 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1577
1578 *type_in = get_vectype_for_scalar_type (new_type);
1579 *type_out = get_vectype_for_scalar_type (use_type);
1580
1581 /* We created a pattern statement for the last statement in the
1582 sequence, so we don't need to associate it with the pattern
1583 statement created for PREV_STMT. Therefore, we add PREV_STMT
1584 to the list in order to mark it later in vect_pattern_recog_1. */
1585 if (prev_stmt)
9771b263 1586 stmts->safe_push (prev_stmt);
1107f3ae
IR
1587 }
1588 else
1589 {
1590 if (prev_stmt)
363477c0
JJ
1591 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1592 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1107f3ae
IR
1593
1594 *type_in = vectype;
1595 *type_out = NULL_TREE;
1596 }
1597
9771b263 1598 stmts->safe_push (use_stmt);
1107f3ae
IR
1599 }
1600 else
1601 /* TODO: support general case, create a conversion to the correct type. */
1602 return NULL;
1603
1604 /* Pattern detected. */
73fbfcad 1605 if (dump_enabled_p ())
1107f3ae 1606 {
ccb3ad87 1607 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1608 "vect_recog_over_widening_pattern: detected: ");
ccb3ad87 1609 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1610 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1611 }
1612
1613 return pattern_stmt;
1614}
1615
36ba4aae
IR
1616/* Detect widening shift pattern:
1617
1618 type a_t;
1619 TYPE a_T, res_T;
1620
1621 S1 a_t = ;
1622 S2 a_T = (TYPE) a_t;
1623 S3 res_T = a_T << CONST;
1624
1625 where type 'TYPE' is at least double the size of type 'type'.
1626
33018845
UW
1627 Also detect cases where the shift result is immediately converted
1628 to another type 'result_type' that is no larger in size than 'TYPE'.
1629 In those cases we perform a widen-shift that directly results in
1630 'result_type', to avoid a possible over-widening situation:
36ba4aae 1631
33018845 1632 type a_t;
36ba4aae 1633 TYPE a_T, res_T;
33018845 1634 result_type res_result;
36ba4aae
IR
1635
1636 S1 a_t = ;
1637 S2 a_T = (TYPE) a_t;
1638 S3 res_T = a_T << CONST;
33018845
UW
1639 S4 res_result = (result_type) res_T;
1640 '--> res_result' = a_t w<< CONST;
36ba4aae
IR
1641
1642 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1643 create an additional pattern stmt for S2 to create a variable of an
1644 intermediate type, and perform widen-shift on the intermediate type:
1645
1646 type a_t;
1647 interm_type a_it;
1648 TYPE a_T, res_T, res_T';
1649
1650 S1 a_t = ;
1651 S2 a_T = (TYPE) a_t;
1652 '--> a_it = (interm_type) a_t;
1653 S3 res_T = a_T << CONST;
1654 '--> res_T' = a_it <<* CONST;
1655
1656 Input/Output:
1657
1658 * STMTS: Contains a stmt from which the pattern search begins.
1659 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1660 in STMTS. When an intermediate type is used and a pattern statement is
1661 created for S2, we also put S2 here (before S3).
1662
1663 Output:
1664
1665 * TYPE_IN: The type of the input arguments to the pattern.
1666
1667 * TYPE_OUT: The type of the output of this pattern.
1668
1669 * Return value: A new stmt that will be used to replace the sequence of
1670 stmts that constitute the pattern. In this case it will be:
1671 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1672
1673static gimple
9771b263 1674vect_recog_widen_shift_pattern (vec<gimple> *stmts,
36ba4aae
IR
1675 tree *type_in, tree *type_out)
1676{
9771b263 1677 gimple last_stmt = stmts->pop ();
36ba4aae
IR
1678 gimple def_stmt0;
1679 tree oprnd0, oprnd1;
1680 tree type, half_type0;
33018845 1681 gimple pattern_stmt;
36ba4aae 1682 tree vectype, vectype_out = NULL_TREE;
36ba4aae
IR
1683 tree var;
1684 enum tree_code dummy_code;
1685 int dummy_int;
9771b263 1686 vec<tree> dummy_vec;
33018845 1687 gimple use_stmt;
bc4fb355 1688 bool promotion;
36ba4aae
IR
1689
1690 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1691 return NULL;
1692
36ba4aae 1693 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
33018845 1694 return NULL;
36ba4aae
IR
1695
1696 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1697 return NULL;
1698
1699 oprnd0 = gimple_assign_rhs1 (last_stmt);
1700 oprnd1 = gimple_assign_rhs2 (last_stmt);
1701 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1702 return NULL;
1703
1704 /* Check operand 0: it has to be defined by a type promotion. */
bc4fb355 1705 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
566d377a 1706 &promotion)
bc4fb355
IR
1707 || !promotion)
1708 return NULL;
36ba4aae
IR
1709
1710 /* Check operand 1: has to be positive. We check that it fits the type
1711 in vect_handle_widen_op_by_const (). */
1712 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1713 return NULL;
1714
1715 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1716 type = gimple_expr_type (last_stmt);
1717
33018845
UW
1718 /* Check for subsequent conversion to another type. */
1719 use_stmt = vect_single_imm_use (last_stmt);
1720 if (use_stmt && is_gimple_assign (use_stmt)
1721 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1722 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1723 {
1724 tree use_lhs = gimple_assign_lhs (use_stmt);
1725 tree use_type = TREE_TYPE (use_lhs);
1726
1727 if (INTEGRAL_TYPE_P (use_type)
1728 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1729 {
1730 last_stmt = use_stmt;
1731 type = use_type;
1732 }
1733 }
1734
36ba4aae 1735 /* Check if this a widening operation. */
566d377a 1736 gimple wstmt = NULL;
36ba4aae 1737 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
566d377a 1738 &oprnd0, &wstmt,
36ba4aae
IR
1739 type, &half_type0, def_stmt0))
1740 return NULL;
1741
36ba4aae 1742 /* Pattern detected. */
73fbfcad 1743 if (dump_enabled_p ())
ccb3ad87 1744 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1745 "vect_recog_widen_shift_pattern: detected:\n");
36ba4aae
IR
1746
1747 /* Check target support. */
1748 vectype = get_vectype_for_scalar_type (half_type0);
1749 vectype_out = get_vectype_for_scalar_type (type);
1750
1751 if (!vectype
1752 || !vectype_out
1753 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1754 vectype_out, vectype,
a86ec597
RH
1755 &dummy_code, &dummy_code,
1756 &dummy_int, &dummy_vec))
36ba4aae
IR
1757 return NULL;
1758
1759 *type_in = vectype;
1760 *type_out = vectype_out;
1761
1762 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1763 var = vect_recog_temp_ssa_var (type, NULL);
1764 pattern_stmt =
0d0e4a03 1765 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
566d377a
RB
1766 if (wstmt)
1767 {
1768 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1769 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1770 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1771 new_pattern_def_seq (stmt_vinfo, wstmt);
1772 stmt_vec_info new_stmt_info
1773 = new_stmt_vec_info (wstmt, loop_vinfo, bb_vinfo);
1774 set_vinfo_for_stmt (wstmt, new_stmt_info);
1775 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1776 }
36ba4aae 1777
73fbfcad 1778 if (dump_enabled_p ())
78c60e3d 1779 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
7e9a3abb
JJ
1780
1781 stmts->safe_push (last_stmt);
1782 return pattern_stmt;
1783}
1784
1785/* Detect a rotate pattern wouldn't be otherwise vectorized:
1786
1787 type a_t, b_t, c_t;
1788
1789 S0 a_t = b_t r<< c_t;
1790
1791 Input/Output:
1792
1793 * STMTS: Contains a stmt from which the pattern search begins,
1794 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1795 with a sequence:
1796
1797 S1 d_t = -c_t;
1798 S2 e_t = d_t & (B - 1);
1799 S3 f_t = b_t << c_t;
1800 S4 g_t = b_t >> e_t;
1801 S0 a_t = f_t | g_t;
1802
1803 where B is element bitsize of type.
1804
1805 Output:
1806
1807 * TYPE_IN: The type of the input arguments to the pattern.
1808
1809 * TYPE_OUT: The type of the output of this pattern.
1810
1811 * Return value: A new stmt that will be used to replace the rotate
1812 S0 stmt. */
1813
1814static gimple
1815vect_recog_rotate_pattern (vec<gimple> *stmts, tree *type_in, tree *type_out)
1816{
1817 gimple last_stmt = stmts->pop ();
1818 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
1819 gimple pattern_stmt, def_stmt;
1820 enum tree_code rhs_code;
1821 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
1822 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
1823 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
1824 enum vect_def_type dt;
1825 optab optab1, optab2;
68119618 1826 edge ext_def = NULL;
7e9a3abb
JJ
1827
1828 if (!is_gimple_assign (last_stmt))
1829 return NULL;
1830
1831 rhs_code = gimple_assign_rhs_code (last_stmt);
1832 switch (rhs_code)
1833 {
1834 case LROTATE_EXPR:
1835 case RROTATE_EXPR:
1836 break;
1837 default:
1838 return NULL;
1839 }
1840
1841 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1842 return NULL;
1843
1844 lhs = gimple_assign_lhs (last_stmt);
1845 oprnd0 = gimple_assign_rhs1 (last_stmt);
1846 type = TREE_TYPE (oprnd0);
1847 oprnd1 = gimple_assign_rhs2 (last_stmt);
1848 if (TREE_CODE (oprnd0) != SSA_NAME
1849 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1850 || !INTEGRAL_TYPE_P (type)
1851 || !TYPE_UNSIGNED (type))
1852 return NULL;
1853
1854 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
1855 &def, &dt))
1856 return NULL;
1857
1858 if (dt != vect_internal_def
1859 && dt != vect_constant_def
1860 && dt != vect_external_def)
1861 return NULL;
1862
1863 vectype = get_vectype_for_scalar_type (type);
1864 if (vectype == NULL_TREE)
1865 return NULL;
1866
1867 /* If vector/vector or vector/scalar rotate is supported by the target,
1868 don't do anything here. */
1869 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1870 if (optab1
1871 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1872 return NULL;
1873
1874 if (bb_vinfo != NULL || dt != vect_internal_def)
1875 {
1876 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1877 if (optab2
1878 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1879 return NULL;
1880 }
1881
1882 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1883 don't do anything here either. */
1884 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1885 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1886 if (!optab1
1887 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1888 || !optab2
1889 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1890 {
1891 if (bb_vinfo == NULL && dt == vect_internal_def)
1892 return NULL;
1893 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1894 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1895 if (!optab1
1896 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1897 || !optab2
1898 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1899 return NULL;
1900 }
1901
1902 *type_in = vectype;
1903 *type_out = vectype;
1904 if (*type_in == NULL_TREE)
1905 return NULL;
1906
68119618
JJ
1907 if (dt == vect_external_def
1908 && TREE_CODE (oprnd1) == SSA_NAME
1909 && loop_vinfo)
1910 {
1911 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1912 ext_def = loop_preheader_edge (loop);
1913 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1914 {
1915 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1916 if (bb == NULL
1917 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1918 ext_def = NULL;
1919 }
1920 }
1921
7e9a3abb
JJ
1922 def = NULL_TREE;
1923 if (TREE_CODE (oprnd1) == INTEGER_CST
1924 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1925 def = oprnd1;
1926 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1927 {
1928 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1929 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1930 && TYPE_PRECISION (TREE_TYPE (rhs1))
1931 == TYPE_PRECISION (type))
1932 def = rhs1;
1933 }
1934
1935 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1936 if (def == NULL_TREE)
1937 {
1938 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1939 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
1940 if (ext_def)
1941 {
1942 basic_block new_bb
1943 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1944 gcc_assert (!new_bb);
1945 }
1946 else
1947 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
1948 }
1949 stype = TREE_TYPE (def);
1950
1951 if (TREE_CODE (def) == INTEGER_CST)
1952 {
cc269bb6 1953 if (!tree_fits_uhwi_p (def)
7d362f6c 1954 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
7e9a3abb
JJ
1955 || integer_zerop (def))
1956 return NULL;
1957 def2 = build_int_cst (stype,
1958 GET_MODE_PRECISION (TYPE_MODE (type))
ae7e9ddd 1959 - tree_to_uhwi (def));
7e9a3abb
JJ
1960 }
1961 else
1962 {
1963 tree vecstype = get_vectype_for_scalar_type (stype);
1964 stmt_vec_info def_stmt_vinfo;
1965
1966 if (vecstype == NULL_TREE)
1967 return NULL;
1968 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 1969 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
1970 if (ext_def)
1971 {
1972 basic_block new_bb
1973 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1974 gcc_assert (!new_bb);
1975 }
1976 else
1977 {
1978 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1979 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1980 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1981 append_pattern_def_seq (stmt_vinfo, def_stmt);
1982 }
7e9a3abb
JJ
1983
1984 def2 = vect_recog_temp_ssa_var (stype, NULL);
1985 tree mask
1986 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
0d0e4a03
JJ
1987 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1988 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
1989 if (ext_def)
1990 {
1991 basic_block new_bb
1992 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1993 gcc_assert (!new_bb);
1994 }
1995 else
1996 {
1997 def_stmt_vinfo = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
1998 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1999 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
2000 append_pattern_def_seq (stmt_vinfo, def_stmt);
2001 }
7e9a3abb
JJ
2002 }
2003
2004 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2005 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
2006 ? LSHIFT_EXPR : RSHIFT_EXPR,
2007 oprnd0, def);
7e9a3abb
JJ
2008 append_pattern_def_seq (stmt_vinfo, def_stmt);
2009
2010 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
2011 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
2012 ? RSHIFT_EXPR : LSHIFT_EXPR,
2013 oprnd0, def2);
7e9a3abb
JJ
2014 append_pattern_def_seq (stmt_vinfo, def_stmt);
2015
2016 /* Pattern detected. */
2017 if (dump_enabled_p ())
2018 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2019 "vect_recog_rotate_pattern: detected:\n");
7e9a3abb
JJ
2020
2021 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2022 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 2023 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb
JJ
2024
2025 if (dump_enabled_p ())
2026 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
36ba4aae 2027
9771b263 2028 stmts->safe_push (last_stmt);
36ba4aae
IR
2029 return pattern_stmt;
2030}
1107f3ae 2031
732a0ad3
JJ
2032/* Detect a vector by vector shift pattern that wouldn't be otherwise
2033 vectorized:
2034
2035 type a_t;
2036 TYPE b_T, res_T;
2037
2038 S1 a_t = ;
2039 S2 b_T = ;
2040 S3 res_T = b_T op a_t;
2041
2042 where type 'TYPE' is a type with different size than 'type',
2043 and op is <<, >> or rotate.
2044
2045 Also detect cases:
2046
2047 type a_t;
2048 TYPE b_T, c_T, res_T;
2049
2050 S0 c_T = ;
2051 S1 a_t = (type) c_T;
2052 S2 b_T = ;
2053 S3 res_T = b_T op a_t;
2054
2055 Input/Output:
2056
2057 * STMTS: Contains a stmt from which the pattern search begins,
2058 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2059 with a shift/rotate which has same type on both operands, in the
2060 second case just b_T op c_T, in the first case with added cast
363477c0 2061 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2062
2063 Output:
2064
2065 * TYPE_IN: The type of the input arguments to the pattern.
2066
2067 * TYPE_OUT: The type of the output of this pattern.
2068
2069 * Return value: A new stmt that will be used to replace the shift/rotate
2070 S3 stmt. */
2071
2072static gimple
9771b263 2073vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
732a0ad3
JJ
2074 tree *type_in, tree *type_out)
2075{
9771b263 2076 gimple last_stmt = stmts->pop ();
732a0ad3
JJ
2077 tree oprnd0, oprnd1, lhs, var;
2078 gimple pattern_stmt, def_stmt;
2079 enum tree_code rhs_code;
2080 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2081 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 2082 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
732a0ad3
JJ
2083 enum vect_def_type dt;
2084 tree def;
2085
2086 if (!is_gimple_assign (last_stmt))
2087 return NULL;
2088
2089 rhs_code = gimple_assign_rhs_code (last_stmt);
2090 switch (rhs_code)
2091 {
2092 case LSHIFT_EXPR:
2093 case RSHIFT_EXPR:
2094 case LROTATE_EXPR:
2095 case RROTATE_EXPR:
2096 break;
2097 default:
2098 return NULL;
2099 }
2100
2101 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2102 return NULL;
2103
2104 lhs = gimple_assign_lhs (last_stmt);
2105 oprnd0 = gimple_assign_rhs1 (last_stmt);
2106 oprnd1 = gimple_assign_rhs2 (last_stmt);
2107 if (TREE_CODE (oprnd0) != SSA_NAME
2108 || TREE_CODE (oprnd1) != SSA_NAME
2109 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2110 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2111 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2112 || TYPE_PRECISION (TREE_TYPE (lhs))
2113 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2114 return NULL;
2115
f5709183 2116 if (!vect_is_simple_use (oprnd1, last_stmt, loop_vinfo, bb_vinfo, &def_stmt,
24ee1384 2117 &def, &dt))
732a0ad3
JJ
2118 return NULL;
2119
2120 if (dt != vect_internal_def)
2121 return NULL;
2122
2123 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2124 *type_out = *type_in;
2125 if (*type_in == NULL_TREE)
2126 return NULL;
2127
2128 def = NULL_TREE;
2129 if (gimple_assign_cast_p (def_stmt))
2130 {
2131 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2132 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2133 && TYPE_PRECISION (TREE_TYPE (rhs1))
2134 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2135 def = rhs1;
2136 }
2137
2138 if (def == NULL_TREE)
2139 {
2140 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2141 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
083481d8 2142 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2143 }
2144
2145 /* Pattern detected. */
73fbfcad 2146 if (dump_enabled_p ())
ccb3ad87 2147 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2148 "vect_recog_vector_vector_shift_pattern: detected:\n");
732a0ad3
JJ
2149
2150 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2151 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2152 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2153
73fbfcad 2154 if (dump_enabled_p ())
78c60e3d 2155 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
732a0ad3 2156
9771b263 2157 stmts->safe_push (last_stmt);
732a0ad3
JJ
2158 return pattern_stmt;
2159}
2160
079c527f 2161/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
2162 otherwise vectorized:
2163
2164 type a_t, b_t;
2165
2166 S1 a_t = b_t / N;
2167
079c527f 2168 where type 'type' is an integral type and N is a constant.
363477c0 2169
079c527f 2170 Similarly handle modulo by a constant:
363477c0
JJ
2171
2172 S4 a_t = b_t % N;
2173
2174 Input/Output:
2175
2176 * STMTS: Contains a stmt from which the pattern search begins,
079c527f
JJ
2177 i.e. the division stmt. S1 is replaced by if N is a power
2178 of two constant and type is signed:
363477c0
JJ
2179 S3 y_t = b_t < 0 ? N - 1 : 0;
2180 S2 x_t = b_t + y_t;
2181 S1' a_t = x_t >> log2 (N);
2182
079c527f
JJ
2183 S4 is replaced if N is a power of two constant and
2184 type is signed by (where *_T temporaries have unsigned type):
363477c0
JJ
2185 S9 y_T = b_t < 0 ? -1U : 0U;
2186 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2187 S7 z_t = (type) z_T;
2188 S6 w_t = b_t + z_t;
2189 S5 x_t = w_t & (N - 1);
2190 S4' a_t = x_t - z_t;
2191
2192 Output:
2193
2194 * TYPE_IN: The type of the input arguments to the pattern.
2195
2196 * TYPE_OUT: The type of the output of this pattern.
2197
2198 * Return value: A new stmt that will be used to replace the division
2199 S1 or modulo S4 stmt. */
2200
2201static gimple
9771b263 2202vect_recog_divmod_pattern (vec<gimple> *stmts,
079c527f 2203 tree *type_in, tree *type_out)
363477c0 2204{
9771b263 2205 gimple last_stmt = stmts->pop ();
5deb57cb 2206 tree oprnd0, oprnd1, vectype, itype, cond;
363477c0
JJ
2207 gimple pattern_stmt, def_stmt;
2208 enum tree_code rhs_code;
2209 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2210 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
079c527f 2211 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
363477c0 2212 optab optab;
00f07b86 2213 tree q;
079c527f 2214 int dummy_int, prec;
079c527f 2215 stmt_vec_info def_stmt_vinfo;
363477c0
JJ
2216
2217 if (!is_gimple_assign (last_stmt))
2218 return NULL;
2219
2220 rhs_code = gimple_assign_rhs_code (last_stmt);
2221 switch (rhs_code)
2222 {
2223 case TRUNC_DIV_EXPR:
2224 case TRUNC_MOD_EXPR:
2225 break;
2226 default:
2227 return NULL;
2228 }
2229
2230 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2231 return NULL;
2232
2233 oprnd0 = gimple_assign_rhs1 (last_stmt);
2234 oprnd1 = gimple_assign_rhs2 (last_stmt);
2235 itype = TREE_TYPE (oprnd0);
2236 if (TREE_CODE (oprnd0) != SSA_NAME
2237 || TREE_CODE (oprnd1) != INTEGER_CST
2238 || TREE_CODE (itype) != INTEGER_TYPE
079c527f 2239 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
363477c0
JJ
2240 return NULL;
2241
2242 vectype = get_vectype_for_scalar_type (itype);
2243 if (vectype == NULL_TREE)
2244 return NULL;
2245
2246 /* If the target can handle vectorized division or modulo natively,
2247 don't attempt to optimize this. */
2248 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2225b9f2 2249 if (optab != unknown_optab)
363477c0 2250 {
ef4bddc2 2251 machine_mode vec_mode = TYPE_MODE (vectype);
363477c0 2252 int icode = (int) optab_handler (optab, vec_mode);
e6d4f8f5 2253 if (icode != CODE_FOR_nothing)
363477c0
JJ
2254 return NULL;
2255 }
2256
079c527f
JJ
2257 prec = TYPE_PRECISION (itype);
2258 if (integer_pow2p (oprnd1))
363477c0 2259 {
079c527f
JJ
2260 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2261 return NULL;
363477c0 2262
079c527f 2263 /* Pattern detected. */
73fbfcad 2264 if (dump_enabled_p ())
ccb3ad87 2265 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2266 "vect_recog_divmod_pattern: detected:\n");
079c527f
JJ
2267
2268 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2269 build_int_cst (itype, 0));
2270 if (rhs_code == TRUNC_DIV_EXPR)
2271 {
2272 tree var = vect_recog_temp_ssa_var (itype, NULL);
2273 tree shift;
2274 def_stmt
0d0e4a03
JJ
2275 = gimple_build_assign (var, COND_EXPR, cond,
2276 fold_build2 (MINUS_EXPR, itype, oprnd1,
2277 build_int_cst (itype, 1)),
2278 build_int_cst (itype, 0));
079c527f
JJ
2279 new_pattern_def_seq (stmt_vinfo, def_stmt);
2280 var = vect_recog_temp_ssa_var (itype, NULL);
2281 def_stmt
0d0e4a03
JJ
2282 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2283 gimple_assign_lhs (def_stmt));
079c527f
JJ
2284 append_pattern_def_seq (stmt_vinfo, def_stmt);
2285
2286 shift = build_int_cst (itype, tree_log2 (oprnd1));
2287 pattern_stmt
0d0e4a03
JJ
2288 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2289 RSHIFT_EXPR, var, shift);
079c527f
JJ
2290 }
2291 else
2292 {
2293 tree signmask;
2294 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2295 if (compare_tree_int (oprnd1, 2) == 0)
2296 {
2297 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2298 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2299 build_int_cst (itype, 1),
2300 build_int_cst (itype, 0));
079c527f
JJ
2301 append_pattern_def_seq (stmt_vinfo, def_stmt);
2302 }
2303 else
2304 {
2305 tree utype
2306 = build_nonstandard_integer_type (prec, 1);
2307 tree vecutype = get_vectype_for_scalar_type (utype);
2308 tree shift
2309 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2310 - tree_log2 (oprnd1));
2311 tree var = vect_recog_temp_ssa_var (utype, NULL);
2312
0d0e4a03
JJ
2313 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2314 build_int_cst (utype, -1),
2315 build_int_cst (utype, 0));
079c527f
JJ
2316 def_stmt_vinfo
2317 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2318 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2319 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2320 append_pattern_def_seq (stmt_vinfo, def_stmt);
2321 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2322 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2323 gimple_assign_lhs (def_stmt),
2324 shift);
079c527f
JJ
2325 def_stmt_vinfo
2326 = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
2327 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2328 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2329 append_pattern_def_seq (stmt_vinfo, def_stmt);
2330 signmask = vect_recog_temp_ssa_var (itype, NULL);
2331 def_stmt
0d0e4a03 2332 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2333 append_pattern_def_seq (stmt_vinfo, def_stmt);
2334 }
2335 def_stmt
0d0e4a03
JJ
2336 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2337 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2338 append_pattern_def_seq (stmt_vinfo, def_stmt);
2339 def_stmt
0d0e4a03
JJ
2340 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2341 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2342 fold_build2 (MINUS_EXPR, itype, oprnd1,
2343 build_int_cst (itype, 1)));
079c527f
JJ
2344 append_pattern_def_seq (stmt_vinfo, def_stmt);
2345
2346 pattern_stmt
0d0e4a03
JJ
2347 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2348 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2349 signmask);
079c527f
JJ
2350 }
2351
73fbfcad 2352 if (dump_enabled_p ())
78c60e3d
SS
2353 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2354 0);
079c527f 2355
9771b263 2356 stmts->safe_push (last_stmt);
079c527f
JJ
2357
2358 *type_in = vectype;
2359 *type_out = vectype;
2360 return pattern_stmt;
363477c0 2361 }
079c527f 2362
6b58915b
RS
2363 if (prec > HOST_BITS_PER_WIDE_INT
2364 || integer_zerop (oprnd1))
079c527f
JJ
2365 return NULL;
2366
00f07b86
RH
2367 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2368 return NULL;
079c527f
JJ
2369
2370 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2371
2372 if (TYPE_UNSIGNED (itype))
363477c0 2373 {
079c527f
JJ
2374 unsigned HOST_WIDE_INT mh, ml;
2375 int pre_shift, post_shift;
6b58915b
RS
2376 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2377 & GET_MODE_MASK (TYPE_MODE (itype)));
5deb57cb 2378 tree t1, t2, t3, t4;
079c527f
JJ
2379
2380 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2381 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2382 return NULL;
2383
2384 /* Find a suitable multiplier and right shift count
2385 instead of multiplying with D. */
2386 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2387
2388 /* If the suggested multiplier is more than SIZE bits, we can do better
2389 for even divisors, using an initial right shift. */
2390 if (mh != 0 && (d & 1) == 0)
363477c0 2391 {
079c527f
JJ
2392 pre_shift = floor_log2 (d & -d);
2393 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2394 &ml, &post_shift, &dummy_int);
2395 gcc_assert (!mh);
2396 }
2397 else
2398 pre_shift = 0;
2399
2400 if (mh != 0)
2401 {
2402 if (post_shift - 1 >= prec)
2403 return NULL;
2404
5deb57cb
JJ
2405 /* t1 = oprnd0 h* ml;
2406 t2 = oprnd0 - t1;
2407 t3 = t2 >> 1;
2408 t4 = t1 + t3;
2409 q = t4 >> (post_shift - 1); */
2410 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2411 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2412 build_int_cst (itype, ml));
079c527f 2413 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2414
5deb57cb 2415 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2416 def_stmt
0d0e4a03 2417 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2418 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2419
2420 t3 = vect_recog_temp_ssa_var (itype, NULL);
2421 def_stmt
0d0e4a03 2422 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2423 append_pattern_def_seq (stmt_vinfo, def_stmt);
2424
5deb57cb 2425 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2426 def_stmt
0d0e4a03 2427 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2428
2429 if (post_shift != 1)
2430 {
2431 append_pattern_def_seq (stmt_vinfo, def_stmt);
2432
5deb57cb 2433 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2434 pattern_stmt
0d0e4a03
JJ
2435 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2436 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2437 }
2438 else
2439 {
5deb57cb 2440 q = t4;
079c527f
JJ
2441 pattern_stmt = def_stmt;
2442 }
363477c0
JJ
2443 }
2444 else
2445 {
079c527f
JJ
2446 if (pre_shift >= prec || post_shift >= prec)
2447 return NULL;
2448
2449 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2450 t2 = t1 h* ml;
2451 q = t2 >> post_shift; */
079c527f
JJ
2452 if (pre_shift)
2453 {
2454 t1 = vect_recog_temp_ssa_var (itype, NULL);
2455 def_stmt
0d0e4a03
JJ
2456 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2457 build_int_cst (NULL, pre_shift));
079c527f
JJ
2458 append_pattern_def_seq (stmt_vinfo, def_stmt);
2459 }
2460 else
2461 t1 = oprnd0;
363477c0 2462
5deb57cb 2463 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2464 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2465 build_int_cst (itype, ml));
079c527f 2466
5deb57cb
JJ
2467 if (post_shift)
2468 {
2469 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2470
5deb57cb
JJ
2471 q = vect_recog_temp_ssa_var (itype, NULL);
2472 def_stmt
0d0e4a03
JJ
2473 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2474 build_int_cst (itype, post_shift));
5deb57cb
JJ
2475 }
2476 else
2477 q = t2;
2478
2479 pattern_stmt = def_stmt;
079c527f
JJ
2480 }
2481 }
2482 else
2483 {
2484 unsigned HOST_WIDE_INT ml;
4ee4c52c 2485 int post_shift;
6b58915b 2486 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2487 unsigned HOST_WIDE_INT abs_d;
2488 bool add = false;
5deb57cb 2489 tree t1, t2, t3, t4;
079c527f
JJ
2490
2491 /* Give up for -1. */
2492 if (d == -1)
2493 return NULL;
2494
079c527f
JJ
2495 /* Since d might be INT_MIN, we have to cast to
2496 unsigned HOST_WIDE_INT before negating to avoid
2497 undefined signed overflow. */
2498 abs_d = (d >= 0
2499 ? (unsigned HOST_WIDE_INT) d
2500 : - (unsigned HOST_WIDE_INT) d);
2501
2502 /* n rem d = n rem -d */
2503 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2504 {
2505 d = abs_d;
2506 oprnd1 = build_int_cst (itype, abs_d);
2507 }
2508 else if (HOST_BITS_PER_WIDE_INT >= prec
2509 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2510 /* This case is not handled correctly below. */
2511 return NULL;
2512
4ee4c52c 2513 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
079c527f
JJ
2514 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2515 {
2516 add = true;
2517 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2518 }
2519 if (post_shift >= prec)
2520 return NULL;
2521
7abed779 2522 /* t1 = oprnd0 h* ml; */
5deb57cb 2523 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2524 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2525 build_int_cst (itype, ml));
079c527f
JJ
2526
2527 if (add)
2528 {
5deb57cb 2529 /* t2 = t1 + oprnd0; */
7abed779 2530 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2531 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2532 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
2533 }
2534 else
5deb57cb 2535 t2 = t1;
079c527f 2536
5deb57cb 2537 if (post_shift)
079c527f 2538 {
5deb57cb 2539 /* t3 = t2 >> post_shift; */
7abed779 2540 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2541 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2542 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2543 build_int_cst (itype, post_shift));
363477c0 2544 }
079c527f 2545 else
5deb57cb 2546 t3 = t2;
079c527f 2547
807e902e 2548 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
2549 int msb = 1;
2550 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2551 {
807e902e 2552 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 2553 msb = 0;
807e902e 2554 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
2555 msb = -1;
2556 }
079c527f 2557
7abed779
JJ
2558 if (msb == 0 && d >= 0)
2559 {
2560 /* q = t3; */
2561 q = t3;
2562 pattern_stmt = def_stmt;
2563 }
2564 else
2565 {
2566 /* t4 = oprnd0 >> (prec - 1);
2567 or if we know from VRP that oprnd0 >= 0
2568 t4 = 0;
2569 or if we know from VRP that oprnd0 < 0
2570 t4 = -1; */
2571 append_pattern_def_seq (stmt_vinfo, def_stmt);
2572 t4 = vect_recog_temp_ssa_var (itype, NULL);
2573 if (msb != 1)
0d0e4a03
JJ
2574 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2575 build_int_cst (itype, msb));
7abed779 2576 else
0d0e4a03
JJ
2577 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2578 build_int_cst (itype, prec - 1));
7abed779
JJ
2579 append_pattern_def_seq (stmt_vinfo, def_stmt);
2580
2581 /* q = t3 - t4; or q = t4 - t3; */
2582 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2583 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2584 d < 0 ? t3 : t4);
7abed779 2585 }
079c527f
JJ
2586 }
2587
2588 if (rhs_code == TRUNC_MOD_EXPR)
2589 {
2590 tree r, t1;
2591
2592 /* We divided. Now finish by:
2593 t1 = q * oprnd1;
2594 r = oprnd0 - t1; */
2595 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2596
2597 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2598 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 2599 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 2600
079c527f 2601 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2602 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
2603 }
2604
079c527f 2605 /* Pattern detected. */
73fbfcad 2606 if (dump_enabled_p ())
78c60e3d 2607 {
ccb3ad87 2608 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2609 "vect_recog_divmod_pattern: detected: ");
ccb3ad87 2610 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 2611 dump_printf (MSG_NOTE, "\n");
78c60e3d 2612 }
363477c0 2613
9771b263 2614 stmts->safe_push (last_stmt);
363477c0
JJ
2615
2616 *type_in = vectype;
2617 *type_out = vectype;
2618 return pattern_stmt;
2619}
2620
69d2aade
JJ
2621/* Function vect_recog_mixed_size_cond_pattern
2622
2623 Try to find the following pattern:
2624
2625 type x_t, y_t;
2626 TYPE a_T, b_T, c_T;
2627 loop:
2628 S1 a_T = x_t CMP y_t ? b_T : c_T;
2629
2630 where type 'TYPE' is an integral type which has different size
bc4fb355 2631 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 2632 than 'type', the constants need to fit into an integer type
bc4fb355 2633 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
2634
2635 Input:
2636
2637 * LAST_STMT: A stmt from which the pattern search begins.
2638
2639 Output:
2640
2641 * TYPE_IN: The type of the input arguments to the pattern.
2642
2643 * TYPE_OUT: The type of the output of this pattern.
2644
2645 * Return value: A new stmt that will be used to replace the pattern.
2646 Additionally a def_stmt is added.
2647
2648 a_it = x_t CMP y_t ? b_it : c_it;
2649 a_T = (TYPE) a_it; */
2650
2651static gimple
9771b263 2652vect_recog_mixed_size_cond_pattern (vec<gimple> *stmts, tree *type_in,
69d2aade
JJ
2653 tree *type_out)
2654{
9771b263 2655 gimple last_stmt = (*stmts)[0];
69d2aade
JJ
2656 tree cond_expr, then_clause, else_clause;
2657 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
bc4fb355 2658 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
ef4bddc2 2659 machine_mode cmpmode;
69d2aade
JJ
2660 gimple pattern_stmt, def_stmt;
2661 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 2662 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
bc4fb355
IR
2663 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
2664 gimple def_stmt0 = NULL, def_stmt1 = NULL;
2665 bool promotion;
2666 tree comp_scalar_type;
69d2aade
JJ
2667
2668 if (!is_gimple_assign (last_stmt)
2669 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2670 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2671 return NULL;
2672
2673 cond_expr = gimple_assign_rhs1 (last_stmt);
2674 then_clause = gimple_assign_rhs2 (last_stmt);
2675 else_clause = gimple_assign_rhs3 (last_stmt);
2676
87aab9b2
JJ
2677 if (!COMPARISON_CLASS_P (cond_expr))
2678 return NULL;
2679
bc4fb355
IR
2680 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2681 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 2682 if (comp_vectype == NULL_TREE)
69d2aade
JJ
2683 return NULL;
2684
2685 type = gimple_expr_type (last_stmt);
bc4fb355
IR
2686 if (types_compatible_p (type, comp_scalar_type)
2687 || ((TREE_CODE (then_clause) != INTEGER_CST
2688 || TREE_CODE (else_clause) != INTEGER_CST)
2689 && !INTEGRAL_TYPE_P (comp_scalar_type))
2690 || !INTEGRAL_TYPE_P (type))
2691 return NULL;
2692
2693 if ((TREE_CODE (then_clause) != INTEGER_CST
2694 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2695 &def_stmt0, &promotion))
2696 || (TREE_CODE (else_clause) != INTEGER_CST
2697 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2698 &def_stmt1, &promotion)))
2699 return NULL;
2700
2701 if (orig_type0 && orig_type1
2702 && !types_compatible_p (orig_type0, orig_type1))
2703 return NULL;
2704
2705 if (orig_type0)
2706 {
2707 if (!types_compatible_p (orig_type0, comp_scalar_type))
2708 return NULL;
2709 then_clause = gimple_assign_rhs1 (def_stmt0);
2710 itype = orig_type0;
2711 }
2712
2713 if (orig_type1)
2714 {
2715 if (!types_compatible_p (orig_type1, comp_scalar_type))
2716 return NULL;
2717 else_clause = gimple_assign_rhs1 (def_stmt1);
2718 itype = orig_type1;
2719 }
2720
69d2aade
JJ
2721 cmpmode = GET_MODE_INNER (TYPE_MODE (comp_vectype));
2722
2723 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == GET_MODE_BITSIZE (cmpmode))
2724 return NULL;
2725
2726 vectype = get_vectype_for_scalar_type (type);
2727 if (vectype == NULL_TREE)
2728 return NULL;
2729
2730 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2731 return NULL;
2732
bc4fb355
IR
2733 if (itype == NULL_TREE)
2734 itype = build_nonstandard_integer_type (GET_MODE_BITSIZE (cmpmode),
2735 TYPE_UNSIGNED (type));
2736
69d2aade
JJ
2737 if (itype == NULL_TREE
2738 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != GET_MODE_BITSIZE (cmpmode))
2739 return NULL;
2740
2741 vecitype = get_vectype_for_scalar_type (itype);
2742 if (vecitype == NULL_TREE)
2743 return NULL;
2744
2745 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2746 return NULL;
2747
2748 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > GET_MODE_BITSIZE (cmpmode))
2749 {
bc4fb355
IR
2750 if ((TREE_CODE (then_clause) == INTEGER_CST
2751 && !int_fits_type_p (then_clause, itype))
2752 || (TREE_CODE (else_clause) == INTEGER_CST
2753 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
2754 return NULL;
2755 }
2756
0d0e4a03
JJ
2757 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2758 COND_EXPR, unshare_expr (cond_expr),
2759 fold_convert (itype, then_clause),
2760 fold_convert (itype, else_clause));
2761 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2762 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 2763
083481d8 2764 new_pattern_def_seq (stmt_vinfo, def_stmt);
f5709183 2765 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo, bb_vinfo);
69d2aade
JJ
2766 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2767 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2768 *type_in = vecitype;
2769 *type_out = vectype;
2770
73fbfcad 2771 if (dump_enabled_p ())
ccb3ad87 2772 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2773 "vect_recog_mixed_size_cond_pattern: detected:\n");
f5709183 2774
69d2aade
JJ
2775 return pattern_stmt;
2776}
2777
2778
71c92d17
JJ
2779/* Helper function of vect_recog_bool_pattern. Called recursively, return
2780 true if bool VAR can be optimized that way. */
2781
2782static bool
f5709183 2783check_bool_pattern (tree var, loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
71c92d17
JJ
2784{
2785 gimple def_stmt;
2786 enum vect_def_type dt;
2787 tree def, rhs1;
2788 enum tree_code rhs_code;
2789
f5709183
IR
2790 if (!vect_is_simple_use (var, NULL, loop_vinfo, bb_vinfo, &def_stmt, &def,
2791 &dt))
71c92d17
JJ
2792 return false;
2793
2794 if (dt != vect_internal_def)
2795 return false;
2796
2797 if (!is_gimple_assign (def_stmt))
2798 return false;
2799
2800 if (!has_single_use (def))
2801 return false;
2802
2803 rhs1 = gimple_assign_rhs1 (def_stmt);
2804 rhs_code = gimple_assign_rhs_code (def_stmt);
2805 switch (rhs_code)
2806 {
2807 case SSA_NAME:
f5709183 2808 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2809
2810 CASE_CONVERT:
2811 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2812 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2813 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2814 return false;
f5709183 2815 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2816
2817 case BIT_NOT_EXPR:
f5709183 2818 return check_bool_pattern (rhs1, loop_vinfo, bb_vinfo);
71c92d17
JJ
2819
2820 case BIT_AND_EXPR:
2821 case BIT_IOR_EXPR:
2822 case BIT_XOR_EXPR:
f5709183 2823 if (!check_bool_pattern (rhs1, loop_vinfo, bb_vinfo))
71c92d17 2824 return false;
f5709183
IR
2825 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo,
2826 bb_vinfo);
71c92d17
JJ
2827
2828 default:
2829 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2830 {
2831 tree vecitype, comp_vectype;
2832
2f326699
JJ
2833 /* If the comparison can throw, then is_gimple_condexpr will be
2834 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2835 if (stmt_could_throw_p (def_stmt))
2836 return false;
2837
71c92d17
JJ
2838 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2839 if (comp_vectype == NULL_TREE)
2840 return false;
2841
2842 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2843 {
ef4bddc2 2844 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 2845 tree itype
ab0ef706 2846 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2847 vecitype = get_vectype_for_scalar_type (itype);
2848 if (vecitype == NULL_TREE)
2849 return false;
2850 }
2851 else
2852 vecitype = comp_vectype;
2853 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2854 }
2855 return false;
2856 }
2857}
2858
2859
2860/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2861 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
363477c0 2862 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
71c92d17
JJ
2863
2864static tree
2865adjust_bool_pattern_cast (tree type, tree var)
2866{
2867 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
2868 gimple cast_stmt, pattern_stmt;
2869
363477c0 2870 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
71c92d17 2871 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
083481d8 2872 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
0d0e4a03
JJ
2873 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2874 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
71c92d17
JJ
2875 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2876 return gimple_assign_lhs (cast_stmt);
2877}
2878
2879
2880/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2881 recursively. VAR is an SSA_NAME that should be transformed from bool
2882 to a wider integer type, OUT_TYPE is the desired final integer type of
2883 the whole pattern, TRUEVAL should be NULL unless optimizing
2884 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2885 in the then_clause, STMTS is where statements with added pattern stmts
2886 should be pushed to. */
2887
2888static tree
2889adjust_bool_pattern (tree var, tree out_type, tree trueval,
9771b263 2890 vec<gimple> *stmts)
71c92d17
JJ
2891{
2892 gimple stmt = SSA_NAME_DEF_STMT (var);
2893 enum tree_code rhs_code, def_rhs_code;
2894 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2895 location_t loc;
2896 gimple pattern_stmt, def_stmt;
2897
2898 rhs1 = gimple_assign_rhs1 (stmt);
2899 rhs2 = gimple_assign_rhs2 (stmt);
2900 rhs_code = gimple_assign_rhs_code (stmt);
2901 loc = gimple_location (stmt);
2902 switch (rhs_code)
2903 {
2904 case SSA_NAME:
2905 CASE_CONVERT:
2906 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2907 itype = TREE_TYPE (irhs1);
2908 pattern_stmt
0d0e4a03
JJ
2909 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2910 SSA_NAME, irhs1);
71c92d17
JJ
2911 break;
2912
2913 case BIT_NOT_EXPR:
2914 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2915 itype = TREE_TYPE (irhs1);
2916 pattern_stmt
0d0e4a03
JJ
2917 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2918 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
71c92d17
JJ
2919 break;
2920
2921 case BIT_AND_EXPR:
2922 /* Try to optimize x = y & (a < b ? 1 : 0); into
2923 x = (a < b ? y : 0);
2924
2925 E.g. for:
2926 bool a_b, b_b, c_b;
2927 TYPE d_T;
2928
2929 S1 a_b = x1 CMP1 y1;
2930 S2 b_b = x2 CMP2 y2;
2931 S3 c_b = a_b & b_b;
2932 S4 d_T = (TYPE) c_b;
2933
2934 we would normally emit:
2935
2936 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2937 S2' b_T = x2 CMP2 y2 ? 1 : 0;
2938 S3' c_T = a_T & b_T;
2939 S4' d_T = c_T;
2940
2941 but we can save one stmt by using the
2942 result of one of the COND_EXPRs in the other COND_EXPR and leave
2943 BIT_AND_EXPR stmt out:
2944
2945 S1' a_T = x1 CMP1 y1 ? 1 : 0;
2946 S3' c_T = x2 CMP2 y2 ? a_T : 0;
2947 S4' f_T = c_T;
2948
2949 At least when VEC_COND_EXPR is implemented using masks
2950 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
2951 computes the comparison masks and ands it, in one case with
2952 all ones vector, in the other case with a vector register.
2953 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
2954 often more expensive. */
2955 def_stmt = SSA_NAME_DEF_STMT (rhs2);
2956 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2957 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2958 {
2959 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2960 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2961 if (TYPE_PRECISION (TREE_TYPE (irhs1))
2962 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2963 {
2964 gimple tstmt;
2965 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2966 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
9771b263 2967 tstmt = stmts->pop ();
71c92d17 2968 gcc_assert (tstmt == def_stmt);
9771b263 2969 stmts->quick_push (stmt);
71c92d17
JJ
2970 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2971 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2972 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2973 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2974 return irhs2;
2975 }
2976 else
2977 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2978 goto and_ior_xor;
2979 }
2980 def_stmt = SSA_NAME_DEF_STMT (rhs1);
2981 def_rhs_code = gimple_assign_rhs_code (def_stmt);
2982 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
2983 {
2984 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
2985 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
2986 if (TYPE_PRECISION (TREE_TYPE (irhs2))
2987 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
2988 {
2989 gimple tstmt;
2990 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
2991 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
9771b263 2992 tstmt = stmts->pop ();
71c92d17 2993 gcc_assert (tstmt == def_stmt);
9771b263 2994 stmts->quick_push (stmt);
71c92d17
JJ
2995 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
2996 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 2997 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
2998 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
2999 return irhs1;
3000 }
3001 else
3002 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3003 goto and_ior_xor;
3004 }
3005 /* FALLTHRU */
3006 case BIT_IOR_EXPR:
3007 case BIT_XOR_EXPR:
3008 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3009 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3010 and_ior_xor:
3011 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3012 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3013 {
3014 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3015 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3016 int out_prec = TYPE_PRECISION (out_type);
3017 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3018 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3019 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3020 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3021 else
3022 {
3023 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3024 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3025 }
3026 }
3027 itype = TREE_TYPE (irhs1);
3028 pattern_stmt
0d0e4a03
JJ
3029 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3030 rhs_code, irhs1, irhs2);
71c92d17
JJ
3031 break;
3032
3033 default:
3034 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3035 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2
JJ
3036 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3037 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3038 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17 3039 {
ef4bddc2 3040 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3041 itype
ab0ef706 3042 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
3043 }
3044 else
3045 itype = TREE_TYPE (rhs1);
3046 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3047 if (trueval == NULL_TREE)
3048 trueval = build_int_cst (itype, 1);
3049 else
3050 gcc_checking_assert (useless_type_conversion_p (itype,
3051 TREE_TYPE (trueval)));
3052 pattern_stmt
0d0e4a03
JJ
3053 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3054 COND_EXPR, cond_expr, trueval,
3055 build_int_cst (itype, 0));
71c92d17
JJ
3056 break;
3057 }
3058
9771b263 3059 stmts->safe_push (stmt);
71c92d17
JJ
3060 gimple_set_location (pattern_stmt, loc);
3061 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3062 return gimple_assign_lhs (pattern_stmt);
3063}
3064
3065
3066/* Function vect_recog_bool_pattern
3067
3068 Try to find pattern like following:
3069
3070 bool a_b, b_b, c_b, d_b, e_b;
3071 TYPE f_T;
3072 loop:
3073 S1 a_b = x1 CMP1 y1;
3074 S2 b_b = x2 CMP2 y2;
3075 S3 c_b = a_b & b_b;
3076 S4 d_b = x3 CMP3 y3;
3077 S5 e_b = c_b | d_b;
3078 S6 f_T = (TYPE) e_b;
3079
52264dbf
RB
3080 where type 'TYPE' is an integral type. Or a similar pattern
3081 ending in
3082
3083 S6 f_Y = e_b ? r_Y : s_Y;
3084
3085 as results from if-conversion of a complex condition.
71c92d17
JJ
3086
3087 Input:
3088
3089 * LAST_STMT: A stmt at the end from which the pattern
3090 search begins, i.e. cast of a bool to
3091 an integer type.
3092
3093 Output:
3094
3095 * TYPE_IN: The type of the input arguments to the pattern.
3096
3097 * TYPE_OUT: The type of the output of this pattern.
3098
3099 * Return value: A new stmt that will be used to replace the pattern.
3100
3101 Assuming size of TYPE is the same as size of all comparisons
3102 (otherwise some casts would be added where needed), the above
3103 sequence we create related pattern stmts:
3104 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3105 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3106 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3107 S5' e_T = c_T | d_T;
3108 S6' f_T = e_T;
3109
3110 Instead of the above S3' we could emit:
3111 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3112 S3' c_T = a_T | b_T;
3113 but the above is more efficient. */
3114
3115static gimple
9771b263 3116vect_recog_bool_pattern (vec<gimple> *stmts, tree *type_in,
71c92d17
JJ
3117 tree *type_out)
3118{
9771b263 3119 gimple last_stmt = stmts->pop ();
71c92d17
JJ
3120 enum tree_code rhs_code;
3121 tree var, lhs, rhs, vectype;
3122 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3123 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 3124 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
71c92d17
JJ
3125 gimple pattern_stmt;
3126
3127 if (!is_gimple_assign (last_stmt))
3128 return NULL;
3129
3130 var = gimple_assign_rhs1 (last_stmt);
3131 lhs = gimple_assign_lhs (last_stmt);
3132
3133 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3134 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3135 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3136 return NULL;
3137
3138 rhs_code = gimple_assign_rhs_code (last_stmt);
3139 if (CONVERT_EXPR_CODE_P (rhs_code))
3140 {
78048b1c
JJ
3141 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3142 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17
JJ
3143 return NULL;
3144 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3145 if (vectype == NULL_TREE)
3146 return NULL;
3147
f5709183 3148 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
71c92d17
JJ
3149 return NULL;
3150
3151 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3152 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3153 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
0d0e4a03 3154 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
71c92d17
JJ
3155 else
3156 pattern_stmt
0d0e4a03 3157 = gimple_build_assign (lhs, NOP_EXPR, rhs);
71c92d17
JJ
3158 *type_out = vectype;
3159 *type_in = vectype;
9771b263 3160 stmts->safe_push (last_stmt);
73fbfcad 3161 if (dump_enabled_p ())
ccb3ad87 3162 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3163 "vect_recog_bool_pattern: detected:\n");
f5709183 3164
52264dbf
RB
3165 return pattern_stmt;
3166 }
3167 else if (rhs_code == COND_EXPR
3168 && TREE_CODE (var) == SSA_NAME)
3169 {
3170 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3171 if (vectype == NULL_TREE)
3172 return NULL;
3173
3174 /* Build a scalar type for the boolean result that when
3175 vectorized matches the vector type of the result in
3176 size and number of elements. */
3177 unsigned prec
3178 = wi::udiv_trunc (TYPE_SIZE (vectype),
3179 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3180 tree type
3181 = build_nonstandard_integer_type (prec,
3182 TYPE_UNSIGNED (TREE_TYPE (var)));
3183 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3184 return NULL;
3185
3186 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
3187 return NULL;
3188
3189 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
3190 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3191 pattern_stmt
0d0e4a03
JJ
3192 = gimple_build_assign (lhs, COND_EXPR,
3193 build2 (NE_EXPR, boolean_type_node,
3194 rhs, build_int_cst (type, 0)),
3195 gimple_assign_rhs2 (last_stmt),
3196 gimple_assign_rhs3 (last_stmt));
52264dbf
RB
3197 *type_out = vectype;
3198 *type_in = vectype;
3199 stmts->safe_push (last_stmt);
3200 if (dump_enabled_p ())
3201 dump_printf_loc (MSG_NOTE, vect_location,
3202 "vect_recog_bool_pattern: detected:\n");
3203
71c92d17
JJ
3204 return pattern_stmt;
3205 }
ab0ef706
JJ
3206 else if (rhs_code == SSA_NAME
3207 && STMT_VINFO_DATA_REF (stmt_vinfo))
3208 {
3209 stmt_vec_info pattern_stmt_info;
3210 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3211 gcc_assert (vectype != NULL_TREE);
78336739
JJ
3212 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3213 return NULL;
f5709183 3214 if (!check_bool_pattern (var, loop_vinfo, bb_vinfo))
ab0ef706
JJ
3215 return NULL;
3216
3217 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype), NULL_TREE, stmts);
3218 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3219 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3220 {
3221 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
0d0e4a03 3222 gimple cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
083481d8 3223 new_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
3224 rhs = rhs2;
3225 }
0d0e4a03 3226 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
f5709183
IR
3227 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3228 bb_vinfo);
ab0ef706
JJ
3229 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3230 STMT_VINFO_DATA_REF (pattern_stmt_info)
3231 = STMT_VINFO_DATA_REF (stmt_vinfo);
3232 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3233 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3234 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3235 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3236 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3237 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3238 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3239 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
78048b1c 3240 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
ab0ef706
JJ
3241 *type_out = vectype;
3242 *type_in = vectype;
9771b263 3243 stmts->safe_push (last_stmt);
73fbfcad 3244 if (dump_enabled_p ())
ccb3ad87 3245 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3246 "vect_recog_bool_pattern: detected:\n");
ab0ef706
JJ
3247 return pattern_stmt;
3248 }
71c92d17
JJ
3249 else
3250 return NULL;
3251}
3252
3253
1107f3ae
IR
3254/* Mark statements that are involved in a pattern. */
3255
3256static inline void
3257vect_mark_pattern_stmts (gimple orig_stmt, gimple pattern_stmt,
3258 tree pattern_vectype)
3259{
3260 stmt_vec_info pattern_stmt_info, def_stmt_info;
3261 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
3262 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (orig_stmt_info);
f5709183 3263 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (orig_stmt_info);
1107f3ae
IR
3264 gimple def_stmt;
3265
1107f3ae 3266 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
ab0ef706
JJ
3267 if (pattern_stmt_info == NULL)
3268 {
f5709183
IR
3269 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, loop_vinfo,
3270 bb_vinfo);
ab0ef706
JJ
3271 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3272 }
3273 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1107f3ae
IR
3274
3275 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3276 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
ab0ef706 3277 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1107f3ae
IR
3278 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3279 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3280 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
363477c0
JJ
3281 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3282 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3283 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
1107f3ae 3284 {
363477c0
JJ
3285 gimple_stmt_iterator si;
3286 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3287 !gsi_end_p (si); gsi_next (&si))
69d2aade 3288 {
363477c0
JJ
3289 def_stmt = gsi_stmt (si);
3290 def_stmt_info = vinfo_for_stmt (def_stmt);
3291 if (def_stmt_info == NULL)
3292 {
f5709183
IR
3293 def_stmt_info = new_stmt_vec_info (def_stmt, loop_vinfo,
3294 bb_vinfo);
363477c0
JJ
3295 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3296 }
3297 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3298 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
56f8faae 3299 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
363477c0
JJ
3300 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3301 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
69d2aade 3302 }
1107f3ae
IR
3303 }
3304}
3305
b8698a0f 3306/* Function vect_pattern_recog_1
20f06221
DN
3307
3308 Input:
3309 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3310 computation pattern.
3311 STMT: A stmt from which the pattern search should start.
3312
3313 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
3314 expression that computes the same functionality and can be used to
3315 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
3316
3317 Output:
b8698a0f
L
3318 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3319 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3320 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
3321 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3322 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3323 to the available target pattern.
3324
b8698a0f 3325 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
3326 for vect_recog_pattern. */
3327
3328static void
92aea285
JJ
3329vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3330 gimple_stmt_iterator si,
9771b263 3331 vec<gimple> *stmts_to_replace)
20f06221 3332{
726a989a 3333 gimple stmt = gsi_stmt (si), pattern_stmt;
383d9c83 3334 stmt_vec_info stmt_info;
383d9c83 3335 loop_vec_info loop_vinfo;
20f06221
DN
3336 tree pattern_vectype;
3337 tree type_in, type_out;
20f06221 3338 enum tree_code code;
b5aeb3bb
IR
3339 int i;
3340 gimple next;
20f06221 3341
9771b263
DN
3342 stmts_to_replace->truncate (0);
3343 stmts_to_replace->quick_push (stmt);
d1fc143d 3344 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
726a989a 3345 if (!pattern_stmt)
b8698a0f
L
3346 return;
3347
9771b263 3348 stmt = stmts_to_replace->last ();
383d9c83
IR
3349 stmt_info = vinfo_for_stmt (stmt);
3350 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3351
b8698a0f
L
3352 if (VECTOR_MODE_P (TYPE_MODE (type_in)))
3353 {
3354 /* No need to check target support (already checked by the pattern
3355 recognition function). */
b690cc0f 3356 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
3357 }
3358 else
3359 {
ef4bddc2 3360 machine_mode vec_mode;
20f06221
DN
3361 enum insn_code icode;
3362 optab optab;
3363
3364 /* Check target support */
b690cc0f
RG
3365 type_in = get_vectype_for_scalar_type (type_in);
3366 if (!type_in)
3367 return;
3368 if (type_out)
3369 type_out = get_vectype_for_scalar_type (type_out);
3370 else
3371 type_out = type_in;
15bbc165
AO
3372 if (!type_out)
3373 return;
b690cc0f 3374 pattern_vectype = type_out;
03d3e953 3375
726a989a
RB
3376 if (is_gimple_assign (pattern_stmt))
3377 code = gimple_assign_rhs_code (pattern_stmt);
3378 else
3379 {
3380 gcc_assert (is_gimple_call (pattern_stmt));
3381 code = CALL_EXPR;
3382 }
3383
b690cc0f
RG
3384 optab = optab_for_tree_code (code, type_in, optab_default);
3385 vec_mode = TYPE_MODE (type_in);
20f06221 3386 if (!optab
947131ba 3387 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 3388 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
20f06221
DN
3389 return;
3390 }
3391
3392 /* Found a vectorizable pattern. */
73fbfcad 3393 if (dump_enabled_p ())
20f06221 3394 {
ccb3ad87 3395 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3396 "pattern recognized: ");
ccb3ad87 3397 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
20f06221 3398 }
b8698a0f 3399
726a989a 3400 /* Mark the stmts that are involved in the pattern. */
1107f3ae 3401 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 3402
b5aeb3bb
IR
3403 /* Patterns cannot be vectorized using SLP, because they change the order of
3404 computation. */
f5709183 3405 if (loop_vinfo)
9771b263 3406 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
f5709183 3407 if (next == stmt)
9771b263 3408 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
51312233 3409
1107f3ae
IR
3410 /* It is possible that additional pattern stmts are created and inserted in
3411 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3412 relevant statements. */
9771b263
DN
3413 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3414 && (unsigned) i < (stmts_to_replace->length () - 1);
51312233
IR
3415 i++)
3416 {
3417 stmt_info = vinfo_for_stmt (stmt);
3418 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
73fbfcad 3419 if (dump_enabled_p ())
51312233 3420 {
ccb3ad87 3421 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3422 "additional pattern stmt: ");
ccb3ad87 3423 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
51312233
IR
3424 }
3425
1107f3ae 3426 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
51312233 3427 }
20f06221
DN
3428}
3429
3430
3431/* Function vect_pattern_recog
3432
3433 Input:
3434 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3435 computation idioms.
3436
9d5e7640
IR
3437 Output - for each computation idiom that is detected we create a new stmt
3438 that provides the same functionality and that can be vectorized. We
20f06221
DN
3439 also record some information in the struct_stmt_info of the relevant
3440 stmts, as explained below:
3441
3442 At the entry to this function we have the following stmts, with the
3443 following initial value in the STMT_VINFO fields:
3444
3445 stmt in_pattern_p related_stmt vec_stmt
3446 S1: a_i = .... - - -
3447 S2: a_2 = ..use(a_i).. - - -
3448 S3: a_1 = ..use(a_2).. - - -
3449 S4: a_0 = ..use(a_1).. - - -
3450 S5: ... = ..use(a_0).. - - -
3451
3452 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
3453 represented by a single stmt. We then:
3454 - create a new stmt S6 equivalent to the pattern (the stmt is not
3455 inserted into the code)
20f06221
DN
3456 - fill in the STMT_VINFO fields as follows:
3457
3458 in_pattern_p related_stmt vec_stmt
b8698a0f 3459 S1: a_i = .... - - -
20f06221
DN
3460 S2: a_2 = ..use(a_i).. - - -
3461 S3: a_1 = ..use(a_2).. - - -
20f06221 3462 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 3463 '---> S6: a_new = .... - S4 -
20f06221
DN
3464 S5: ... = ..use(a_0).. - - -
3465
3466 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 3467 to each other through the RELATED_STMT field).
20f06221
DN
3468
3469 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3470 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3471 remain irrelevant unless used by stmts other than S4.
3472
3473 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 3474 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37
IR
3475 a pointer to the new vector stmt VS6 from S6 (as usual).
3476 S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
3477
3478 in_pattern_p related_stmt vec_stmt
3479 S1: a_i = .... - - -
3480 S2: a_2 = ..use(a_i).. - - -
3481 S3: a_1 = ..use(a_2).. - - -
3482 > VS6: va_new = .... - - -
20f06221 3483 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 3484 '---> S6: a_new = .... - S4 VS6
20f06221
DN
3485 > VS5: ... = ..vuse(va_new).. - - -
3486 S5: ... = ..use(a_0).. - - -
3487
9d5e7640 3488 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
3489 elsewhere), and we'll end up with:
3490
b8698a0f 3491 VS6: va_new = ....
83197f37
IR
3492 VS5: ... = ..vuse(va_new)..
3493
3494 In case of more than one pattern statements, e.g., widen-mult with
3495 intermediate type:
3496
3497 S1 a_t = ;
3498 S2 a_T = (TYPE) a_t;
3499 '--> S3: a_it = (interm_type) a_t;
3500 S4 prod_T = a_T * CONST;
3501 '--> S5: prod_T' = a_it w* CONST;
3502
3503 there may be other users of a_T outside the pattern. In that case S2 will
3504 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3505 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3506 be recorded in S3. */
20f06221
DN
3507
3508void
f5709183 3509vect_pattern_recog (loop_vec_info loop_vinfo, bb_vec_info bb_vinfo)
20f06221 3510{
f5709183 3511 struct loop *loop;
772e61e1 3512 basic_block *bbs;
f5709183 3513 unsigned int nbbs;
726a989a 3514 gimple_stmt_iterator si;
20f06221 3515 unsigned int i, j;
92aea285 3516 vect_recog_func_ptr vect_recog_func;
00f96dc9 3517 auto_vec<gimple, 1> stmts_to_replace;
f5709183 3518 gimple stmt;
20f06221 3519
73fbfcad 3520 if (dump_enabled_p ())
78c60e3d 3521 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3522 "=== vect_pattern_recog ===\n");
20f06221 3523
f5709183
IR
3524 if (loop_vinfo)
3525 {
3526 loop = LOOP_VINFO_LOOP (loop_vinfo);
3527 bbs = LOOP_VINFO_BBS (loop_vinfo);
3528 nbbs = loop->num_nodes;
3529 }
3530 else
3531 {
772e61e1 3532 bbs = &BB_VINFO_BB (bb_vinfo);
f5709183 3533 nbbs = 1;
f5709183
IR
3534 }
3535
20f06221
DN
3536 /* Scan through the loop stmts, applying the pattern recognition
3537 functions starting at each stmt visited: */
3538 for (i = 0; i < nbbs; i++)
3539 {
3540 basic_block bb = bbs[i];
726a989a 3541 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
20f06221 3542 {
f5709183
IR
3543 if (bb_vinfo && (stmt = gsi_stmt (si))
3544 && vinfo_for_stmt (stmt)
3545 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
3546 continue;
3547
20f06221
DN
3548 /* Scan over all generic vect_recog_xxx_pattern functions. */
3549 for (j = 0; j < NUM_PATTERNS; j++)
3550 {
92aea285
JJ
3551 vect_recog_func = vect_vect_recog_func_ptrs[j];
3552 vect_pattern_recog_1 (vect_recog_func, si,
d1fc143d 3553 &stmts_to_replace);
20f06221
DN
3554 }
3555 }
3556 }
3557}