]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-vect-patterns.c
Remove dead macros
[thirdparty/gcc.git] / gcc / tree-vect-patterns.c
CommitLineData
20f06221 1/* Analysis Utilities for Loop Vectorization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
20f06221
DN
3 Contributed by Dorit Nuzman <dorit@il.ibm.com>
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it under
8the terms of the GNU General Public License as published by the Free
9dcd6f09 9Software Foundation; either version 3, or (at your option) any later
20f06221
DN
10version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
20f06221
DN
20
21#include "config.h"
22#include "system.h"
23#include "coretypes.h"
c7131fb2 24#include "backend.h"
957060b5 25#include "rtl.h"
20f06221 26#include "tree.h"
c7131fb2 27#include "gimple.h"
c7131fb2 28#include "ssa.h"
957060b5
AM
29#include "expmed.h"
30#include "optabs-tree.h"
31#include "insn-config.h"
32#include "recog.h" /* FIXME: for insn_data */
40e23961 33#include "fold-const.h"
d8a2d370 34#include "stor-layout.h"
2fb9a547 35#include "tree-eh.h"
45b0be94 36#include "gimplify.h"
5be5c238 37#include "gimple-iterator.h"
20f06221 38#include "cfgloop.h"
20f06221 39#include "tree-vectorizer.h"
7ee2468b 40#include "dumpfile.h"
9b2b7279 41#include "builtins.h"
7a31e5ef 42#include "case-cfn-macros.h"
20f06221 43
20f06221 44/* Pattern recognition functions */
355fe088 45static gimple *vect_recog_widen_sum_pattern (vec<gimple *> *, tree *,
51312233 46 tree *);
355fe088 47static gimple *vect_recog_widen_mult_pattern (vec<gimple *> *, tree *,
51312233 48 tree *);
355fe088 49static gimple *vect_recog_dot_prod_pattern (vec<gimple *> *, tree *,
51312233 50 tree *);
355fe088 51static gimple *vect_recog_sad_pattern (vec<gimple *> *, tree *,
79d652a5 52 tree *);
355fe088
TS
53static gimple *vect_recog_pow_pattern (vec<gimple *> *, tree *, tree *);
54static gimple *vect_recog_over_widening_pattern (vec<gimple *> *, tree *,
1107f3ae 55 tree *);
355fe088 56static gimple *vect_recog_widen_shift_pattern (vec<gimple *> *,
36ba4aae 57 tree *, tree *);
355fe088
TS
58static gimple *vect_recog_rotate_pattern (vec<gimple *> *, tree *, tree *);
59static gimple *vect_recog_vector_vector_shift_pattern (vec<gimple *> *,
732a0ad3 60 tree *, tree *);
355fe088 61static gimple *vect_recog_divmod_pattern (vec<gimple *> *,
079c527f 62 tree *, tree *);
47486460 63
355fe088 64static gimple *vect_recog_mult_pattern (vec<gimple *> *,
47486460
VK
65 tree *, tree *);
66
355fe088 67static gimple *vect_recog_mixed_size_cond_pattern (vec<gimple *> *,
69d2aade 68 tree *, tree *);
355fe088 69static gimple *vect_recog_bool_pattern (vec<gimple *> *, tree *, tree *);
e6f5c25d 70static gimple *vect_recog_mask_conversion_pattern (vec<gimple *> *, tree *, tree *);
20f06221
DN
71static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
72 vect_recog_widen_mult_pattern,
73 vect_recog_widen_sum_pattern,
0b2229b0 74 vect_recog_dot_prod_pattern,
79d652a5 75 vect_recog_sad_pattern,
1107f3ae 76 vect_recog_pow_pattern,
36ba4aae 77 vect_recog_widen_shift_pattern,
33018845 78 vect_recog_over_widening_pattern,
7e9a3abb 79 vect_recog_rotate_pattern,
732a0ad3 80 vect_recog_vector_vector_shift_pattern,
079c527f 81 vect_recog_divmod_pattern,
47486460 82 vect_recog_mult_pattern,
71c92d17 83 vect_recog_mixed_size_cond_pattern,
e6f5c25d
IE
84 vect_recog_bool_pattern,
85 vect_recog_mask_conversion_pattern};
20f06221 86
083481d8 87static inline void
355fe088 88append_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
083481d8 89{
a1a6c5b2
JJ
90 gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
91 stmt);
083481d8
JJ
92}
93
94static inline void
355fe088 95new_pattern_def_seq (stmt_vec_info stmt_info, gimple *stmt)
083481d8
JJ
96{
97 STMT_VINFO_PATTERN_DEF_SEQ (stmt_info) = NULL;
98 append_pattern_def_seq (stmt_info, stmt);
99}
100
f71cf56a
UW
101/* Check whether STMT2 is in the same loop or basic block as STMT1.
102 Which of the two applies depends on whether we're currently doing
103 loop-based or basic-block-based vectorization, as determined by
104 the vinfo_for_stmt for STMT1 (which must be defined).
105
106 If this returns true, vinfo_for_stmt for STMT2 is guaranteed
107 to be defined as well. */
108
109static bool
355fe088 110vect_same_loop_or_bb_p (gimple *stmt1, gimple *stmt2)
f71cf56a
UW
111{
112 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt1);
61d371eb 113 return vect_stmt_in_region_p (stmt_vinfo->vinfo, stmt2);
f71cf56a
UW
114}
115
9a7a4398
UW
116/* If the LHS of DEF_STMT has a single use, and that statement is
117 in the same loop or basic block, return it. */
118
355fe088
TS
119static gimple *
120vect_single_imm_use (gimple *def_stmt)
9a7a4398
UW
121{
122 tree lhs = gimple_assign_lhs (def_stmt);
123 use_operand_p use_p;
355fe088 124 gimple *use_stmt;
9a7a4398
UW
125
126 if (!single_imm_use (lhs, &use_p, &use_stmt))
127 return NULL;
128
129 if (!vect_same_loop_or_bb_p (def_stmt, use_stmt))
130 return NULL;
131
132 return use_stmt;
133}
134
bc4fb355 135/* Check whether NAME, an ssa-name used in USE_STMT,
79d652a5 136 is a result of a type promotion, such that:
20f06221 137 DEF_STMT: NAME = NOP (name0)
383d9c83
IR
138 If CHECK_SIGN is TRUE, check that either both types are signed or both are
139 unsigned. */
20f06221
DN
140
141static bool
355fe088
TS
142type_conversion_p (tree name, gimple *use_stmt, bool check_sign,
143 tree *orig_type, gimple **def_stmt, bool *promotion)
20f06221 144{
355fe088 145 gimple *dummy_gimple;
20f06221 146 stmt_vec_info stmt_vinfo;
20f06221
DN
147 tree type = TREE_TYPE (name);
148 tree oprnd0;
149 enum vect_def_type dt;
20f06221
DN
150
151 stmt_vinfo = vinfo_for_stmt (use_stmt);
81c40241 152 if (!vect_is_simple_use (name, stmt_vinfo->vinfo, def_stmt, &dt))
20f06221
DN
153 return false;
154
8644a673
IR
155 if (dt != vect_internal_def
156 && dt != vect_external_def && dt != vect_constant_def)
20f06221
DN
157 return false;
158
bc4fb355 159 if (!*def_stmt)
20f06221
DN
160 return false;
161
726a989a 162 if (!is_gimple_assign (*def_stmt))
20f06221
DN
163 return false;
164
bc4fb355 165 if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (*def_stmt)))
20f06221
DN
166 return false;
167
726a989a 168 oprnd0 = gimple_assign_rhs1 (*def_stmt);
20f06221 169
bc4fb355
IR
170 *orig_type = TREE_TYPE (oprnd0);
171 if (!INTEGRAL_TYPE_P (type) || !INTEGRAL_TYPE_P (*orig_type)
172 || ((TYPE_UNSIGNED (type) != TYPE_UNSIGNED (*orig_type)) && check_sign))
173 return false;
174
175 if (TYPE_PRECISION (type) >= (TYPE_PRECISION (*orig_type) * 2))
176 *promotion = true;
bc4fb355 177 else
79d652a5 178 *promotion = false;
20f06221 179
81c40241 180 if (!vect_is_simple_use (oprnd0, stmt_vinfo->vinfo, &dummy_gimple, &dt))
20f06221
DN
181 return false;
182
20f06221
DN
183 return true;
184}
185
726a989a
RB
186/* Helper to return a new temporary for pattern of TYPE for STMT. If STMT
187 is NULL, the caller must set SSA_NAME_DEF_STMT for the returned SSA var. */
188
189static tree
355fe088 190vect_recog_temp_ssa_var (tree type, gimple *stmt)
726a989a 191{
83d5977e 192 return make_temp_ssa_name (type, stmt, "patt");
726a989a 193}
20f06221
DN
194
195/* Function vect_recog_dot_prod_pattern
196
197 Try to find the following pattern:
198
199 type x_t, y_t;
200 TYPE1 prod;
201 TYPE2 sum = init;
202 loop:
203 sum_0 = phi <init, sum_1>
204 S1 x_t = ...
205 S2 y_t = ...
206 S3 x_T = (TYPE1) x_t;
207 S4 y_T = (TYPE1) y_t;
208 S5 prod = x_T * y_T;
209 [S6 prod = (TYPE2) prod; #optional]
210 S7 sum_1 = prod + sum_0;
211
b8698a0f
L
212 where 'TYPE1' is exactly double the size of type 'type', and 'TYPE2' is the
213 same size of 'TYPE1' or bigger. This is a special case of a reduction
20f06221 214 computation.
b8698a0f 215
20f06221
DN
216 Input:
217
51312233
IR
218 * STMTS: Contains a stmt from which the pattern search begins. In the
219 example, when this function is called with S7, the pattern {S3,S4,S5,S6,S7}
220 will be detected.
20f06221
DN
221
222 Output:
223
224 * TYPE_IN: The type of the input arguments to the pattern.
225
226 * TYPE_OUT: The type of the output of this pattern.
227
228 * Return value: A new stmt that will be used to replace the sequence of
229 stmts that constitute the pattern. In this case it will be:
230 WIDEN_DOT_PRODUCT <x_t, y_t, sum_0>
d29de1bf
DN
231
232 Note: The dot-prod idiom is a widening reduction pattern that is
233 vectorized without preserving all the intermediate results. It
234 produces only N/2 (widened) results (by summing up pairs of
235 intermediate results) rather than all N results. Therefore, we
236 cannot allow this pattern when we want to get all the results and in
237 the correct order (as is the case when this computation is in an
238 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 239
355fe088
TS
240static gimple *
241vect_recog_dot_prod_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 242 tree *type_out)
20f06221 243{
355fe088 244 gimple *stmt, *last_stmt = (*stmts)[0];
20f06221
DN
245 tree oprnd0, oprnd1;
246 tree oprnd00, oprnd01;
51312233 247 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 248 tree type, half_type;
355fe088 249 gimple *pattern_stmt;
20f06221 250 tree prod_type;
d29de1bf 251 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 252 struct loop *loop;
f471fe72 253 tree var;
bc4fb355 254 bool promotion;
20f06221 255
f5709183
IR
256 if (!loop_info)
257 return NULL;
258
259 loop = LOOP_VINFO_LOOP (loop_info);
260
328dc477
RB
261 /* We don't allow changing the order of the computation in the inner-loop
262 when doing outer-loop vectorization. */
263 if (loop && nested_in_vect_loop_p (loop, last_stmt))
264 return NULL;
265
51312233 266 if (!is_gimple_assign (last_stmt))
20f06221
DN
267 return NULL;
268
51312233 269 type = gimple_expr_type (last_stmt);
20f06221 270
b8698a0f 271 /* Look for the following pattern
20f06221
DN
272 DX = (TYPE1) X;
273 DY = (TYPE1) Y;
b8698a0f 274 DPROD = DX * DY;
20f06221
DN
275 DDPROD = (TYPE2) DPROD;
276 sum_1 = DDPROD + sum_0;
b8698a0f 277 In which
20f06221
DN
278 - DX is double the size of X
279 - DY is double the size of Y
280 - DX, DY, DPROD all have the same type
281 - sum is the same size of DPROD or bigger
282 - sum has been recognized as a reduction variable.
283
284 This is equivalent to:
285 DPROD = X w* Y; #widen mult
286 sum_1 = DPROD w+ sum_0; #widen summation
287 or
288 DPROD = X w* Y; #widen mult
289 sum_1 = DPROD + sum_0; #summation
290 */
291
292 /* Starting from LAST_STMT, follow the defs of its uses in search
293 of the above pattern. */
294
51312233 295 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
296 return NULL;
297
298 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
299 {
300 /* Has been detected as widening-summation? */
301
302 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a
RB
303 type = gimple_expr_type (stmt);
304 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
20f06221 305 return NULL;
726a989a
RB
306 oprnd0 = gimple_assign_rhs1 (stmt);
307 oprnd1 = gimple_assign_rhs2 (stmt);
20f06221
DN
308 half_type = TREE_TYPE (oprnd0);
309 }
310 else
311 {
355fe088 312 gimple *def_stmt;
20f06221 313
51312233
IR
314 oprnd0 = gimple_assign_rhs1 (last_stmt);
315 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
316 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
317 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221 318 return NULL;
51312233 319 stmt = last_stmt;
20f06221 320
bc4fb355
IR
321 if (type_conversion_p (oprnd0, stmt, true, &half_type, &def_stmt,
322 &promotion)
323 && promotion)
20f06221
DN
324 {
325 stmt = def_stmt;
726a989a 326 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
327 }
328 else
329 half_type = type;
330 }
331
51312233 332 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
333 we know that oprnd1 is the reduction variable (defined by a loop-header
334 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
335 Left to check that oprnd0 is defined by a (widen_)mult_expr */
ba02d3bc
RG
336 if (TREE_CODE (oprnd0) != SSA_NAME)
337 return NULL;
20f06221
DN
338
339 prod_type = half_type;
340 stmt = SSA_NAME_DEF_STMT (oprnd0);
3cb35c12
CF
341
342 /* It could not be the dot_prod pattern if the stmt is outside the loop. */
75264e61 343 if (!gimple_bb (stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
3cb35c12
CF
344 return NULL;
345
b8698a0f 346 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
8665227f 347 inside the loop (in case we are analyzing an outer-loop). */
726a989a 348 if (!is_gimple_assign (stmt))
b8698a0f 349 return NULL;
20f06221
DN
350 stmt_vinfo = vinfo_for_stmt (stmt);
351 gcc_assert (stmt_vinfo);
8644a673 352 if (STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
b3130586 353 return NULL;
726a989a 354 if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
20f06221
DN
355 return NULL;
356 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
357 {
358 /* Has been detected as a widening multiplication? */
359
360 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
726a989a 361 if (gimple_assign_rhs_code (stmt) != WIDEN_MULT_EXPR)
20f06221
DN
362 return NULL;
363 stmt_vinfo = vinfo_for_stmt (stmt);
364 gcc_assert (stmt_vinfo);
8644a673 365 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_internal_def);
726a989a
RB
366 oprnd00 = gimple_assign_rhs1 (stmt);
367 oprnd01 = gimple_assign_rhs2 (stmt);
56f8faae
CH
368 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (last_stmt))
369 = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo);
20f06221
DN
370 }
371 else
372 {
373 tree half_type0, half_type1;
355fe088 374 gimple *def_stmt;
20f06221
DN
375 tree oprnd0, oprnd1;
376
726a989a
RB
377 oprnd0 = gimple_assign_rhs1 (stmt);
378 oprnd1 = gimple_assign_rhs2 (stmt);
9600efe1
MM
379 if (!types_compatible_p (TREE_TYPE (oprnd0), prod_type)
380 || !types_compatible_p (TREE_TYPE (oprnd1), prod_type))
20f06221 381 return NULL;
bc4fb355
IR
382 if (!type_conversion_p (oprnd0, stmt, true, &half_type0, &def_stmt,
383 &promotion)
384 || !promotion)
20f06221 385 return NULL;
726a989a 386 oprnd00 = gimple_assign_rhs1 (def_stmt);
181f5f3e 387 if (!type_conversion_p (oprnd1, stmt, true, &half_type1, &def_stmt,
bc4fb355
IR
388 &promotion)
389 || !promotion)
20f06221 390 return NULL;
726a989a 391 oprnd01 = gimple_assign_rhs1 (def_stmt);
9600efe1 392 if (!types_compatible_p (half_type0, half_type1))
20f06221
DN
393 return NULL;
394 if (TYPE_PRECISION (prod_type) != TYPE_PRECISION (half_type0) * 2)
395 return NULL;
396 }
397
398 half_type = TREE_TYPE (oprnd00);
399 *type_in = half_type;
400 *type_out = type;
b8698a0f 401
20f06221 402 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 403 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
404 pattern_stmt = gimple_build_assign (var, DOT_PROD_EXPR,
405 oprnd00, oprnd01, oprnd1);
b8698a0f 406
73fbfcad 407 if (dump_enabled_p ())
20f06221 408 {
ccb3ad87 409 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 410 "vect_recog_dot_prod_pattern: detected: ");
ccb3ad87 411 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 412 dump_printf (MSG_NOTE, "\n");
20f06221 413 }
d29de1bf 414
726a989a 415 return pattern_stmt;
20f06221 416}
b8698a0f 417
51312233 418
79d652a5
CH
419/* Function vect_recog_sad_pattern
420
421 Try to find the following Sum of Absolute Difference (SAD) pattern:
422
423 type x_t, y_t;
424 signed TYPE1 diff, abs_diff;
425 TYPE2 sum = init;
426 loop:
427 sum_0 = phi <init, sum_1>
428 S1 x_t = ...
429 S2 y_t = ...
430 S3 x_T = (TYPE1) x_t;
431 S4 y_T = (TYPE1) y_t;
432 S5 diff = x_T - y_T;
433 S6 abs_diff = ABS_EXPR <diff>;
434 [S7 abs_diff = (TYPE2) abs_diff; #optional]
435 S8 sum_1 = abs_diff + sum_0;
436
437 where 'TYPE1' is at least double the size of type 'type', and 'TYPE2' is the
438 same size of 'TYPE1' or bigger. This is a special case of a reduction
439 computation.
440
441 Input:
442
443 * STMTS: Contains a stmt from which the pattern search begins. In the
444 example, when this function is called with S8, the pattern
445 {S3,S4,S5,S6,S7,S8} will be detected.
446
447 Output:
448
449 * TYPE_IN: The type of the input arguments to the pattern.
450
451 * TYPE_OUT: The type of the output of this pattern.
452
453 * Return value: A new stmt that will be used to replace the sequence of
454 stmts that constitute the pattern. In this case it will be:
455 SAD_EXPR <x_t, y_t, sum_0>
456 */
457
355fe088
TS
458static gimple *
459vect_recog_sad_pattern (vec<gimple *> *stmts, tree *type_in,
79d652a5
CH
460 tree *type_out)
461{
355fe088 462 gimple *last_stmt = (*stmts)[0];
79d652a5
CH
463 tree sad_oprnd0, sad_oprnd1;
464 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
465 tree half_type;
466 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
467 struct loop *loop;
468 bool promotion;
469
470 if (!loop_info)
471 return NULL;
472
473 loop = LOOP_VINFO_LOOP (loop_info);
474
328dc477
RB
475 /* We don't allow changing the order of the computation in the inner-loop
476 when doing outer-loop vectorization. */
477 if (loop && nested_in_vect_loop_p (loop, last_stmt))
478 return NULL;
479
79d652a5
CH
480 if (!is_gimple_assign (last_stmt))
481 return NULL;
482
483 tree sum_type = gimple_expr_type (last_stmt);
484
485 /* Look for the following pattern
486 DX = (TYPE1) X;
487 DY = (TYPE1) Y;
488 DDIFF = DX - DY;
489 DAD = ABS_EXPR <DDIFF>;
490 DDPROD = (TYPE2) DPROD;
491 sum_1 = DAD + sum_0;
492 In which
493 - DX is at least double the size of X
494 - DY is at least double the size of Y
495 - DX, DY, DDIFF, DAD all have the same type
496 - sum is the same size of DAD or bigger
497 - sum has been recognized as a reduction variable.
498
499 This is equivalent to:
500 DDIFF = X w- Y; #widen sub
501 DAD = ABS_EXPR <DDIFF>;
502 sum_1 = DAD w+ sum_0; #widen summation
503 or
504 DDIFF = X w- Y; #widen sub
505 DAD = ABS_EXPR <DDIFF>;
506 sum_1 = DAD + sum_0; #summation
507 */
508
509 /* Starting from LAST_STMT, follow the defs of its uses in search
510 of the above pattern. */
511
512 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
513 return NULL;
514
515 tree plus_oprnd0, plus_oprnd1;
516
517 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
518 {
519 /* Has been detected as widening-summation? */
520
355fe088 521 gimple *stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
79d652a5
CH
522 sum_type = gimple_expr_type (stmt);
523 if (gimple_assign_rhs_code (stmt) != WIDEN_SUM_EXPR)
524 return NULL;
525 plus_oprnd0 = gimple_assign_rhs1 (stmt);
526 plus_oprnd1 = gimple_assign_rhs2 (stmt);
527 half_type = TREE_TYPE (plus_oprnd0);
528 }
529 else
530 {
355fe088 531 gimple *def_stmt;
79d652a5 532
79d652a5
CH
533 plus_oprnd0 = gimple_assign_rhs1 (last_stmt);
534 plus_oprnd1 = gimple_assign_rhs2 (last_stmt);
535 if (!types_compatible_p (TREE_TYPE (plus_oprnd0), sum_type)
536 || !types_compatible_p (TREE_TYPE (plus_oprnd1), sum_type))
537 return NULL;
538
539 /* The type conversion could be promotion, demotion,
540 or just signed -> unsigned. */
541 if (type_conversion_p (plus_oprnd0, last_stmt, false,
542 &half_type, &def_stmt, &promotion))
543 plus_oprnd0 = gimple_assign_rhs1 (def_stmt);
544 else
545 half_type = sum_type;
546 }
547
548 /* So far so good. Since last_stmt was detected as a (summation) reduction,
549 we know that plus_oprnd1 is the reduction variable (defined by a loop-header
550 phi), and plus_oprnd0 is an ssa-name defined by a stmt in the loop body.
551 Then check that plus_oprnd0 is defined by an abs_expr. */
552
553 if (TREE_CODE (plus_oprnd0) != SSA_NAME)
554 return NULL;
555
556 tree abs_type = half_type;
355fe088 557 gimple *abs_stmt = SSA_NAME_DEF_STMT (plus_oprnd0);
79d652a5
CH
558
559 /* It could not be the sad pattern if the abs_stmt is outside the loop. */
560 if (!gimple_bb (abs_stmt) || !flow_bb_inside_loop_p (loop, gimple_bb (abs_stmt)))
561 return NULL;
562
563 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
564 inside the loop (in case we are analyzing an outer-loop). */
565 if (!is_gimple_assign (abs_stmt))
566 return NULL;
567
568 stmt_vec_info abs_stmt_vinfo = vinfo_for_stmt (abs_stmt);
569 gcc_assert (abs_stmt_vinfo);
570 if (STMT_VINFO_DEF_TYPE (abs_stmt_vinfo) != vect_internal_def)
571 return NULL;
572 if (gimple_assign_rhs_code (abs_stmt) != ABS_EXPR)
573 return NULL;
574
575 tree abs_oprnd = gimple_assign_rhs1 (abs_stmt);
576 if (!types_compatible_p (TREE_TYPE (abs_oprnd), abs_type))
577 return NULL;
578 if (TYPE_UNSIGNED (abs_type))
579 return NULL;
580
581 /* We then detect if the operand of abs_expr is defined by a minus_expr. */
582
583 if (TREE_CODE (abs_oprnd) != SSA_NAME)
584 return NULL;
585
355fe088 586 gimple *diff_stmt = SSA_NAME_DEF_STMT (abs_oprnd);
79d652a5
CH
587
588 /* It could not be the sad pattern if the diff_stmt is outside the loop. */
589 if (!gimple_bb (diff_stmt)
590 || !flow_bb_inside_loop_p (loop, gimple_bb (diff_stmt)))
591 return NULL;
592
593 /* FORNOW. Can continue analyzing the def-use chain when this stmt in a phi
594 inside the loop (in case we are analyzing an outer-loop). */
595 if (!is_gimple_assign (diff_stmt))
596 return NULL;
597
598 stmt_vec_info diff_stmt_vinfo = vinfo_for_stmt (diff_stmt);
599 gcc_assert (diff_stmt_vinfo);
600 if (STMT_VINFO_DEF_TYPE (diff_stmt_vinfo) != vect_internal_def)
601 return NULL;
602 if (gimple_assign_rhs_code (diff_stmt) != MINUS_EXPR)
603 return NULL;
604
605 tree half_type0, half_type1;
355fe088 606 gimple *def_stmt;
79d652a5
CH
607
608 tree minus_oprnd0 = gimple_assign_rhs1 (diff_stmt);
609 tree minus_oprnd1 = gimple_assign_rhs2 (diff_stmt);
610
611 if (!types_compatible_p (TREE_TYPE (minus_oprnd0), abs_type)
612 || !types_compatible_p (TREE_TYPE (minus_oprnd1), abs_type))
613 return NULL;
614 if (!type_conversion_p (minus_oprnd0, diff_stmt, false,
615 &half_type0, &def_stmt, &promotion)
616 || !promotion)
617 return NULL;
618 sad_oprnd0 = gimple_assign_rhs1 (def_stmt);
619
620 if (!type_conversion_p (minus_oprnd1, diff_stmt, false,
621 &half_type1, &def_stmt, &promotion)
622 || !promotion)
623 return NULL;
624 sad_oprnd1 = gimple_assign_rhs1 (def_stmt);
625
626 if (!types_compatible_p (half_type0, half_type1))
627 return NULL;
628 if (TYPE_PRECISION (abs_type) < TYPE_PRECISION (half_type0) * 2
629 || TYPE_PRECISION (sum_type) < TYPE_PRECISION (half_type0) * 2)
630 return NULL;
631
632 *type_in = TREE_TYPE (sad_oprnd0);
633 *type_out = sum_type;
634
635 /* Pattern detected. Create a stmt to be used to replace the pattern: */
636 tree var = vect_recog_temp_ssa_var (sum_type, NULL);
355fe088
TS
637 gimple *pattern_stmt = gimple_build_assign (var, SAD_EXPR, sad_oprnd0,
638 sad_oprnd1, plus_oprnd1);
79d652a5
CH
639
640 if (dump_enabled_p ())
641 {
642 dump_printf_loc (MSG_NOTE, vect_location,
643 "vect_recog_sad_pattern: detected: ");
644 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
645 dump_printf (MSG_NOTE, "\n");
646 }
647
79d652a5
CH
648 return pattern_stmt;
649}
650
651
36ba4aae
IR
652/* Handle widening operation by a constant. At the moment we support MULT_EXPR
653 and LSHIFT_EXPR.
654
655 For MULT_EXPR we check that CONST_OPRND fits HALF_TYPE, and for LSHIFT_EXPR
656 we check that CONST_OPRND is less or equal to the size of HALF_TYPE.
51312233
IR
657
658 Otherwise, if the type of the result (TYPE) is at least 4 times bigger than
36ba4aae
IR
659 HALF_TYPE, and there is an intermediate type (2 times smaller than TYPE)
660 that satisfies the above restrictions, we can perform a widening opeartion
661 from the intermediate type to TYPE and replace a_T = (TYPE) a_t;
566d377a 662 with a_it = (interm_type) a_t; Store such operation in *WSTMT. */
51312233
IR
663
664static bool
355fe088 665vect_handle_widen_op_by_const (gimple *stmt, enum tree_code code,
36ba4aae 666 tree const_oprnd, tree *oprnd,
355fe088
TS
667 gimple **wstmt, tree type,
668 tree *half_type, gimple *def_stmt)
51312233 669{
83d5977e 670 tree new_type, new_oprnd;
51312233 671
36ba4aae
IR
672 if (code != MULT_EXPR && code != LSHIFT_EXPR)
673 return false;
674
675 if (((code == MULT_EXPR && int_fits_type_p (const_oprnd, *half_type))
676 || (code == LSHIFT_EXPR
677 && compare_tree_int (const_oprnd, TYPE_PRECISION (*half_type))
678 != 1))
679 && TYPE_PRECISION (type) == (TYPE_PRECISION (*half_type) * 2))
51312233
IR
680 {
681 /* CONST_OPRND is a constant of HALF_TYPE. */
682 *oprnd = gimple_assign_rhs1 (def_stmt);
683 return true;
684 }
685
f71cf56a
UW
686 if (TYPE_PRECISION (type) < (TYPE_PRECISION (*half_type) * 4))
687 return false;
688
689 if (!vect_same_loop_or_bb_p (stmt, def_stmt))
51312233
IR
690 return false;
691
36ba4aae 692 /* TYPE is 4 times bigger than HALF_TYPE, try widening operation for
51312233
IR
693 a type 2 times bigger than HALF_TYPE. */
694 new_type = build_nonstandard_integer_type (TYPE_PRECISION (type) / 2,
695 TYPE_UNSIGNED (type));
36ba4aae
IR
696 if ((code == MULT_EXPR && !int_fits_type_p (const_oprnd, new_type))
697 || (code == LSHIFT_EXPR
698 && compare_tree_int (const_oprnd, TYPE_PRECISION (new_type)) == 1))
51312233
IR
699 return false;
700
566d377a
RB
701 /* Use NEW_TYPE for widening operation and create a_T = (NEW_TYPE) a_t; */
702 *oprnd = gimple_assign_rhs1 (def_stmt);
703 new_oprnd = make_ssa_name (new_type);
704 *wstmt = gimple_build_assign (new_oprnd, NOP_EXPR, *oprnd);
705 *oprnd = new_oprnd;
51312233
IR
706
707 *half_type = new_type;
708 return true;
709}
710
711
20f06221
DN
712/* Function vect_recog_widen_mult_pattern
713
714 Try to find the following pattern:
715
d367387c
CH
716 type1 a_t;
717 type2 b_t;
20f06221
DN
718 TYPE a_T, b_T, prod_T;
719
720 S1 a_t = ;
721 S2 b_t = ;
722 S3 a_T = (TYPE) a_t;
723 S4 b_T = (TYPE) b_t;
724 S5 prod_T = a_T * b_T;
725
d367387c 726 where type 'TYPE' is at least double the size of type 'type1' and 'type2'.
20f06221 727
d47657bd 728 Also detect unsigned cases:
383d9c83 729
d367387c
CH
730 unsigned type1 a_t;
731 unsigned type2 b_t;
383d9c83
IR
732 unsigned TYPE u_prod_T;
733 TYPE a_T, b_T, prod_T;
734
735 S1 a_t = ;
736 S2 b_t = ;
737 S3 a_T = (TYPE) a_t;
738 S4 b_T = (TYPE) b_t;
739 S5 prod_T = a_T * b_T;
740 S6 u_prod_T = (unsigned TYPE) prod_T;
741
742 and multiplication by constants:
743
744 type a_t;
745 TYPE a_T, prod_T;
746
747 S1 a_t = ;
748 S3 a_T = (TYPE) a_t;
749 S5 prod_T = a_T * CONST;
750
51312233
IR
751 A special case of multiplication by constants is when 'TYPE' is 4 times
752 bigger than 'type', but CONST fits an intermediate type 2 times smaller
753 than 'TYPE'. In that case we create an additional pattern stmt for S3
754 to create a variable of the intermediate type, and perform widen-mult
755 on the intermediate type as well:
756
757 type a_t;
758 interm_type a_it;
759 TYPE a_T, prod_T, prod_T';
760
761 S1 a_t = ;
762 S3 a_T = (TYPE) a_t;
763 '--> a_it = (interm_type) a_t;
764 S5 prod_T = a_T * CONST;
765 '--> prod_T' = a_it w* CONST;
20f06221 766
51312233
IR
767 Input/Output:
768
769 * STMTS: Contains a stmt from which the pattern search begins. In the
770 example, when this function is called with S5, the pattern {S3,S4,S5,(S6)}
771 is detected. In case of unsigned widen-mult, the original stmt (S5) is
772 replaced with S6 in STMTS. In case of multiplication by a constant
773 of an intermediate type (the last case above), STMTS also contains S3
774 (inserted before S5).
20f06221
DN
775
776 Output:
777
778 * TYPE_IN: The type of the input arguments to the pattern.
779
383d9c83 780 * TYPE_OUT: The type of the output of this pattern.
20f06221
DN
781
782 * Return value: A new stmt that will be used to replace the sequence of
383d9c83 783 stmts that constitute the pattern. In this case it will be:
20f06221 784 WIDEN_MULT <a_t, b_t>
d367387c
CH
785 If the result of WIDEN_MULT needs to be converted to a larger type, the
786 returned stmt will be this type conversion stmt.
20f06221
DN
787*/
788
355fe088
TS
789static gimple *
790vect_recog_widen_mult_pattern (vec<gimple *> *stmts,
51312233 791 tree *type_in, tree *type_out)
20f06221 792{
355fe088
TS
793 gimple *last_stmt = stmts->pop ();
794 gimple *def_stmt0, *def_stmt1;
89d67cca
DN
795 tree oprnd0, oprnd1;
796 tree type, half_type0, half_type1;
355fe088 797 gimple *new_stmt = NULL, *pattern_stmt = NULL;
d367387c 798 tree vectype, vecitype;
726a989a 799 tree var;
89d67cca 800 enum tree_code dummy_code;
5d593372 801 int dummy_int;
9771b263 802 vec<tree> dummy_vec;
36ba4aae 803 bool op1_ok;
bc4fb355 804 bool promotion;
89d67cca 805
51312233 806 if (!is_gimple_assign (last_stmt))
89d67cca
DN
807 return NULL;
808
51312233 809 type = gimple_expr_type (last_stmt);
89d67cca
DN
810
811 /* Starting from LAST_STMT, follow the defs of its uses in search
812 of the above pattern. */
813
51312233 814 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
89d67cca
DN
815 return NULL;
816
51312233
IR
817 oprnd0 = gimple_assign_rhs1 (last_stmt);
818 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
819 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
820 || !types_compatible_p (TREE_TYPE (oprnd1), type))
89d67cca
DN
821 return NULL;
822
383d9c83 823 /* Check argument 0. */
bc4fb355
IR
824 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
825 &promotion)
826 || !promotion)
827 return NULL;
383d9c83 828 /* Check argument 1. */
bc4fb355
IR
829 op1_ok = type_conversion_p (oprnd1, last_stmt, false, &half_type1,
830 &def_stmt1, &promotion);
89d67cca 831
bc4fb355 832 if (op1_ok && promotion)
383d9c83
IR
833 {
834 oprnd0 = gimple_assign_rhs1 (def_stmt0);
835 oprnd1 = gimple_assign_rhs1 (def_stmt1);
836 }
36ba4aae 837 else
383d9c83 838 {
51312233 839 if (TREE_CODE (oprnd1) == INTEGER_CST
383d9c83 840 && TREE_CODE (half_type0) == INTEGER_TYPE
36ba4aae 841 && vect_handle_widen_op_by_const (last_stmt, MULT_EXPR, oprnd1,
566d377a 842 &oprnd0, &new_stmt, type,
36ba4aae 843 &half_type0, def_stmt0))
bfdeda2c
JJ
844 {
845 half_type1 = half_type0;
846 oprnd1 = fold_convert (half_type1, oprnd1);
847 }
383d9c83
IR
848 else
849 return NULL;
850 }
851
d367387c
CH
852 /* If the two arguments have different sizes, convert the one with
853 the smaller type into the larger type. */
854 if (TYPE_PRECISION (half_type0) != TYPE_PRECISION (half_type1))
855 {
566d377a
RB
856 /* If we already used up the single-stmt slot give up. */
857 if (new_stmt)
858 return NULL;
859
d367387c 860 tree* oprnd = NULL;
355fe088 861 gimple *def_stmt = NULL;
d367387c
CH
862
863 if (TYPE_PRECISION (half_type0) < TYPE_PRECISION (half_type1))
864 {
865 def_stmt = def_stmt0;
866 half_type0 = half_type1;
867 oprnd = &oprnd0;
868 }
869 else
870 {
871 def_stmt = def_stmt1;
872 half_type1 = half_type0;
873 oprnd = &oprnd1;
874 }
875
876 tree old_oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 877 tree new_oprnd = make_ssa_name (half_type0);
0d0e4a03 878 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, old_oprnd);
d367387c
CH
879 *oprnd = new_oprnd;
880 }
881
383d9c83
IR
882 /* Handle unsigned case. Look for
883 S6 u_prod_T = (unsigned TYPE) prod_T;
884 Use unsigned TYPE as the type for WIDEN_MULT_EXPR. */
885 if (TYPE_UNSIGNED (type) != TYPE_UNSIGNED (half_type0))
886 {
355fe088 887 gimple *use_stmt;
9a7a4398 888 tree use_lhs;
383d9c83
IR
889 tree use_type;
890
891 if (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (half_type1))
892 return NULL;
893
9a7a4398
UW
894 use_stmt = vect_single_imm_use (last_stmt);
895 if (!use_stmt || !is_gimple_assign (use_stmt)
625a9766 896 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
383d9c83
IR
897 return NULL;
898
899 use_lhs = gimple_assign_lhs (use_stmt);
900 use_type = TREE_TYPE (use_lhs);
901 if (!INTEGRAL_TYPE_P (use_type)
902 || (TYPE_UNSIGNED (type) == TYPE_UNSIGNED (use_type))
903 || (TYPE_PRECISION (type) != TYPE_PRECISION (use_type)))
904 return NULL;
905
906 type = use_type;
51312233 907 last_stmt = use_stmt;
383d9c83 908 }
89d67cca 909
9600efe1 910 if (!types_compatible_p (half_type0, half_type1))
89d67cca
DN
911 return NULL;
912
d367387c
CH
913 /* If TYPE is more than twice larger than HALF_TYPE, we use WIDEN_MULT
914 to get an intermediate result of type ITYPE. In this case we need
915 to build a statement to convert this intermediate result to type TYPE. */
916 tree itype = type;
917 if (TYPE_PRECISION (type) > TYPE_PRECISION (half_type0) * 2)
918 itype = build_nonstandard_integer_type
919 (GET_MODE_BITSIZE (TYPE_MODE (half_type0)) * 2,
920 TYPE_UNSIGNED (type));
921
89d67cca 922 /* Pattern detected. */
73fbfcad 923 if (dump_enabled_p ())
ccb3ad87 924 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 925 "vect_recog_widen_mult_pattern: detected:\n");
89d67cca
DN
926
927 /* Check target support */
928 vectype = get_vectype_for_scalar_type (half_type0);
d367387c 929 vecitype = get_vectype_for_scalar_type (itype);
03d3e953 930 if (!vectype
d367387c 931 || !vecitype
51312233 932 || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
d367387c 933 vecitype, vectype,
a86ec597
RH
934 &dummy_code, &dummy_code,
935 &dummy_int, &dummy_vec))
89d67cca
DN
936 return NULL;
937
938 *type_in = vectype;
d367387c 939 *type_out = get_vectype_for_scalar_type (type);
89d67cca
DN
940
941 /* Pattern supported. Create a stmt to be used to replace the pattern: */
d367387c 942 var = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 943 pattern_stmt = gimple_build_assign (var, WIDEN_MULT_EXPR, oprnd0, oprnd1);
726a989a 944
d367387c 945 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
d367387c
CH
946 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
947
948 /* If the original two operands have different sizes, we may need to convert
949 the smaller one into the larget type. If this is the case, at this point
950 the new stmt is already built. */
951 if (new_stmt)
952 {
953 append_pattern_def_seq (stmt_vinfo, new_stmt);
954 stmt_vec_info new_stmt_info
310213d4 955 = new_stmt_vec_info (new_stmt, stmt_vinfo->vinfo);
d367387c
CH
956 set_vinfo_for_stmt (new_stmt, new_stmt_info);
957 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
958 }
959
960 /* If ITYPE is not TYPE, we need to build a type convertion stmt to convert
961 the result of the widen-mult operation into type TYPE. */
962 if (itype != type)
963 {
964 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
965 stmt_vec_info pattern_stmt_info
310213d4 966 = new_stmt_vec_info (pattern_stmt, stmt_vinfo->vinfo);
d367387c
CH
967 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
968 STMT_VINFO_VECTYPE (pattern_stmt_info) = vecitype;
0d0e4a03
JJ
969 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
970 NOP_EXPR,
971 gimple_assign_lhs (pattern_stmt));
d367387c
CH
972 }
973
73fbfcad 974 if (dump_enabled_p ())
78c60e3d 975 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
726a989a 976
9771b263 977 stmts->safe_push (last_stmt);
726a989a 978 return pattern_stmt;
20f06221
DN
979}
980
981
0b2229b0
RG
982/* Function vect_recog_pow_pattern
983
984 Try to find the following pattern:
985
986 x = POW (y, N);
987
988 with POW being one of pow, powf, powi, powif and N being
989 either 2 or 0.5.
990
991 Input:
992
993 * LAST_STMT: A stmt from which the pattern search begins.
994
995 Output:
996
997 * TYPE_IN: The type of the input arguments to the pattern.
998
999 * TYPE_OUT: The type of the output of this pattern.
1000
1001 * Return value: A new stmt that will be used to replace the sequence of
1002 stmts that constitute the pattern. In this case it will be:
726a989a 1003 x = x * x
0b2229b0 1004 or
726a989a 1005 x = sqrt (x)
0b2229b0
RG
1006*/
1007
355fe088
TS
1008static gimple *
1009vect_recog_pow_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 1010 tree *type_out)
0b2229b0 1011{
355fe088 1012 gimple *last_stmt = (*stmts)[0];
7a31e5ef 1013 tree base, exp = NULL;
355fe088 1014 gimple *stmt;
726a989a 1015 tree var;
0b2229b0 1016
51312233 1017 if (!is_gimple_call (last_stmt) || gimple_call_lhs (last_stmt) == NULL)
0b2229b0
RG
1018 return NULL;
1019
7a31e5ef 1020 switch (gimple_call_combined_fn (last_stmt))
0b2229b0 1021 {
7a31e5ef
RS
1022 CASE_CFN_POW:
1023 CASE_CFN_POWI:
51312233
IR
1024 base = gimple_call_arg (last_stmt, 0);
1025 exp = gimple_call_arg (last_stmt, 1);
0b2229b0
RG
1026 if (TREE_CODE (exp) != REAL_CST
1027 && TREE_CODE (exp) != INTEGER_CST)
726a989a 1028 return NULL;
0b2229b0
RG
1029 break;
1030
726a989a
RB
1031 default:
1032 return NULL;
0b2229b0
RG
1033 }
1034
1035 /* We now have a pow or powi builtin function call with a constant
1036 exponent. */
1037
0b2229b0
RG
1038 *type_out = NULL_TREE;
1039
1040 /* Catch squaring. */
9541ffee 1041 if ((tree_fits_shwi_p (exp)
9439e9a1 1042 && tree_to_shwi (exp) == 2)
0b2229b0 1043 || (TREE_CODE (exp) == REAL_CST
624d31fe 1044 && real_equal (&TREE_REAL_CST (exp), &dconst2)))
c6b1b49b
RG
1045 {
1046 *type_in = TREE_TYPE (base);
726a989a
RB
1047
1048 var = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL);
0d0e4a03 1049 stmt = gimple_build_assign (var, MULT_EXPR, base, base);
726a989a 1050 return stmt;
c6b1b49b 1051 }
0b2229b0
RG
1052
1053 /* Catch square root. */
1054 if (TREE_CODE (exp) == REAL_CST
624d31fe 1055 && real_equal (&TREE_REAL_CST (exp), &dconsthalf))
0b2229b0
RG
1056 {
1057 tree newfn = mathfn_built_in (TREE_TYPE (base), BUILT_IN_SQRT);
c6b1b49b
RG
1058 *type_in = get_vectype_for_scalar_type (TREE_TYPE (base));
1059 if (*type_in)
1060 {
538dd0b7 1061 gcall *stmt = gimple_build_call (newfn, 1, base);
726a989a
RB
1062 if (vectorizable_function (stmt, *type_in, *type_in)
1063 != NULL_TREE)
1064 {
1065 var = vect_recog_temp_ssa_var (TREE_TYPE (base), stmt);
b8698a0f 1066 gimple_call_set_lhs (stmt, var);
726a989a
RB
1067 return stmt;
1068 }
c6b1b49b 1069 }
0b2229b0
RG
1070 }
1071
726a989a 1072 return NULL;
0b2229b0
RG
1073}
1074
1075
20f06221
DN
1076/* Function vect_recog_widen_sum_pattern
1077
1078 Try to find the following pattern:
1079
b8698a0f 1080 type x_t;
20f06221
DN
1081 TYPE x_T, sum = init;
1082 loop:
1083 sum_0 = phi <init, sum_1>
1084 S1 x_t = *p;
1085 S2 x_T = (TYPE) x_t;
1086 S3 sum_1 = x_T + sum_0;
1087
b8698a0f 1088 where type 'TYPE' is at least double the size of type 'type', i.e - we're
20f06221 1089 summing elements of type 'type' into an accumulator of type 'TYPE'. This is
917f1b7e 1090 a special case of a reduction computation.
20f06221
DN
1091
1092 Input:
1093
1094 * LAST_STMT: A stmt from which the pattern search begins. In the example,
1095 when this function is called with S3, the pattern {S2,S3} will be detected.
b8698a0f 1096
20f06221 1097 Output:
b8698a0f 1098
20f06221
DN
1099 * TYPE_IN: The type of the input arguments to the pattern.
1100
1101 * TYPE_OUT: The type of the output of this pattern.
1102
1103 * Return value: A new stmt that will be used to replace the sequence of
1104 stmts that constitute the pattern. In this case it will be:
1105 WIDEN_SUM <x_t, sum_0>
d29de1bf 1106
b8698a0f 1107 Note: The widening-sum idiom is a widening reduction pattern that is
d29de1bf 1108 vectorized without preserving all the intermediate results. It
b8698a0f
L
1109 produces only N/2 (widened) results (by summing up pairs of
1110 intermediate results) rather than all N results. Therefore, we
1111 cannot allow this pattern when we want to get all the results and in
1112 the correct order (as is the case when this computation is in an
d29de1bf 1113 inner-loop nested in an outer-loop that us being vectorized). */
20f06221 1114
355fe088
TS
1115static gimple *
1116vect_recog_widen_sum_pattern (vec<gimple *> *stmts, tree *type_in,
51312233 1117 tree *type_out)
20f06221 1118{
355fe088 1119 gimple *stmt, *last_stmt = (*stmts)[0];
20f06221 1120 tree oprnd0, oprnd1;
51312233 1121 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
20f06221 1122 tree type, half_type;
355fe088 1123 gimple *pattern_stmt;
d29de1bf 1124 loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
f5709183 1125 struct loop *loop;
726a989a 1126 tree var;
bc4fb355 1127 bool promotion;
20f06221 1128
f5709183
IR
1129 if (!loop_info)
1130 return NULL;
1131
1132 loop = LOOP_VINFO_LOOP (loop_info);
1133
328dc477
RB
1134 /* We don't allow changing the order of the computation in the inner-loop
1135 when doing outer-loop vectorization. */
1136 if (loop && nested_in_vect_loop_p (loop, last_stmt))
1137 return NULL;
1138
51312233 1139 if (!is_gimple_assign (last_stmt))
20f06221
DN
1140 return NULL;
1141
51312233 1142 type = gimple_expr_type (last_stmt);
20f06221
DN
1143
1144 /* Look for the following pattern
1145 DX = (TYPE) X;
1146 sum_1 = DX + sum_0;
1147 In which DX is at least double the size of X, and sum_1 has been
1148 recognized as a reduction variable.
1149 */
1150
1151 /* Starting from LAST_STMT, follow the defs of its uses in search
1152 of the above pattern. */
1153
51312233 1154 if (gimple_assign_rhs_code (last_stmt) != PLUS_EXPR)
20f06221
DN
1155 return NULL;
1156
51312233
IR
1157 oprnd0 = gimple_assign_rhs1 (last_stmt);
1158 oprnd1 = gimple_assign_rhs2 (last_stmt);
9600efe1
MM
1159 if (!types_compatible_p (TREE_TYPE (oprnd0), type)
1160 || !types_compatible_p (TREE_TYPE (oprnd1), type))
20f06221
DN
1161 return NULL;
1162
51312233 1163 /* So far so good. Since last_stmt was detected as a (summation) reduction,
20f06221
DN
1164 we know that oprnd1 is the reduction variable (defined by a loop-header
1165 phi), and oprnd0 is an ssa-name defined by a stmt in the loop body.
1166 Left to check that oprnd0 is defined by a cast from type 'type' to type
1167 'TYPE'. */
1168
bc4fb355
IR
1169 if (!type_conversion_p (oprnd0, last_stmt, true, &half_type, &stmt,
1170 &promotion)
1171 || !promotion)
1172 return NULL;
20f06221 1173
726a989a 1174 oprnd0 = gimple_assign_rhs1 (stmt);
20f06221
DN
1175 *type_in = half_type;
1176 *type_out = type;
1177
1178 /* Pattern detected. Create a stmt to be used to replace the pattern: */
726a989a 1179 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1180 pattern_stmt = gimple_build_assign (var, WIDEN_SUM_EXPR, oprnd0, oprnd1);
726a989a 1181
73fbfcad 1182 if (dump_enabled_p ())
20f06221 1183 {
ccb3ad87 1184 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1185 "vect_recog_widen_sum_pattern: detected: ");
ccb3ad87 1186 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1187 dump_printf (MSG_NOTE, "\n");
20f06221 1188 }
d29de1bf 1189
726a989a 1190 return pattern_stmt;
20f06221
DN
1191}
1192
1193
1107f3ae
IR
1194/* Return TRUE if the operation in STMT can be performed on a smaller type.
1195
1196 Input:
1197 STMT - a statement to check.
1198 DEF - we support operations with two operands, one of which is constant.
1199 The other operand can be defined by a demotion operation, or by a
1200 previous statement in a sequence of over-promoted operations. In the
1201 later case DEF is used to replace that operand. (It is defined by a
1202 pattern statement we created for the previous statement in the
1203 sequence).
1204
1205 Input/output:
1206 NEW_TYPE - Output: a smaller type that we are trying to use. Input: if not
1207 NULL, it's the type of DEF.
1208 STMTS - additional pattern statements. If a pattern statement (type
1209 conversion) is created in this function, its original statement is
1210 added to STMTS.
1211
1212 Output:
1213 OP0, OP1 - if the operation fits a smaller type, OP0 and OP1 are the new
1214 operands to use in the new pattern statement for STMT (will be created
1215 in vect_recog_over_widening_pattern ()).
1216 NEW_DEF_STMT - in case DEF has to be promoted, we create two pattern
1217 statements for STMT: the first one is a type promotion and the second
1218 one is the operation itself. We return the type promotion statement
363477c0 1219 in NEW_DEF_STMT and further store it in STMT_VINFO_PATTERN_DEF_SEQ of
1107f3ae
IR
1220 the second pattern statement. */
1221
1222static bool
355fe088
TS
1223vect_operation_fits_smaller_type (gimple *stmt, tree def, tree *new_type,
1224 tree *op0, tree *op1, gimple **new_def_stmt,
1225 vec<gimple *> *stmts)
1107f3ae
IR
1226{
1227 enum tree_code code;
1228 tree const_oprnd, oprnd;
83d5977e 1229 tree interm_type = NULL_TREE, half_type, new_oprnd, type;
355fe088 1230 gimple *def_stmt, *new_stmt;
1107f3ae 1231 bool first = false;
bc4fb355 1232 bool promotion;
f5709183 1233
d6e1acf6
JJ
1234 *op0 = NULL_TREE;
1235 *op1 = NULL_TREE;
1107f3ae
IR
1236 *new_def_stmt = NULL;
1237
1238 if (!is_gimple_assign (stmt))
1239 return false;
1240
1241 code = gimple_assign_rhs_code (stmt);
1242 if (code != LSHIFT_EXPR && code != RSHIFT_EXPR
1243 && code != BIT_IOR_EXPR && code != BIT_XOR_EXPR && code != BIT_AND_EXPR)
1244 return false;
1245
1246 oprnd = gimple_assign_rhs1 (stmt);
1247 const_oprnd = gimple_assign_rhs2 (stmt);
1248 type = gimple_expr_type (stmt);
1249
1250 if (TREE_CODE (oprnd) != SSA_NAME
1251 || TREE_CODE (const_oprnd) != INTEGER_CST)
1252 return false;
1253
9ef7adc0
RG
1254 /* If oprnd has other uses besides that in stmt we cannot mark it
1255 as being part of a pattern only. */
1256 if (!has_single_use (oprnd))
1257 return false;
1258
1107f3ae
IR
1259 /* If we are in the middle of a sequence, we use DEF from a previous
1260 statement. Otherwise, OPRND has to be a result of type promotion. */
1261 if (*new_type)
1262 {
1263 half_type = *new_type;
1264 oprnd = def;
1265 }
1266 else
1267 {
1268 first = true;
bc4fb355 1269 if (!type_conversion_p (oprnd, stmt, false, &half_type, &def_stmt,
f71cf56a
UW
1270 &promotion)
1271 || !promotion
1272 || !vect_same_loop_or_bb_p (stmt, def_stmt))
1107f3ae
IR
1273 return false;
1274 }
1275
1276 /* Can we perform the operation on a smaller type? */
1277 switch (code)
1278 {
1279 case BIT_IOR_EXPR:
1280 case BIT_XOR_EXPR:
1281 case BIT_AND_EXPR:
1282 if (!int_fits_type_p (const_oprnd, half_type))
1283 {
1284 /* HALF_TYPE is not enough. Try a bigger type if possible. */
1285 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1286 return false;
1287
1288 interm_type = build_nonstandard_integer_type (
1289 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1290 if (!int_fits_type_p (const_oprnd, interm_type))
1291 return false;
1292 }
1293
1294 break;
1295
1296 case LSHIFT_EXPR:
1297 /* Try intermediate type - HALF_TYPE is not enough for sure. */
1298 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1299 return false;
1300
1301 /* Check that HALF_TYPE size + shift amount <= INTERM_TYPE size.
1302 (e.g., if the original value was char, the shift amount is at most 8
1303 if we want to use short). */
1304 if (compare_tree_int (const_oprnd, TYPE_PRECISION (half_type)) == 1)
1305 return false;
1306
1307 interm_type = build_nonstandard_integer_type (
1308 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1309
1310 if (!vect_supportable_shift (code, interm_type))
1311 return false;
1312
1313 break;
1314
1315 case RSHIFT_EXPR:
1316 if (vect_supportable_shift (code, half_type))
1317 break;
1318
1319 /* Try intermediate type - HALF_TYPE is not supported. */
1320 if (TYPE_PRECISION (type) < (TYPE_PRECISION (half_type) * 4))
1321 return false;
1322
1323 interm_type = build_nonstandard_integer_type (
1324 TYPE_PRECISION (half_type) * 2, TYPE_UNSIGNED (type));
1325
1326 if (!vect_supportable_shift (code, interm_type))
1327 return false;
1328
1329 break;
1330
1331 default:
1332 gcc_unreachable ();
1333 }
1334
1335 /* There are four possible cases:
1336 1. OPRND is defined by a type promotion (in that case FIRST is TRUE, it's
1337 the first statement in the sequence)
1338 a. The original, HALF_TYPE, is not enough - we replace the promotion
1339 from HALF_TYPE to TYPE with a promotion to INTERM_TYPE.
1340 b. HALF_TYPE is sufficient, OPRND is set as the RHS of the original
1341 promotion.
1342 2. OPRND is defined by a pattern statement we created.
1343 a. Its type is not sufficient for the operation, we create a new stmt:
1344 a type conversion for OPRND from HALF_TYPE to INTERM_TYPE. We store
1345 this statement in NEW_DEF_STMT, and it is later put in
363477c0 1346 STMT_VINFO_PATTERN_DEF_SEQ of the pattern statement for STMT.
1107f3ae
IR
1347 b. OPRND is good to use in the new statement. */
1348 if (first)
1349 {
1350 if (interm_type)
1351 {
1352 /* Replace the original type conversion HALF_TYPE->TYPE with
1353 HALF_TYPE->INTERM_TYPE. */
1354 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)))
1355 {
1356 new_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
1357 /* Check if the already created pattern stmt is what we need. */
1358 if (!is_gimple_assign (new_stmt)
625a9766 1359 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (new_stmt))
1107f3ae
IR
1360 || TREE_TYPE (gimple_assign_lhs (new_stmt)) != interm_type)
1361 return false;
1362
9771b263 1363 stmts->safe_push (def_stmt);
1107f3ae
IR
1364 oprnd = gimple_assign_lhs (new_stmt);
1365 }
1366 else
1367 {
1368 /* Create NEW_OPRND = (INTERM_TYPE) OPRND. */
1369 oprnd = gimple_assign_rhs1 (def_stmt);
b731b390 1370 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1371 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae 1372 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)) = new_stmt;
9771b263 1373 stmts->safe_push (def_stmt);
1107f3ae
IR
1374 oprnd = new_oprnd;
1375 }
1376 }
1377 else
1378 {
1379 /* Retrieve the operand before the type promotion. */
1380 oprnd = gimple_assign_rhs1 (def_stmt);
1381 }
1382 }
1383 else
1384 {
1385 if (interm_type)
1386 {
1387 /* Create a type conversion HALF_TYPE->INTERM_TYPE. */
b731b390 1388 new_oprnd = make_ssa_name (interm_type);
0d0e4a03 1389 new_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, oprnd);
1107f3ae
IR
1390 oprnd = new_oprnd;
1391 *new_def_stmt = new_stmt;
1392 }
1393
1394 /* Otherwise, OPRND is already set. */
1395 }
1396
1397 if (interm_type)
1398 *new_type = interm_type;
1399 else
1400 *new_type = half_type;
1401
1402 *op0 = oprnd;
1403 *op1 = fold_convert (*new_type, const_oprnd);
1404
1405 return true;
1406}
1407
1408
1409/* Try to find a statement or a sequence of statements that can be performed
1410 on a smaller type:
1411
1412 type x_t;
1413 TYPE x_T, res0_T, res1_T;
1414 loop:
1415 S1 x_t = *p;
1416 S2 x_T = (TYPE) x_t;
1417 S3 res0_T = op (x_T, C0);
1418 S4 res1_T = op (res0_T, C1);
1419 S5 ... = () res1_T; - type demotion
1420
1421 where type 'TYPE' is at least double the size of type 'type', C0 and C1 are
1422 constants.
1423 Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
1424 be 'type' or some intermediate type. For now, we expect S5 to be a type
71c92d17 1425 demotion operation. We also check that S3 and S4 have only one use. */
1107f3ae 1426
355fe088
TS
1427static gimple *
1428vect_recog_over_widening_pattern (vec<gimple *> *stmts,
1107f3ae
IR
1429 tree *type_in, tree *type_out)
1430{
355fe088
TS
1431 gimple *stmt = stmts->pop ();
1432 gimple *pattern_stmt = NULL, *new_def_stmt, *prev_stmt = NULL,
1433 *use_stmt = NULL;
9a7a4398 1434 tree op0, op1, vectype = NULL_TREE, use_lhs, use_type;
83d5977e 1435 tree var = NULL_TREE, new_type = NULL_TREE, new_oprnd;
1107f3ae 1436 bool first;
b2a1a74d 1437 tree type = NULL;
1107f3ae
IR
1438
1439 first = true;
1440 while (1)
1441 {
1442 if (!vinfo_for_stmt (stmt)
1443 || STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (stmt)))
1444 return NULL;
1445
1446 new_def_stmt = NULL;
1447 if (!vect_operation_fits_smaller_type (stmt, var, &new_type,
1448 &op0, &op1, &new_def_stmt,
1449 stmts))
1450 {
1451 if (first)
1452 return NULL;
1453 else
1454 break;
1455 }
1456
1457 /* STMT can be performed on a smaller type. Check its uses. */
9a7a4398
UW
1458 use_stmt = vect_single_imm_use (stmt);
1459 if (!use_stmt || !is_gimple_assign (use_stmt))
1107f3ae
IR
1460 return NULL;
1461
1462 /* Create pattern statement for STMT. */
1463 vectype = get_vectype_for_scalar_type (new_type);
1464 if (!vectype)
1465 return NULL;
1466
1467 /* We want to collect all the statements for which we create pattern
1468 statetments, except for the case when the last statement in the
1469 sequence doesn't have a corresponding pattern statement. In such
1470 case we associate the last pattern statement with the last statement
36ba4aae 1471 in the sequence. Therefore, we only add the original statement to
1107f3ae
IR
1472 the list if we know that it is not the last. */
1473 if (prev_stmt)
9771b263 1474 stmts->safe_push (prev_stmt);
1107f3ae
IR
1475
1476 var = vect_recog_temp_ssa_var (new_type, NULL);
62371b92 1477 pattern_stmt
0d0e4a03 1478 = gimple_build_assign (var, gimple_assign_rhs_code (stmt), op0, op1);
1107f3ae 1479 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
083481d8 1480 new_pattern_def_seq (vinfo_for_stmt (stmt), new_def_stmt);
1107f3ae 1481
73fbfcad 1482 if (dump_enabled_p ())
1107f3ae 1483 {
ccb3ad87 1484 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1485 "created pattern stmt: ");
ccb3ad87 1486 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1487 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1488 }
1489
b2a1a74d 1490 type = gimple_expr_type (stmt);
1107f3ae
IR
1491 prev_stmt = stmt;
1492 stmt = use_stmt;
1493
1494 first = false;
1495 }
1496
1497 /* We got a sequence. We expect it to end with a type demotion operation.
1498 Otherwise, we quit (for now). There are three possible cases: the
1499 conversion is to NEW_TYPE (we don't do anything), the conversion is to
1500 a type bigger than NEW_TYPE and/or the signedness of USE_TYPE and
1501 NEW_TYPE differs (we create a new conversion statement). */
1502 if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt)))
1503 {
1504 use_lhs = gimple_assign_lhs (use_stmt);
1505 use_type = TREE_TYPE (use_lhs);
82db3d43 1506 /* Support only type demotion or signedess change. */
1107f3ae 1507 if (!INTEGRAL_TYPE_P (use_type)
82db3d43 1508 || TYPE_PRECISION (type) <= TYPE_PRECISION (use_type))
1107f3ae
IR
1509 return NULL;
1510
82db3d43
IR
1511 /* Check that NEW_TYPE is not bigger than the conversion result. */
1512 if (TYPE_PRECISION (new_type) > TYPE_PRECISION (use_type))
1513 return NULL;
1514
1107f3ae
IR
1515 if (TYPE_UNSIGNED (new_type) != TYPE_UNSIGNED (use_type)
1516 || TYPE_PRECISION (new_type) != TYPE_PRECISION (use_type))
1517 {
1518 /* Create NEW_TYPE->USE_TYPE conversion. */
b731b390 1519 new_oprnd = make_ssa_name (use_type);
0d0e4a03 1520 pattern_stmt = gimple_build_assign (new_oprnd, NOP_EXPR, var);
1107f3ae
IR
1521 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) = pattern_stmt;
1522
1523 *type_in = get_vectype_for_scalar_type (new_type);
1524 *type_out = get_vectype_for_scalar_type (use_type);
1525
1526 /* We created a pattern statement for the last statement in the
1527 sequence, so we don't need to associate it with the pattern
1528 statement created for PREV_STMT. Therefore, we add PREV_STMT
1529 to the list in order to mark it later in vect_pattern_recog_1. */
1530 if (prev_stmt)
9771b263 1531 stmts->safe_push (prev_stmt);
1107f3ae
IR
1532 }
1533 else
1534 {
1535 if (prev_stmt)
363477c0
JJ
1536 STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (use_stmt))
1537 = STMT_VINFO_PATTERN_DEF_SEQ (vinfo_for_stmt (prev_stmt));
1107f3ae
IR
1538
1539 *type_in = vectype;
1540 *type_out = NULL_TREE;
1541 }
1542
9771b263 1543 stmts->safe_push (use_stmt);
1107f3ae
IR
1544 }
1545 else
1546 /* TODO: support general case, create a conversion to the correct type. */
1547 return NULL;
1548
1549 /* Pattern detected. */
73fbfcad 1550 if (dump_enabled_p ())
1107f3ae 1551 {
ccb3ad87 1552 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 1553 "vect_recog_over_widening_pattern: detected: ");
ccb3ad87 1554 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 1555 dump_printf (MSG_NOTE, "\n");
1107f3ae
IR
1556 }
1557
1558 return pattern_stmt;
1559}
1560
36ba4aae
IR
1561/* Detect widening shift pattern:
1562
1563 type a_t;
1564 TYPE a_T, res_T;
1565
1566 S1 a_t = ;
1567 S2 a_T = (TYPE) a_t;
1568 S3 res_T = a_T << CONST;
1569
1570 where type 'TYPE' is at least double the size of type 'type'.
1571
33018845
UW
1572 Also detect cases where the shift result is immediately converted
1573 to another type 'result_type' that is no larger in size than 'TYPE'.
1574 In those cases we perform a widen-shift that directly results in
1575 'result_type', to avoid a possible over-widening situation:
36ba4aae 1576
33018845 1577 type a_t;
36ba4aae 1578 TYPE a_T, res_T;
33018845 1579 result_type res_result;
36ba4aae
IR
1580
1581 S1 a_t = ;
1582 S2 a_T = (TYPE) a_t;
1583 S3 res_T = a_T << CONST;
33018845
UW
1584 S4 res_result = (result_type) res_T;
1585 '--> res_result' = a_t w<< CONST;
36ba4aae
IR
1586
1587 And a case when 'TYPE' is 4 times bigger than 'type'. In that case we
1588 create an additional pattern stmt for S2 to create a variable of an
1589 intermediate type, and perform widen-shift on the intermediate type:
1590
1591 type a_t;
1592 interm_type a_it;
1593 TYPE a_T, res_T, res_T';
1594
1595 S1 a_t = ;
1596 S2 a_T = (TYPE) a_t;
1597 '--> a_it = (interm_type) a_t;
1598 S3 res_T = a_T << CONST;
1599 '--> res_T' = a_it <<* CONST;
1600
1601 Input/Output:
1602
1603 * STMTS: Contains a stmt from which the pattern search begins.
1604 In case of unsigned widen-shift, the original stmt (S3) is replaced with S4
1605 in STMTS. When an intermediate type is used and a pattern statement is
1606 created for S2, we also put S2 here (before S3).
1607
1608 Output:
1609
1610 * TYPE_IN: The type of the input arguments to the pattern.
1611
1612 * TYPE_OUT: The type of the output of this pattern.
1613
1614 * Return value: A new stmt that will be used to replace the sequence of
1615 stmts that constitute the pattern. In this case it will be:
1616 WIDEN_LSHIFT_EXPR <a_t, CONST>. */
1617
355fe088
TS
1618static gimple *
1619vect_recog_widen_shift_pattern (vec<gimple *> *stmts,
36ba4aae
IR
1620 tree *type_in, tree *type_out)
1621{
355fe088
TS
1622 gimple *last_stmt = stmts->pop ();
1623 gimple *def_stmt0;
36ba4aae
IR
1624 tree oprnd0, oprnd1;
1625 tree type, half_type0;
355fe088 1626 gimple *pattern_stmt;
36ba4aae 1627 tree vectype, vectype_out = NULL_TREE;
36ba4aae
IR
1628 tree var;
1629 enum tree_code dummy_code;
1630 int dummy_int;
9771b263 1631 vec<tree> dummy_vec;
355fe088 1632 gimple *use_stmt;
bc4fb355 1633 bool promotion;
36ba4aae
IR
1634
1635 if (!is_gimple_assign (last_stmt) || !vinfo_for_stmt (last_stmt))
1636 return NULL;
1637
36ba4aae 1638 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (last_stmt)))
33018845 1639 return NULL;
36ba4aae
IR
1640
1641 if (gimple_assign_rhs_code (last_stmt) != LSHIFT_EXPR)
1642 return NULL;
1643
1644 oprnd0 = gimple_assign_rhs1 (last_stmt);
1645 oprnd1 = gimple_assign_rhs2 (last_stmt);
1646 if (TREE_CODE (oprnd0) != SSA_NAME || TREE_CODE (oprnd1) != INTEGER_CST)
1647 return NULL;
1648
1649 /* Check operand 0: it has to be defined by a type promotion. */
bc4fb355 1650 if (!type_conversion_p (oprnd0, last_stmt, false, &half_type0, &def_stmt0,
566d377a 1651 &promotion)
bc4fb355
IR
1652 || !promotion)
1653 return NULL;
36ba4aae
IR
1654
1655 /* Check operand 1: has to be positive. We check that it fits the type
1656 in vect_handle_widen_op_by_const (). */
1657 if (tree_int_cst_compare (oprnd1, size_zero_node) <= 0)
1658 return NULL;
1659
1660 oprnd0 = gimple_assign_rhs1 (def_stmt0);
1661 type = gimple_expr_type (last_stmt);
1662
33018845
UW
1663 /* Check for subsequent conversion to another type. */
1664 use_stmt = vect_single_imm_use (last_stmt);
1665 if (use_stmt && is_gimple_assign (use_stmt)
1666 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (use_stmt))
1667 && !STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (use_stmt)))
1668 {
1669 tree use_lhs = gimple_assign_lhs (use_stmt);
1670 tree use_type = TREE_TYPE (use_lhs);
1671
1672 if (INTEGRAL_TYPE_P (use_type)
1673 && TYPE_PRECISION (use_type) <= TYPE_PRECISION (type))
1674 {
1675 last_stmt = use_stmt;
1676 type = use_type;
1677 }
1678 }
1679
36ba4aae 1680 /* Check if this a widening operation. */
355fe088 1681 gimple *wstmt = NULL;
36ba4aae 1682 if (!vect_handle_widen_op_by_const (last_stmt, LSHIFT_EXPR, oprnd1,
566d377a 1683 &oprnd0, &wstmt,
36ba4aae
IR
1684 type, &half_type0, def_stmt0))
1685 return NULL;
1686
36ba4aae 1687 /* Pattern detected. */
73fbfcad 1688 if (dump_enabled_p ())
ccb3ad87 1689 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1690 "vect_recog_widen_shift_pattern: detected:\n");
36ba4aae
IR
1691
1692 /* Check target support. */
1693 vectype = get_vectype_for_scalar_type (half_type0);
1694 vectype_out = get_vectype_for_scalar_type (type);
1695
1696 if (!vectype
1697 || !vectype_out
1698 || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
1699 vectype_out, vectype,
a86ec597
RH
1700 &dummy_code, &dummy_code,
1701 &dummy_int, &dummy_vec))
36ba4aae
IR
1702 return NULL;
1703
1704 *type_in = vectype;
1705 *type_out = vectype_out;
1706
1707 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1708 var = vect_recog_temp_ssa_var (type, NULL);
1709 pattern_stmt =
0d0e4a03 1710 gimple_build_assign (var, WIDEN_LSHIFT_EXPR, oprnd0, oprnd1);
566d377a
RB
1711 if (wstmt)
1712 {
1713 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
566d377a
RB
1714 new_pattern_def_seq (stmt_vinfo, wstmt);
1715 stmt_vec_info new_stmt_info
310213d4 1716 = new_stmt_vec_info (wstmt, stmt_vinfo->vinfo);
566d377a
RB
1717 set_vinfo_for_stmt (wstmt, new_stmt_info);
1718 STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
1719 }
36ba4aae 1720
73fbfcad 1721 if (dump_enabled_p ())
78c60e3d 1722 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
7e9a3abb
JJ
1723
1724 stmts->safe_push (last_stmt);
1725 return pattern_stmt;
1726}
1727
1728/* Detect a rotate pattern wouldn't be otherwise vectorized:
1729
1730 type a_t, b_t, c_t;
1731
1732 S0 a_t = b_t r<< c_t;
1733
1734 Input/Output:
1735
1736 * STMTS: Contains a stmt from which the pattern search begins,
1737 i.e. the shift/rotate stmt. The original stmt (S0) is replaced
1738 with a sequence:
1739
1740 S1 d_t = -c_t;
1741 S2 e_t = d_t & (B - 1);
1742 S3 f_t = b_t << c_t;
1743 S4 g_t = b_t >> e_t;
1744 S0 a_t = f_t | g_t;
1745
1746 where B is element bitsize of type.
1747
1748 Output:
1749
1750 * TYPE_IN: The type of the input arguments to the pattern.
1751
1752 * TYPE_OUT: The type of the output of this pattern.
1753
1754 * Return value: A new stmt that will be used to replace the rotate
1755 S0 stmt. */
1756
355fe088
TS
1757static gimple *
1758vect_recog_rotate_pattern (vec<gimple *> *stmts, tree *type_in, tree *type_out)
7e9a3abb 1759{
355fe088 1760 gimple *last_stmt = stmts->pop ();
7e9a3abb 1761 tree oprnd0, oprnd1, lhs, var, var1, var2, vectype, type, stype, def, def2;
355fe088 1762 gimple *pattern_stmt, *def_stmt;
7e9a3abb
JJ
1763 enum tree_code rhs_code;
1764 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 1765 vec_info *vinfo = stmt_vinfo->vinfo;
7e9a3abb
JJ
1766 enum vect_def_type dt;
1767 optab optab1, optab2;
68119618 1768 edge ext_def = NULL;
7e9a3abb
JJ
1769
1770 if (!is_gimple_assign (last_stmt))
1771 return NULL;
1772
1773 rhs_code = gimple_assign_rhs_code (last_stmt);
1774 switch (rhs_code)
1775 {
1776 case LROTATE_EXPR:
1777 case RROTATE_EXPR:
1778 break;
1779 default:
1780 return NULL;
1781 }
1782
1783 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
1784 return NULL;
1785
1786 lhs = gimple_assign_lhs (last_stmt);
1787 oprnd0 = gimple_assign_rhs1 (last_stmt);
1788 type = TREE_TYPE (oprnd0);
1789 oprnd1 = gimple_assign_rhs2 (last_stmt);
1790 if (TREE_CODE (oprnd0) != SSA_NAME
1791 || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type)
1792 || !INTEGRAL_TYPE_P (type)
1793 || !TYPE_UNSIGNED (type))
1794 return NULL;
1795
81c40241 1796 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
7e9a3abb
JJ
1797 return NULL;
1798
1799 if (dt != vect_internal_def
1800 && dt != vect_constant_def
1801 && dt != vect_external_def)
1802 return NULL;
1803
1804 vectype = get_vectype_for_scalar_type (type);
1805 if (vectype == NULL_TREE)
1806 return NULL;
1807
1808 /* If vector/vector or vector/scalar rotate is supported by the target,
1809 don't do anything here. */
1810 optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector);
1811 if (optab1
1812 && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1813 return NULL;
1814
310213d4 1815 if (is_a <bb_vec_info> (vinfo) || dt != vect_internal_def)
7e9a3abb
JJ
1816 {
1817 optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar);
1818 if (optab2
1819 && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing)
1820 return NULL;
1821 }
1822
1823 /* If vector/vector or vector/scalar shifts aren't supported by the target,
1824 don't do anything here either. */
1825 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
1826 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_vector);
1827 if (!optab1
1828 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1829 || !optab2
1830 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1831 {
310213d4 1832 if (! is_a <bb_vec_info> (vinfo) && dt == vect_internal_def)
7e9a3abb
JJ
1833 return NULL;
1834 optab1 = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_scalar);
1835 optab2 = optab_for_tree_code (RSHIFT_EXPR, vectype, optab_scalar);
1836 if (!optab1
1837 || optab_handler (optab1, TYPE_MODE (vectype)) == CODE_FOR_nothing
1838 || !optab2
1839 || optab_handler (optab2, TYPE_MODE (vectype)) == CODE_FOR_nothing)
1840 return NULL;
1841 }
1842
1843 *type_in = vectype;
1844 *type_out = vectype;
1845 if (*type_in == NULL_TREE)
1846 return NULL;
1847
68119618
JJ
1848 if (dt == vect_external_def
1849 && TREE_CODE (oprnd1) == SSA_NAME
310213d4 1850 && is_a <loop_vec_info> (vinfo))
68119618 1851 {
310213d4 1852 struct loop *loop = as_a <loop_vec_info> (vinfo)->loop;
68119618
JJ
1853 ext_def = loop_preheader_edge (loop);
1854 if (!SSA_NAME_IS_DEFAULT_DEF (oprnd1))
1855 {
1856 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (oprnd1));
1857 if (bb == NULL
1858 || !dominated_by_p (CDI_DOMINATORS, ext_def->dest, bb))
1859 ext_def = NULL;
1860 }
1861 }
1862
7e9a3abb
JJ
1863 def = NULL_TREE;
1864 if (TREE_CODE (oprnd1) == INTEGER_CST
1865 || TYPE_MODE (TREE_TYPE (oprnd1)) == TYPE_MODE (type))
1866 def = oprnd1;
1867 else if (def_stmt && gimple_assign_cast_p (def_stmt))
1868 {
1869 tree rhs1 = gimple_assign_rhs1 (def_stmt);
1870 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (type)
1871 && TYPE_PRECISION (TREE_TYPE (rhs1))
1872 == TYPE_PRECISION (type))
1873 def = rhs1;
1874 }
1875
1876 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
1877 if (def == NULL_TREE)
1878 {
1879 def = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1880 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
68119618
JJ
1881 if (ext_def)
1882 {
1883 basic_block new_bb
1884 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1885 gcc_assert (!new_bb);
1886 }
1887 else
1888 append_pattern_def_seq (stmt_vinfo, def_stmt);
7e9a3abb
JJ
1889 }
1890 stype = TREE_TYPE (def);
1891
1892 if (TREE_CODE (def) == INTEGER_CST)
1893 {
cc269bb6 1894 if (!tree_fits_uhwi_p (def)
7d362f6c 1895 || tree_to_uhwi (def) >= GET_MODE_PRECISION (TYPE_MODE (type))
7e9a3abb
JJ
1896 || integer_zerop (def))
1897 return NULL;
1898 def2 = build_int_cst (stype,
1899 GET_MODE_PRECISION (TYPE_MODE (type))
ae7e9ddd 1900 - tree_to_uhwi (def));
7e9a3abb
JJ
1901 }
1902 else
1903 {
1904 tree vecstype = get_vectype_for_scalar_type (stype);
1905 stmt_vec_info def_stmt_vinfo;
1906
1907 if (vecstype == NULL_TREE)
1908 return NULL;
1909 def2 = vect_recog_temp_ssa_var (stype, NULL);
0d0e4a03 1910 def_stmt = gimple_build_assign (def2, NEGATE_EXPR, def);
68119618
JJ
1911 if (ext_def)
1912 {
1913 basic_block new_bb
1914 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1915 gcc_assert (!new_bb);
1916 }
1917 else
1918 {
310213d4 1919 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
68119618
JJ
1920 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1921 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1922 append_pattern_def_seq (stmt_vinfo, def_stmt);
1923 }
7e9a3abb
JJ
1924
1925 def2 = vect_recog_temp_ssa_var (stype, NULL);
1926 tree mask
1927 = build_int_cst (stype, GET_MODE_PRECISION (TYPE_MODE (stype)) - 1);
0d0e4a03
JJ
1928 def_stmt = gimple_build_assign (def2, BIT_AND_EXPR,
1929 gimple_assign_lhs (def_stmt), mask);
68119618
JJ
1930 if (ext_def)
1931 {
1932 basic_block new_bb
1933 = gsi_insert_on_edge_immediate (ext_def, def_stmt);
1934 gcc_assert (!new_bb);
1935 }
1936 else
1937 {
310213d4 1938 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
68119618
JJ
1939 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
1940 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecstype;
1941 append_pattern_def_seq (stmt_vinfo, def_stmt);
1942 }
7e9a3abb
JJ
1943 }
1944
1945 var1 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1946 def_stmt = gimple_build_assign (var1, rhs_code == LROTATE_EXPR
1947 ? LSHIFT_EXPR : RSHIFT_EXPR,
1948 oprnd0, def);
7e9a3abb
JJ
1949 append_pattern_def_seq (stmt_vinfo, def_stmt);
1950
1951 var2 = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03
JJ
1952 def_stmt = gimple_build_assign (var2, rhs_code == LROTATE_EXPR
1953 ? RSHIFT_EXPR : LSHIFT_EXPR,
1954 oprnd0, def2);
7e9a3abb
JJ
1955 append_pattern_def_seq (stmt_vinfo, def_stmt);
1956
1957 /* Pattern detected. */
1958 if (dump_enabled_p ())
1959 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 1960 "vect_recog_rotate_pattern: detected:\n");
7e9a3abb
JJ
1961
1962 /* Pattern supported. Create a stmt to be used to replace the pattern. */
1963 var = vect_recog_temp_ssa_var (type, NULL);
0d0e4a03 1964 pattern_stmt = gimple_build_assign (var, BIT_IOR_EXPR, var1, var2);
7e9a3abb
JJ
1965
1966 if (dump_enabled_p ())
1967 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
36ba4aae 1968
9771b263 1969 stmts->safe_push (last_stmt);
36ba4aae
IR
1970 return pattern_stmt;
1971}
1107f3ae 1972
732a0ad3
JJ
1973/* Detect a vector by vector shift pattern that wouldn't be otherwise
1974 vectorized:
1975
1976 type a_t;
1977 TYPE b_T, res_T;
1978
1979 S1 a_t = ;
1980 S2 b_T = ;
1981 S3 res_T = b_T op a_t;
1982
1983 where type 'TYPE' is a type with different size than 'type',
1984 and op is <<, >> or rotate.
1985
1986 Also detect cases:
1987
1988 type a_t;
1989 TYPE b_T, c_T, res_T;
1990
1991 S0 c_T = ;
1992 S1 a_t = (type) c_T;
1993 S2 b_T = ;
1994 S3 res_T = b_T op a_t;
1995
1996 Input/Output:
1997
1998 * STMTS: Contains a stmt from which the pattern search begins,
1999 i.e. the shift/rotate stmt. The original stmt (S3) is replaced
2000 with a shift/rotate which has same type on both operands, in the
2001 second case just b_T op c_T, in the first case with added cast
363477c0 2002 from a_t to c_T in STMT_VINFO_PATTERN_DEF_SEQ.
732a0ad3
JJ
2003
2004 Output:
2005
2006 * TYPE_IN: The type of the input arguments to the pattern.
2007
2008 * TYPE_OUT: The type of the output of this pattern.
2009
2010 * Return value: A new stmt that will be used to replace the shift/rotate
2011 S3 stmt. */
2012
355fe088
TS
2013static gimple *
2014vect_recog_vector_vector_shift_pattern (vec<gimple *> *stmts,
732a0ad3
JJ
2015 tree *type_in, tree *type_out)
2016{
355fe088 2017 gimple *last_stmt = stmts->pop ();
732a0ad3 2018 tree oprnd0, oprnd1, lhs, var;
355fe088 2019 gimple *pattern_stmt, *def_stmt;
732a0ad3
JJ
2020 enum tree_code rhs_code;
2021 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 2022 vec_info *vinfo = stmt_vinfo->vinfo;
732a0ad3 2023 enum vect_def_type dt;
732a0ad3
JJ
2024
2025 if (!is_gimple_assign (last_stmt))
2026 return NULL;
2027
2028 rhs_code = gimple_assign_rhs_code (last_stmt);
2029 switch (rhs_code)
2030 {
2031 case LSHIFT_EXPR:
2032 case RSHIFT_EXPR:
2033 case LROTATE_EXPR:
2034 case RROTATE_EXPR:
2035 break;
2036 default:
2037 return NULL;
2038 }
2039
2040 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2041 return NULL;
2042
2043 lhs = gimple_assign_lhs (last_stmt);
2044 oprnd0 = gimple_assign_rhs1 (last_stmt);
2045 oprnd1 = gimple_assign_rhs2 (last_stmt);
2046 if (TREE_CODE (oprnd0) != SSA_NAME
2047 || TREE_CODE (oprnd1) != SSA_NAME
2048 || TYPE_MODE (TREE_TYPE (oprnd0)) == TYPE_MODE (TREE_TYPE (oprnd1))
2049 || TYPE_PRECISION (TREE_TYPE (oprnd1))
2050 != GET_MODE_PRECISION (TYPE_MODE (TREE_TYPE (oprnd1)))
2051 || TYPE_PRECISION (TREE_TYPE (lhs))
2052 != TYPE_PRECISION (TREE_TYPE (oprnd0)))
2053 return NULL;
2054
81c40241 2055 if (!vect_is_simple_use (oprnd1, vinfo, &def_stmt, &dt))
732a0ad3
JJ
2056 return NULL;
2057
2058 if (dt != vect_internal_def)
2059 return NULL;
2060
2061 *type_in = get_vectype_for_scalar_type (TREE_TYPE (oprnd0));
2062 *type_out = *type_in;
2063 if (*type_in == NULL_TREE)
2064 return NULL;
2065
81c40241 2066 tree def = NULL_TREE;
732a0ad3
JJ
2067 if (gimple_assign_cast_p (def_stmt))
2068 {
2069 tree rhs1 = gimple_assign_rhs1 (def_stmt);
2070 if (TYPE_MODE (TREE_TYPE (rhs1)) == TYPE_MODE (TREE_TYPE (oprnd0))
2071 && TYPE_PRECISION (TREE_TYPE (rhs1))
2072 == TYPE_PRECISION (TREE_TYPE (oprnd0)))
2073 def = rhs1;
2074 }
2075
2076 if (def == NULL_TREE)
2077 {
2078 def = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2079 def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd1);
083481d8 2080 new_pattern_def_seq (stmt_vinfo, def_stmt);
732a0ad3
JJ
2081 }
2082
2083 /* Pattern detected. */
73fbfcad 2084 if (dump_enabled_p ())
ccb3ad87 2085 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2086 "vect_recog_vector_vector_shift_pattern: detected:\n");
732a0ad3
JJ
2087
2088 /* Pattern supported. Create a stmt to be used to replace the pattern. */
2089 var = vect_recog_temp_ssa_var (TREE_TYPE (oprnd0), NULL);
0d0e4a03 2090 pattern_stmt = gimple_build_assign (var, rhs_code, oprnd0, def);
732a0ad3 2091
73fbfcad 2092 if (dump_enabled_p ())
78c60e3d 2093 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt, 0);
732a0ad3 2094
9771b263 2095 stmts->safe_push (last_stmt);
732a0ad3
JJ
2096 return pattern_stmt;
2097}
2098
47486460
VK
2099/* Detect multiplication by constant which are postive or negatives of power 2,
2100 and convert them to shift patterns.
2101
2102 Mult with constants that are postive power of two.
2103 type a_t;
2104 type b_t
2105 S1: b_t = a_t * n
2106
2107 or
2108
2109 Mult with constants that are negative power of two.
2110 S2: b_t = a_t * -n
2111
2112 Input/Output:
2113
2114 STMTS: Contains a stmt from which the pattern search begins,
2115 i.e. the mult stmt. Convert the mult operation to LSHIFT if
2116 constant operand is a power of 2.
2117 type a_t, b_t
2118 S1': b_t = a_t << log2 (n)
2119
2120 Convert the mult operation to LSHIFT and followed by a NEGATE
2121 if constant operand is a negative power of 2.
2122 type a_t, b_t, res_T;
2123 S2': b_t = a_t << log2 (n)
2124 S3': res_T = - (b_t)
2125
2126 Output:
2127
2128 * TYPE_IN: The type of the input arguments to the pattern.
2129
2130 * TYPE_OUT: The type of the output of this pattern.
2131
2132 * Return value: A new stmt that will be used to replace the multiplication
2133 S1 or S2 stmt. */
2134
355fe088
TS
2135static gimple *
2136vect_recog_mult_pattern (vec<gimple *> *stmts,
47486460
VK
2137 tree *type_in, tree *type_out)
2138{
355fe088 2139 gimple *last_stmt = stmts->pop ();
47486460 2140 tree oprnd0, oprnd1, vectype, itype;
355fe088 2141 gimple *pattern_stmt, *def_stmt;
47486460
VK
2142 optab optab;
2143 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
2144 int power2_val, power2_neg_val;
2145 tree shift;
2146
2147 if (!is_gimple_assign (last_stmt))
2148 return NULL;
2149
2150 if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
2151 return NULL;
2152
2153 oprnd0 = gimple_assign_rhs1 (last_stmt);
2154 oprnd1 = gimple_assign_rhs2 (last_stmt);
2155 itype = TREE_TYPE (oprnd0);
2156
2157 if (TREE_CODE (oprnd0) != SSA_NAME
2158 || TREE_CODE (oprnd1) != INTEGER_CST
2159 || !INTEGRAL_TYPE_P (itype)
2160 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
2161 return NULL;
2162
2163 vectype = get_vectype_for_scalar_type (itype);
2164 if (vectype == NULL_TREE)
2165 return NULL;
2166
2167 /* If the target can handle vectorized multiplication natively,
2168 don't attempt to optimize this. */
2169 optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
2170 if (optab != unknown_optab)
2171 {
2172 machine_mode vec_mode = TYPE_MODE (vectype);
2173 int icode = (int) optab_handler (optab, vec_mode);
2174 if (icode != CODE_FOR_nothing)
2175 return NULL;
2176 }
2177
2178 /* If target cannot handle vector left shift then we cannot
2179 optimize and bail out. */
2180 optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
2181 if (!optab
2182 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2183 return NULL;
2184
2185 power2_val = wi::exact_log2 (oprnd1);
2186 power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
2187
2188 /* Handle constant operands that are postive or negative powers of 2. */
2189 if (power2_val != -1)
2190 {
2191 shift = build_int_cst (itype, power2_val);
2192 pattern_stmt
2193 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2194 LSHIFT_EXPR, oprnd0, shift);
2195 }
2196 else if (power2_neg_val != -1)
2197 {
2198 /* If the target cannot handle vector NEGATE then we cannot
2199 do the optimization. */
2200 optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
2201 if (!optab
2202 || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
2203 return NULL;
2204
2205 shift = build_int_cst (itype, power2_neg_val);
2206 def_stmt
2207 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2208 LSHIFT_EXPR, oprnd0, shift);
2209 new_pattern_def_seq (stmt_vinfo, def_stmt);
2210 pattern_stmt
2211 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2212 NEGATE_EXPR, gimple_assign_lhs (def_stmt));
2213 }
2214 else
2215 return NULL;
2216
2217 /* Pattern detected. */
2218 if (dump_enabled_p ())
2219 dump_printf_loc (MSG_NOTE, vect_location,
2220 "vect_recog_mult_pattern: detected:\n");
2221
2222 if (dump_enabled_p ())
2223 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
2224 pattern_stmt,0);
2225
2226 stmts->safe_push (last_stmt);
2227 *type_in = vectype;
2228 *type_out = vectype;
2229
2230 return pattern_stmt;
2231}
2232
079c527f 2233/* Detect a signed division by a constant that wouldn't be
363477c0
JJ
2234 otherwise vectorized:
2235
2236 type a_t, b_t;
2237
2238 S1 a_t = b_t / N;
2239
079c527f 2240 where type 'type' is an integral type and N is a constant.
363477c0 2241
079c527f 2242 Similarly handle modulo by a constant:
363477c0
JJ
2243
2244 S4 a_t = b_t % N;
2245
2246 Input/Output:
2247
2248 * STMTS: Contains a stmt from which the pattern search begins,
079c527f
JJ
2249 i.e. the division stmt. S1 is replaced by if N is a power
2250 of two constant and type is signed:
363477c0
JJ
2251 S3 y_t = b_t < 0 ? N - 1 : 0;
2252 S2 x_t = b_t + y_t;
2253 S1' a_t = x_t >> log2 (N);
2254
079c527f
JJ
2255 S4 is replaced if N is a power of two constant and
2256 type is signed by (where *_T temporaries have unsigned type):
363477c0
JJ
2257 S9 y_T = b_t < 0 ? -1U : 0U;
2258 S8 z_T = y_T >> (sizeof (type_t) * CHAR_BIT - log2 (N));
2259 S7 z_t = (type) z_T;
2260 S6 w_t = b_t + z_t;
2261 S5 x_t = w_t & (N - 1);
2262 S4' a_t = x_t - z_t;
2263
2264 Output:
2265
2266 * TYPE_IN: The type of the input arguments to the pattern.
2267
2268 * TYPE_OUT: The type of the output of this pattern.
2269
2270 * Return value: A new stmt that will be used to replace the division
2271 S1 or modulo S4 stmt. */
2272
355fe088
TS
2273static gimple *
2274vect_recog_divmod_pattern (vec<gimple *> *stmts,
079c527f 2275 tree *type_in, tree *type_out)
363477c0 2276{
355fe088 2277 gimple *last_stmt = stmts->pop ();
5deb57cb 2278 tree oprnd0, oprnd1, vectype, itype, cond;
355fe088 2279 gimple *pattern_stmt, *def_stmt;
363477c0
JJ
2280 enum tree_code rhs_code;
2281 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
310213d4 2282 vec_info *vinfo = stmt_vinfo->vinfo;
363477c0 2283 optab optab;
00f07b86 2284 tree q;
079c527f 2285 int dummy_int, prec;
079c527f 2286 stmt_vec_info def_stmt_vinfo;
363477c0
JJ
2287
2288 if (!is_gimple_assign (last_stmt))
2289 return NULL;
2290
2291 rhs_code = gimple_assign_rhs_code (last_stmt);
2292 switch (rhs_code)
2293 {
2294 case TRUNC_DIV_EXPR:
2295 case TRUNC_MOD_EXPR:
2296 break;
2297 default:
2298 return NULL;
2299 }
2300
2301 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
2302 return NULL;
2303
2304 oprnd0 = gimple_assign_rhs1 (last_stmt);
2305 oprnd1 = gimple_assign_rhs2 (last_stmt);
2306 itype = TREE_TYPE (oprnd0);
2307 if (TREE_CODE (oprnd0) != SSA_NAME
2308 || TREE_CODE (oprnd1) != INTEGER_CST
2309 || TREE_CODE (itype) != INTEGER_TYPE
079c527f 2310 || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
363477c0
JJ
2311 return NULL;
2312
2313 vectype = get_vectype_for_scalar_type (itype);
2314 if (vectype == NULL_TREE)
2315 return NULL;
2316
2317 /* If the target can handle vectorized division or modulo natively,
2318 don't attempt to optimize this. */
2319 optab = optab_for_tree_code (rhs_code, vectype, optab_default);
2225b9f2 2320 if (optab != unknown_optab)
363477c0 2321 {
ef4bddc2 2322 machine_mode vec_mode = TYPE_MODE (vectype);
363477c0 2323 int icode = (int) optab_handler (optab, vec_mode);
e6d4f8f5 2324 if (icode != CODE_FOR_nothing)
363477c0
JJ
2325 return NULL;
2326 }
2327
079c527f
JJ
2328 prec = TYPE_PRECISION (itype);
2329 if (integer_pow2p (oprnd1))
363477c0 2330 {
079c527f
JJ
2331 if (TYPE_UNSIGNED (itype) || tree_int_cst_sgn (oprnd1) != 1)
2332 return NULL;
363477c0 2333
079c527f 2334 /* Pattern detected. */
73fbfcad 2335 if (dump_enabled_p ())
ccb3ad87 2336 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2337 "vect_recog_divmod_pattern: detected:\n");
079c527f
JJ
2338
2339 cond = build2 (LT_EXPR, boolean_type_node, oprnd0,
2340 build_int_cst (itype, 0));
2341 if (rhs_code == TRUNC_DIV_EXPR)
2342 {
2343 tree var = vect_recog_temp_ssa_var (itype, NULL);
2344 tree shift;
2345 def_stmt
0d0e4a03
JJ
2346 = gimple_build_assign (var, COND_EXPR, cond,
2347 fold_build2 (MINUS_EXPR, itype, oprnd1,
2348 build_int_cst (itype, 1)),
2349 build_int_cst (itype, 0));
079c527f
JJ
2350 new_pattern_def_seq (stmt_vinfo, def_stmt);
2351 var = vect_recog_temp_ssa_var (itype, NULL);
2352 def_stmt
0d0e4a03
JJ
2353 = gimple_build_assign (var, PLUS_EXPR, oprnd0,
2354 gimple_assign_lhs (def_stmt));
079c527f
JJ
2355 append_pattern_def_seq (stmt_vinfo, def_stmt);
2356
2357 shift = build_int_cst (itype, tree_log2 (oprnd1));
2358 pattern_stmt
0d0e4a03
JJ
2359 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2360 RSHIFT_EXPR, var, shift);
079c527f
JJ
2361 }
2362 else
2363 {
2364 tree signmask;
2365 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2366 if (compare_tree_int (oprnd1, 2) == 0)
2367 {
2368 signmask = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2369 def_stmt = gimple_build_assign (signmask, COND_EXPR, cond,
2370 build_int_cst (itype, 1),
2371 build_int_cst (itype, 0));
079c527f
JJ
2372 append_pattern_def_seq (stmt_vinfo, def_stmt);
2373 }
2374 else
2375 {
2376 tree utype
2377 = build_nonstandard_integer_type (prec, 1);
2378 tree vecutype = get_vectype_for_scalar_type (utype);
2379 tree shift
2380 = build_int_cst (utype, GET_MODE_BITSIZE (TYPE_MODE (itype))
2381 - tree_log2 (oprnd1));
2382 tree var = vect_recog_temp_ssa_var (utype, NULL);
2383
0d0e4a03
JJ
2384 def_stmt = gimple_build_assign (var, COND_EXPR, cond,
2385 build_int_cst (utype, -1),
2386 build_int_cst (utype, 0));
310213d4 2387 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
079c527f
JJ
2388 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2389 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2390 append_pattern_def_seq (stmt_vinfo, def_stmt);
2391 var = vect_recog_temp_ssa_var (utype, NULL);
0d0e4a03
JJ
2392 def_stmt = gimple_build_assign (var, RSHIFT_EXPR,
2393 gimple_assign_lhs (def_stmt),
2394 shift);
310213d4 2395 def_stmt_vinfo = new_stmt_vec_info (def_stmt, vinfo);
079c527f
JJ
2396 set_vinfo_for_stmt (def_stmt, def_stmt_vinfo);
2397 STMT_VINFO_VECTYPE (def_stmt_vinfo) = vecutype;
2398 append_pattern_def_seq (stmt_vinfo, def_stmt);
2399 signmask = vect_recog_temp_ssa_var (itype, NULL);
2400 def_stmt
0d0e4a03 2401 = gimple_build_assign (signmask, NOP_EXPR, var);
079c527f
JJ
2402 append_pattern_def_seq (stmt_vinfo, def_stmt);
2403 }
2404 def_stmt
0d0e4a03
JJ
2405 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2406 PLUS_EXPR, oprnd0, signmask);
079c527f
JJ
2407 append_pattern_def_seq (stmt_vinfo, def_stmt);
2408 def_stmt
0d0e4a03
JJ
2409 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2410 BIT_AND_EXPR, gimple_assign_lhs (def_stmt),
2411 fold_build2 (MINUS_EXPR, itype, oprnd1,
2412 build_int_cst (itype, 1)));
079c527f
JJ
2413 append_pattern_def_seq (stmt_vinfo, def_stmt);
2414
2415 pattern_stmt
0d0e4a03
JJ
2416 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2417 MINUS_EXPR, gimple_assign_lhs (def_stmt),
2418 signmask);
079c527f
JJ
2419 }
2420
73fbfcad 2421 if (dump_enabled_p ())
78c60e3d
SS
2422 dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
2423 0);
079c527f 2424
9771b263 2425 stmts->safe_push (last_stmt);
079c527f
JJ
2426
2427 *type_in = vectype;
2428 *type_out = vectype;
2429 return pattern_stmt;
363477c0 2430 }
079c527f 2431
6b58915b
RS
2432 if (prec > HOST_BITS_PER_WIDE_INT
2433 || integer_zerop (oprnd1))
079c527f
JJ
2434 return NULL;
2435
00f07b86
RH
2436 if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
2437 return NULL;
079c527f
JJ
2438
2439 STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
2440
2441 if (TYPE_UNSIGNED (itype))
363477c0 2442 {
079c527f
JJ
2443 unsigned HOST_WIDE_INT mh, ml;
2444 int pre_shift, post_shift;
6b58915b
RS
2445 unsigned HOST_WIDE_INT d = (TREE_INT_CST_LOW (oprnd1)
2446 & GET_MODE_MASK (TYPE_MODE (itype)));
5deb57cb 2447 tree t1, t2, t3, t4;
079c527f
JJ
2448
2449 if (d >= ((unsigned HOST_WIDE_INT) 1 << (prec - 1)))
2450 /* FIXME: Can transform this into oprnd0 >= oprnd1 ? 1 : 0. */
2451 return NULL;
2452
2453 /* Find a suitable multiplier and right shift count
2454 instead of multiplying with D. */
2455 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
2456
2457 /* If the suggested multiplier is more than SIZE bits, we can do better
2458 for even divisors, using an initial right shift. */
2459 if (mh != 0 && (d & 1) == 0)
363477c0 2460 {
079c527f
JJ
2461 pre_shift = floor_log2 (d & -d);
2462 mh = choose_multiplier (d >> pre_shift, prec, prec - pre_shift,
2463 &ml, &post_shift, &dummy_int);
2464 gcc_assert (!mh);
2465 }
2466 else
2467 pre_shift = 0;
2468
2469 if (mh != 0)
2470 {
2471 if (post_shift - 1 >= prec)
2472 return NULL;
2473
5deb57cb
JJ
2474 /* t1 = oprnd0 h* ml;
2475 t2 = oprnd0 - t1;
2476 t3 = t2 >> 1;
2477 t4 = t1 + t3;
2478 q = t4 >> (post_shift - 1); */
2479 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2480 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2481 build_int_cst (itype, ml));
079c527f 2482 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2483
5deb57cb 2484 t2 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2485 def_stmt
0d0e4a03 2486 = gimple_build_assign (t2, MINUS_EXPR, oprnd0, t1);
083481d8 2487 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f
JJ
2488
2489 t3 = vect_recog_temp_ssa_var (itype, NULL);
2490 def_stmt
0d0e4a03 2491 = gimple_build_assign (t3, RSHIFT_EXPR, t2, integer_one_node);
079c527f
JJ
2492 append_pattern_def_seq (stmt_vinfo, def_stmt);
2493
5deb57cb 2494 t4 = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2495 def_stmt
0d0e4a03 2496 = gimple_build_assign (t4, PLUS_EXPR, t1, t3);
079c527f
JJ
2497
2498 if (post_shift != 1)
2499 {
2500 append_pattern_def_seq (stmt_vinfo, def_stmt);
2501
5deb57cb 2502 q = vect_recog_temp_ssa_var (itype, NULL);
079c527f 2503 pattern_stmt
0d0e4a03
JJ
2504 = gimple_build_assign (q, RSHIFT_EXPR, t4,
2505 build_int_cst (itype, post_shift - 1));
079c527f
JJ
2506 }
2507 else
2508 {
5deb57cb 2509 q = t4;
079c527f
JJ
2510 pattern_stmt = def_stmt;
2511 }
363477c0
JJ
2512 }
2513 else
2514 {
079c527f
JJ
2515 if (pre_shift >= prec || post_shift >= prec)
2516 return NULL;
2517
2518 /* t1 = oprnd0 >> pre_shift;
5deb57cb
JJ
2519 t2 = t1 h* ml;
2520 q = t2 >> post_shift; */
079c527f
JJ
2521 if (pre_shift)
2522 {
2523 t1 = vect_recog_temp_ssa_var (itype, NULL);
2524 def_stmt
0d0e4a03
JJ
2525 = gimple_build_assign (t1, RSHIFT_EXPR, oprnd0,
2526 build_int_cst (NULL, pre_shift));
079c527f
JJ
2527 append_pattern_def_seq (stmt_vinfo, def_stmt);
2528 }
2529 else
2530 t1 = oprnd0;
363477c0 2531
5deb57cb 2532 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2533 def_stmt = gimple_build_assign (t2, MULT_HIGHPART_EXPR, t1,
2534 build_int_cst (itype, ml));
079c527f 2535
5deb57cb
JJ
2536 if (post_shift)
2537 {
2538 append_pattern_def_seq (stmt_vinfo, def_stmt);
079c527f 2539
5deb57cb
JJ
2540 q = vect_recog_temp_ssa_var (itype, NULL);
2541 def_stmt
0d0e4a03
JJ
2542 = gimple_build_assign (q, RSHIFT_EXPR, t2,
2543 build_int_cst (itype, post_shift));
5deb57cb
JJ
2544 }
2545 else
2546 q = t2;
2547
2548 pattern_stmt = def_stmt;
079c527f
JJ
2549 }
2550 }
2551 else
2552 {
2553 unsigned HOST_WIDE_INT ml;
4ee4c52c 2554 int post_shift;
6b58915b 2555 HOST_WIDE_INT d = TREE_INT_CST_LOW (oprnd1);
079c527f
JJ
2556 unsigned HOST_WIDE_INT abs_d;
2557 bool add = false;
5deb57cb 2558 tree t1, t2, t3, t4;
079c527f
JJ
2559
2560 /* Give up for -1. */
2561 if (d == -1)
2562 return NULL;
2563
079c527f
JJ
2564 /* Since d might be INT_MIN, we have to cast to
2565 unsigned HOST_WIDE_INT before negating to avoid
2566 undefined signed overflow. */
2567 abs_d = (d >= 0
2568 ? (unsigned HOST_WIDE_INT) d
2569 : - (unsigned HOST_WIDE_INT) d);
2570
2571 /* n rem d = n rem -d */
2572 if (rhs_code == TRUNC_MOD_EXPR && d < 0)
2573 {
2574 d = abs_d;
2575 oprnd1 = build_int_cst (itype, abs_d);
2576 }
2577 else if (HOST_BITS_PER_WIDE_INT >= prec
2578 && abs_d == (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2579 /* This case is not handled correctly below. */
2580 return NULL;
2581
4ee4c52c 2582 choose_multiplier (abs_d, prec, prec - 1, &ml, &post_shift, &dummy_int);
079c527f
JJ
2583 if (ml >= (unsigned HOST_WIDE_INT) 1 << (prec - 1))
2584 {
2585 add = true;
2586 ml |= (~(unsigned HOST_WIDE_INT) 0) << (prec - 1);
2587 }
2588 if (post_shift >= prec)
2589 return NULL;
2590
7abed779 2591 /* t1 = oprnd0 h* ml; */
5deb57cb 2592 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2593 def_stmt = gimple_build_assign (t1, MULT_HIGHPART_EXPR, oprnd0,
2594 build_int_cst (itype, ml));
079c527f
JJ
2595
2596 if (add)
2597 {
5deb57cb 2598 /* t2 = t1 + oprnd0; */
7abed779 2599 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2600 t2 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2601 def_stmt = gimple_build_assign (t2, PLUS_EXPR, t1, oprnd0);
079c527f
JJ
2602 }
2603 else
5deb57cb 2604 t2 = t1;
079c527f 2605
5deb57cb 2606 if (post_shift)
079c527f 2607 {
5deb57cb 2608 /* t3 = t2 >> post_shift; */
7abed779 2609 append_pattern_def_seq (stmt_vinfo, def_stmt);
5deb57cb 2610 t3 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2611 def_stmt = gimple_build_assign (t3, RSHIFT_EXPR, t2,
2612 build_int_cst (itype, post_shift));
363477c0 2613 }
079c527f 2614 else
5deb57cb 2615 t3 = t2;
079c527f 2616
807e902e 2617 wide_int oprnd0_min, oprnd0_max;
7abed779
JJ
2618 int msb = 1;
2619 if (get_range_info (oprnd0, &oprnd0_min, &oprnd0_max) == VR_RANGE)
2620 {
807e902e 2621 if (!wi::neg_p (oprnd0_min, TYPE_SIGN (itype)))
7abed779 2622 msb = 0;
807e902e 2623 else if (wi::neg_p (oprnd0_max, TYPE_SIGN (itype)))
7abed779
JJ
2624 msb = -1;
2625 }
079c527f 2626
7abed779
JJ
2627 if (msb == 0 && d >= 0)
2628 {
2629 /* q = t3; */
2630 q = t3;
2631 pattern_stmt = def_stmt;
2632 }
2633 else
2634 {
2635 /* t4 = oprnd0 >> (prec - 1);
2636 or if we know from VRP that oprnd0 >= 0
2637 t4 = 0;
2638 or if we know from VRP that oprnd0 < 0
2639 t4 = -1; */
2640 append_pattern_def_seq (stmt_vinfo, def_stmt);
2641 t4 = vect_recog_temp_ssa_var (itype, NULL);
2642 if (msb != 1)
0d0e4a03
JJ
2643 def_stmt = gimple_build_assign (t4, INTEGER_CST,
2644 build_int_cst (itype, msb));
7abed779 2645 else
0d0e4a03
JJ
2646 def_stmt = gimple_build_assign (t4, RSHIFT_EXPR, oprnd0,
2647 build_int_cst (itype, prec - 1));
7abed779
JJ
2648 append_pattern_def_seq (stmt_vinfo, def_stmt);
2649
2650 /* q = t3 - t4; or q = t4 - t3; */
2651 q = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03
JJ
2652 pattern_stmt = gimple_build_assign (q, MINUS_EXPR, d < 0 ? t4 : t3,
2653 d < 0 ? t3 : t4);
7abed779 2654 }
079c527f
JJ
2655 }
2656
2657 if (rhs_code == TRUNC_MOD_EXPR)
2658 {
2659 tree r, t1;
2660
2661 /* We divided. Now finish by:
2662 t1 = q * oprnd1;
2663 r = oprnd0 - t1; */
2664 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
2665
2666 t1 = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2667 def_stmt = gimple_build_assign (t1, MULT_EXPR, q, oprnd1);
083481d8 2668 append_pattern_def_seq (stmt_vinfo, def_stmt);
363477c0 2669
079c527f 2670 r = vect_recog_temp_ssa_var (itype, NULL);
0d0e4a03 2671 pattern_stmt = gimple_build_assign (r, MINUS_EXPR, oprnd0, t1);
363477c0
JJ
2672 }
2673
079c527f 2674 /* Pattern detected. */
73fbfcad 2675 if (dump_enabled_p ())
78c60e3d 2676 {
ccb3ad87 2677 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 2678 "vect_recog_divmod_pattern: detected: ");
ccb3ad87 2679 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
e645e942 2680 dump_printf (MSG_NOTE, "\n");
78c60e3d 2681 }
363477c0 2682
9771b263 2683 stmts->safe_push (last_stmt);
363477c0
JJ
2684
2685 *type_in = vectype;
2686 *type_out = vectype;
2687 return pattern_stmt;
2688}
2689
69d2aade
JJ
2690/* Function vect_recog_mixed_size_cond_pattern
2691
2692 Try to find the following pattern:
2693
2694 type x_t, y_t;
2695 TYPE a_T, b_T, c_T;
2696 loop:
2697 S1 a_T = x_t CMP y_t ? b_T : c_T;
2698
2699 where type 'TYPE' is an integral type which has different size
bc4fb355 2700 from 'type'. b_T and c_T are either constants (and if 'TYPE' is wider
69d2aade 2701 than 'type', the constants need to fit into an integer type
bc4fb355 2702 with the same width as 'type') or results of conversion from 'type'.
69d2aade
JJ
2703
2704 Input:
2705
2706 * LAST_STMT: A stmt from which the pattern search begins.
2707
2708 Output:
2709
2710 * TYPE_IN: The type of the input arguments to the pattern.
2711
2712 * TYPE_OUT: The type of the output of this pattern.
2713
2714 * Return value: A new stmt that will be used to replace the pattern.
2715 Additionally a def_stmt is added.
2716
2717 a_it = x_t CMP y_t ? b_it : c_it;
2718 a_T = (TYPE) a_it; */
2719
355fe088
TS
2720static gimple *
2721vect_recog_mixed_size_cond_pattern (vec<gimple *> *stmts, tree *type_in,
69d2aade
JJ
2722 tree *type_out)
2723{
355fe088 2724 gimple *last_stmt = (*stmts)[0];
69d2aade
JJ
2725 tree cond_expr, then_clause, else_clause;
2726 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt), def_stmt_info;
bc4fb355 2727 tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype;
355fe088 2728 gimple *pattern_stmt, *def_stmt;
310213d4 2729 vec_info *vinfo = stmt_vinfo->vinfo;
bc4fb355 2730 tree orig_type0 = NULL_TREE, orig_type1 = NULL_TREE;
355fe088 2731 gimple *def_stmt0 = NULL, *def_stmt1 = NULL;
bc4fb355
IR
2732 bool promotion;
2733 tree comp_scalar_type;
69d2aade
JJ
2734
2735 if (!is_gimple_assign (last_stmt)
2736 || gimple_assign_rhs_code (last_stmt) != COND_EXPR
2737 || STMT_VINFO_DEF_TYPE (stmt_vinfo) != vect_internal_def)
2738 return NULL;
2739
2740 cond_expr = gimple_assign_rhs1 (last_stmt);
2741 then_clause = gimple_assign_rhs2 (last_stmt);
2742 else_clause = gimple_assign_rhs3 (last_stmt);
2743
87aab9b2
JJ
2744 if (!COMPARISON_CLASS_P (cond_expr))
2745 return NULL;
2746
bc4fb355
IR
2747 comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0));
2748 comp_vectype = get_vectype_for_scalar_type (comp_scalar_type);
87aab9b2 2749 if (comp_vectype == NULL_TREE)
69d2aade
JJ
2750 return NULL;
2751
2752 type = gimple_expr_type (last_stmt);
bc4fb355
IR
2753 if (types_compatible_p (type, comp_scalar_type)
2754 || ((TREE_CODE (then_clause) != INTEGER_CST
2755 || TREE_CODE (else_clause) != INTEGER_CST)
2756 && !INTEGRAL_TYPE_P (comp_scalar_type))
2757 || !INTEGRAL_TYPE_P (type))
2758 return NULL;
2759
2760 if ((TREE_CODE (then_clause) != INTEGER_CST
2761 && !type_conversion_p (then_clause, last_stmt, false, &orig_type0,
2762 &def_stmt0, &promotion))
2763 || (TREE_CODE (else_clause) != INTEGER_CST
2764 && !type_conversion_p (else_clause, last_stmt, false, &orig_type1,
2765 &def_stmt1, &promotion)))
2766 return NULL;
2767
2768 if (orig_type0 && orig_type1
2769 && !types_compatible_p (orig_type0, orig_type1))
2770 return NULL;
2771
2772 if (orig_type0)
2773 {
2774 if (!types_compatible_p (orig_type0, comp_scalar_type))
2775 return NULL;
2776 then_clause = gimple_assign_rhs1 (def_stmt0);
2777 itype = orig_type0;
2778 }
2779
2780 if (orig_type1)
2781 {
2782 if (!types_compatible_p (orig_type1, comp_scalar_type))
2783 return NULL;
2784 else_clause = gimple_assign_rhs1 (def_stmt1);
2785 itype = orig_type1;
2786 }
2787
69d2aade 2788
6c825cd4
DS
2789 HOST_WIDE_INT cmp_mode_size
2790 = GET_MODE_UNIT_BITSIZE (TYPE_MODE (comp_vectype));
2791
2792 if (GET_MODE_BITSIZE (TYPE_MODE (type)) == cmp_mode_size)
69d2aade
JJ
2793 return NULL;
2794
2795 vectype = get_vectype_for_scalar_type (type);
2796 if (vectype == NULL_TREE)
2797 return NULL;
2798
2799 if (expand_vec_cond_expr_p (vectype, comp_vectype))
2800 return NULL;
2801
bc4fb355 2802 if (itype == NULL_TREE)
6c825cd4 2803 itype = build_nonstandard_integer_type (cmp_mode_size,
bc4fb355
IR
2804 TYPE_UNSIGNED (type));
2805
69d2aade 2806 if (itype == NULL_TREE
6c825cd4 2807 || GET_MODE_BITSIZE (TYPE_MODE (itype)) != cmp_mode_size)
69d2aade
JJ
2808 return NULL;
2809
2810 vecitype = get_vectype_for_scalar_type (itype);
2811 if (vecitype == NULL_TREE)
2812 return NULL;
2813
2814 if (!expand_vec_cond_expr_p (vecitype, comp_vectype))
2815 return NULL;
2816
6c825cd4 2817 if (GET_MODE_BITSIZE (TYPE_MODE (type)) > cmp_mode_size)
69d2aade 2818 {
bc4fb355
IR
2819 if ((TREE_CODE (then_clause) == INTEGER_CST
2820 && !int_fits_type_p (then_clause, itype))
2821 || (TREE_CODE (else_clause) == INTEGER_CST
2822 && !int_fits_type_p (else_clause, itype)))
69d2aade
JJ
2823 return NULL;
2824 }
2825
0d0e4a03
JJ
2826 def_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2827 COND_EXPR, unshare_expr (cond_expr),
2828 fold_convert (itype, then_clause),
2829 fold_convert (itype, else_clause));
2830 pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2831 NOP_EXPR, gimple_assign_lhs (def_stmt));
69d2aade 2832
083481d8 2833 new_pattern_def_seq (stmt_vinfo, def_stmt);
310213d4 2834 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
69d2aade
JJ
2835 set_vinfo_for_stmt (def_stmt, def_stmt_info);
2836 STMT_VINFO_VECTYPE (def_stmt_info) = vecitype;
2837 *type_in = vecitype;
2838 *type_out = vectype;
2839
73fbfcad 2840 if (dump_enabled_p ())
ccb3ad87 2841 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 2842 "vect_recog_mixed_size_cond_pattern: detected:\n");
f5709183 2843
69d2aade
JJ
2844 return pattern_stmt;
2845}
2846
2847
71c92d17 2848/* Helper function of vect_recog_bool_pattern. Called recursively, return
42fd8198
IE
2849 true if bool VAR can and should be optimized that way. Assume it shouldn't
2850 in case it's a result of a comparison which can be directly vectorized into
2851 a vector comparison. */
71c92d17
JJ
2852
2853static bool
310213d4 2854check_bool_pattern (tree var, vec_info *vinfo)
71c92d17 2855{
355fe088 2856 gimple *def_stmt;
71c92d17 2857 enum vect_def_type dt;
81c40241 2858 tree rhs1;
71c92d17
JJ
2859 enum tree_code rhs_code;
2860
81c40241 2861 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
71c92d17
JJ
2862 return false;
2863
2864 if (dt != vect_internal_def)
2865 return false;
2866
2867 if (!is_gimple_assign (def_stmt))
2868 return false;
2869
81c40241 2870 if (!has_single_use (var))
71c92d17
JJ
2871 return false;
2872
2873 rhs1 = gimple_assign_rhs1 (def_stmt);
2874 rhs_code = gimple_assign_rhs_code (def_stmt);
2875 switch (rhs_code)
2876 {
2877 case SSA_NAME:
310213d4 2878 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2879
2880 CASE_CONVERT:
2881 if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
2882 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
2883 && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
2884 return false;
310213d4 2885 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2886
2887 case BIT_NOT_EXPR:
310213d4 2888 return check_bool_pattern (rhs1, vinfo);
71c92d17
JJ
2889
2890 case BIT_AND_EXPR:
2891 case BIT_IOR_EXPR:
2892 case BIT_XOR_EXPR:
310213d4 2893 if (!check_bool_pattern (rhs1, vinfo))
71c92d17 2894 return false;
310213d4 2895 return check_bool_pattern (gimple_assign_rhs2 (def_stmt), vinfo);
71c92d17
JJ
2896
2897 default:
2898 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
2899 {
42fd8198 2900 tree vecitype, comp_vectype, mask_type;
71c92d17 2901
2f326699
JJ
2902 /* If the comparison can throw, then is_gimple_condexpr will be
2903 false and we can't make a COND_EXPR/VEC_COND_EXPR out of it. */
2904 if (stmt_could_throw_p (def_stmt))
2905 return false;
2906
71c92d17
JJ
2907 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
2908 if (comp_vectype == NULL_TREE)
2909 return false;
2910
42fd8198
IE
2911 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
2912 if (mask_type
2913 && expand_vec_cmp_expr_p (comp_vectype, mask_type))
2914 return false;
2915
71c92d17
JJ
2916 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
2917 {
ef4bddc2 2918 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 2919 tree itype
ab0ef706 2920 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
2921 vecitype = get_vectype_for_scalar_type (itype);
2922 if (vecitype == NULL_TREE)
2923 return false;
2924 }
2925 else
2926 vecitype = comp_vectype;
2927 return expand_vec_cond_expr_p (vecitype, comp_vectype);
2928 }
2929 return false;
2930 }
2931}
2932
2933
2934/* Helper function of adjust_bool_pattern. Add a cast to TYPE to a previous
2935 stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
363477c0 2936 to PATTERN_DEF_SEQ and adding a cast as RELATED_STMT. */
71c92d17
JJ
2937
2938static tree
2939adjust_bool_pattern_cast (tree type, tree var)
2940{
2941 stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
355fe088 2942 gimple *cast_stmt, *pattern_stmt;
71c92d17 2943
363477c0 2944 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo));
71c92d17 2945 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
083481d8 2946 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
0d0e4a03
JJ
2947 cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL),
2948 NOP_EXPR, gimple_assign_lhs (pattern_stmt));
71c92d17
JJ
2949 STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
2950 return gimple_assign_lhs (cast_stmt);
2951}
2952
2953
2954/* Helper function of vect_recog_bool_pattern. Do the actual transformations,
2955 recursively. VAR is an SSA_NAME that should be transformed from bool
2956 to a wider integer type, OUT_TYPE is the desired final integer type of
2957 the whole pattern, TRUEVAL should be NULL unless optimizing
2958 BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
2959 in the then_clause, STMTS is where statements with added pattern stmts
2960 should be pushed to. */
2961
2962static tree
2963adjust_bool_pattern (tree var, tree out_type, tree trueval,
355fe088 2964 vec<gimple *> *stmts)
71c92d17 2965{
355fe088 2966 gimple *stmt = SSA_NAME_DEF_STMT (var);
71c92d17
JJ
2967 enum tree_code rhs_code, def_rhs_code;
2968 tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
2969 location_t loc;
355fe088 2970 gimple *pattern_stmt, *def_stmt;
71c92d17
JJ
2971
2972 rhs1 = gimple_assign_rhs1 (stmt);
2973 rhs2 = gimple_assign_rhs2 (stmt);
2974 rhs_code = gimple_assign_rhs_code (stmt);
2975 loc = gimple_location (stmt);
2976 switch (rhs_code)
2977 {
2978 case SSA_NAME:
2979 CASE_CONVERT:
2980 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2981 itype = TREE_TYPE (irhs1);
2982 pattern_stmt
0d0e4a03
JJ
2983 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2984 SSA_NAME, irhs1);
71c92d17
JJ
2985 break;
2986
2987 case BIT_NOT_EXPR:
2988 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
2989 itype = TREE_TYPE (irhs1);
2990 pattern_stmt
0d0e4a03
JJ
2991 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
2992 BIT_XOR_EXPR, irhs1, build_int_cst (itype, 1));
71c92d17
JJ
2993 break;
2994
2995 case BIT_AND_EXPR:
2996 /* Try to optimize x = y & (a < b ? 1 : 0); into
2997 x = (a < b ? y : 0);
2998
2999 E.g. for:
3000 bool a_b, b_b, c_b;
3001 TYPE d_T;
3002
3003 S1 a_b = x1 CMP1 y1;
3004 S2 b_b = x2 CMP2 y2;
3005 S3 c_b = a_b & b_b;
3006 S4 d_T = (TYPE) c_b;
3007
3008 we would normally emit:
3009
3010 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3011 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3012 S3' c_T = a_T & b_T;
3013 S4' d_T = c_T;
3014
3015 but we can save one stmt by using the
3016 result of one of the COND_EXPRs in the other COND_EXPR and leave
3017 BIT_AND_EXPR stmt out:
3018
3019 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3020 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3021 S4' f_T = c_T;
3022
3023 At least when VEC_COND_EXPR is implemented using masks
3024 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
3025 computes the comparison masks and ands it, in one case with
3026 all ones vector, in the other case with a vector register.
3027 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
3028 often more expensive. */
3029 def_stmt = SSA_NAME_DEF_STMT (rhs2);
3030 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3031 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3032 {
3033 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3034 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3035 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3036 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3037 {
355fe088 3038 gimple *tstmt;
71c92d17
JJ
3039 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3040 irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
9771b263 3041 tstmt = stmts->pop ();
71c92d17 3042 gcc_assert (tstmt == def_stmt);
9771b263 3043 stmts->quick_push (stmt);
71c92d17
JJ
3044 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3045 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 3046 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
3047 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3048 return irhs2;
3049 }
3050 else
3051 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3052 goto and_ior_xor;
3053 }
3054 def_stmt = SSA_NAME_DEF_STMT (rhs1);
3055 def_rhs_code = gimple_assign_rhs_code (def_stmt);
3056 if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
3057 {
3058 tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
3059 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3060 if (TYPE_PRECISION (TREE_TYPE (irhs2))
3061 == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
3062 {
355fe088 3063 gimple *tstmt;
71c92d17
JJ
3064 stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
3065 irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
9771b263 3066 tstmt = stmts->pop ();
71c92d17 3067 gcc_assert (tstmt == def_stmt);
9771b263 3068 stmts->quick_push (stmt);
71c92d17
JJ
3069 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
3070 = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
363477c0 3071 gcc_assert (!STMT_VINFO_PATTERN_DEF_SEQ (stmt_def_vinfo));
71c92d17
JJ
3072 STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
3073 return irhs1;
3074 }
3075 else
3076 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3077 goto and_ior_xor;
3078 }
3079 /* FALLTHRU */
3080 case BIT_IOR_EXPR:
3081 case BIT_XOR_EXPR:
3082 irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
3083 irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
3084 and_ior_xor:
3085 if (TYPE_PRECISION (TREE_TYPE (irhs1))
3086 != TYPE_PRECISION (TREE_TYPE (irhs2)))
3087 {
3088 int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
3089 int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
3090 int out_prec = TYPE_PRECISION (out_type);
3091 if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
3092 irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
3093 else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
3094 irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
3095 else
3096 {
3097 irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
3098 irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
3099 }
3100 }
3101 itype = TREE_TYPE (irhs1);
3102 pattern_stmt
0d0e4a03
JJ
3103 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3104 rhs_code, irhs1, irhs2);
71c92d17
JJ
3105 break;
3106
3107 default:
3108 gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
3109 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
e6a21dd2
JJ
3110 || !TYPE_UNSIGNED (TREE_TYPE (rhs1))
3111 || (TYPE_PRECISION (TREE_TYPE (rhs1))
3112 != GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (rhs1)))))
71c92d17 3113 {
ef4bddc2 3114 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
71c92d17 3115 itype
ab0ef706 3116 = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
71c92d17
JJ
3117 }
3118 else
3119 itype = TREE_TYPE (rhs1);
3120 cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
3121 if (trueval == NULL_TREE)
3122 trueval = build_int_cst (itype, 1);
3123 else
3124 gcc_checking_assert (useless_type_conversion_p (itype,
3125 TREE_TYPE (trueval)));
3126 pattern_stmt
0d0e4a03
JJ
3127 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
3128 COND_EXPR, cond_expr, trueval,
3129 build_int_cst (itype, 0));
71c92d17
JJ
3130 break;
3131 }
3132
9771b263 3133 stmts->safe_push (stmt);
71c92d17
JJ
3134 gimple_set_location (pattern_stmt, loc);
3135 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
3136 return gimple_assign_lhs (pattern_stmt);
3137}
3138
3139
42fd8198
IE
3140/* Return the proper type for converting bool VAR into
3141 an integer value or NULL_TREE if no such type exists.
3142 The type is chosen so that converted value has the
3143 same number of elements as VAR's vector type. */
3144
3145static tree
3146search_type_for_mask (tree var, vec_info *vinfo)
3147{
3148 gimple *def_stmt;
3149 enum vect_def_type dt;
3150 tree rhs1;
3151 enum tree_code rhs_code;
e6f5c25d 3152 tree res = NULL_TREE, res2;
42fd8198
IE
3153
3154 if (TREE_CODE (var) != SSA_NAME)
3155 return NULL_TREE;
3156
3157 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3158 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3159 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3160 return NULL_TREE;
3161
3162 if (!vect_is_simple_use (var, vinfo, &def_stmt, &dt))
3163 return NULL_TREE;
3164
3165 if (dt != vect_internal_def)
3166 return NULL_TREE;
3167
3168 if (!is_gimple_assign (def_stmt))
3169 return NULL_TREE;
3170
3171 rhs_code = gimple_assign_rhs_code (def_stmt);
3172 rhs1 = gimple_assign_rhs1 (def_stmt);
3173
3174 switch (rhs_code)
3175 {
3176 case SSA_NAME:
3177 case BIT_NOT_EXPR:
3178 CASE_CONVERT:
3179 res = search_type_for_mask (rhs1, vinfo);
3180 break;
3181
3182 case BIT_AND_EXPR:
3183 case BIT_IOR_EXPR:
3184 case BIT_XOR_EXPR:
e6f5c25d
IE
3185 res = search_type_for_mask (rhs1, vinfo);
3186 res2 = search_type_for_mask (gimple_assign_rhs2 (def_stmt), vinfo);
3187 if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
3188 res = res2;
42fd8198
IE
3189 break;
3190
3191 default:
3192 if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
3193 {
e6f5c25d
IE
3194 tree comp_vectype, mask_type;
3195
3196 comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
3197 if (comp_vectype == NULL_TREE)
3198 return NULL_TREE;
3199
3200 mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1));
3201 if (!mask_type
3202 || !expand_vec_cmp_expr_p (comp_vectype, mask_type))
3203 return NULL_TREE;
3204
42fd8198
IE
3205 if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
3206 || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
3207 {
3208 machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
3209 res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
3210 }
3211 else
3212 res = TREE_TYPE (rhs1);
3213 }
3214 }
3215
3216 return res;
3217}
3218
3219
71c92d17
JJ
3220/* Function vect_recog_bool_pattern
3221
3222 Try to find pattern like following:
3223
3224 bool a_b, b_b, c_b, d_b, e_b;
3225 TYPE f_T;
3226 loop:
3227 S1 a_b = x1 CMP1 y1;
3228 S2 b_b = x2 CMP2 y2;
3229 S3 c_b = a_b & b_b;
3230 S4 d_b = x3 CMP3 y3;
3231 S5 e_b = c_b | d_b;
3232 S6 f_T = (TYPE) e_b;
3233
52264dbf
RB
3234 where type 'TYPE' is an integral type. Or a similar pattern
3235 ending in
3236
3237 S6 f_Y = e_b ? r_Y : s_Y;
3238
3239 as results from if-conversion of a complex condition.
71c92d17
JJ
3240
3241 Input:
3242
3243 * LAST_STMT: A stmt at the end from which the pattern
3244 search begins, i.e. cast of a bool to
3245 an integer type.
3246
3247 Output:
3248
3249 * TYPE_IN: The type of the input arguments to the pattern.
3250
3251 * TYPE_OUT: The type of the output of this pattern.
3252
3253 * Return value: A new stmt that will be used to replace the pattern.
3254
3255 Assuming size of TYPE is the same as size of all comparisons
3256 (otherwise some casts would be added where needed), the above
3257 sequence we create related pattern stmts:
3258 S1' a_T = x1 CMP1 y1 ? 1 : 0;
3259 S3' c_T = x2 CMP2 y2 ? a_T : 0;
3260 S4' d_T = x3 CMP3 y3 ? 1 : 0;
3261 S5' e_T = c_T | d_T;
3262 S6' f_T = e_T;
3263
3264 Instead of the above S3' we could emit:
3265 S2' b_T = x2 CMP2 y2 ? 1 : 0;
3266 S3' c_T = a_T | b_T;
3267 but the above is more efficient. */
3268
355fe088
TS
3269static gimple *
3270vect_recog_bool_pattern (vec<gimple *> *stmts, tree *type_in,
71c92d17
JJ
3271 tree *type_out)
3272{
355fe088 3273 gimple *last_stmt = stmts->pop ();
71c92d17
JJ
3274 enum tree_code rhs_code;
3275 tree var, lhs, rhs, vectype;
3276 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
42fd8198 3277 stmt_vec_info new_stmt_info;
310213d4 3278 vec_info *vinfo = stmt_vinfo->vinfo;
355fe088 3279 gimple *pattern_stmt;
71c92d17
JJ
3280
3281 if (!is_gimple_assign (last_stmt))
3282 return NULL;
3283
3284 var = gimple_assign_rhs1 (last_stmt);
3285 lhs = gimple_assign_lhs (last_stmt);
3286
3287 if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
3288 || !TYPE_UNSIGNED (TREE_TYPE (var)))
3289 && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
3290 return NULL;
3291
3292 rhs_code = gimple_assign_rhs_code (last_stmt);
3293 if (CONVERT_EXPR_CODE_P (rhs_code))
3294 {
78048b1c
JJ
3295 if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
3296 || TYPE_PRECISION (TREE_TYPE (lhs)) == 1)
71c92d17
JJ
3297 return NULL;
3298 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3299 if (vectype == NULL_TREE)
3300 return NULL;
3301
42fd8198
IE
3302 if (check_bool_pattern (var, vinfo))
3303 {
3304 rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
3305 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3306 if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3307 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
3308 else
3309 pattern_stmt
3310 = gimple_build_assign (lhs, NOP_EXPR, rhs);
3311 }
71c92d17 3312 else
42fd8198
IE
3313 {
3314 tree type = search_type_for_mask (var, vinfo);
a414c77f 3315 tree cst0, cst1, tmp;
42fd8198
IE
3316
3317 if (!type)
3318 return NULL;
3319
3320 /* We may directly use cond with narrowed type to avoid
3321 multiple cond exprs with following result packing and
3322 perform single cond with packed mask instead. In case
3323 of widening we better make cond first and then extract
3324 results. */
3325 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (lhs)))
3326 type = TREE_TYPE (lhs);
3327
3328 cst0 = build_int_cst (type, 0);
3329 cst1 = build_int_cst (type, 1);
3330 tmp = vect_recog_temp_ssa_var (type, NULL);
a414c77f 3331 pattern_stmt = gimple_build_assign (tmp, COND_EXPR, var, cst1, cst0);
42fd8198
IE
3332
3333 if (!useless_type_conversion_p (type, TREE_TYPE (lhs)))
3334 {
3335 tree new_vectype = get_vectype_for_scalar_type (type);
3336 new_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3337 set_vinfo_for_stmt (pattern_stmt, new_stmt_info);
3338 STMT_VINFO_VECTYPE (new_stmt_info) = new_vectype;
3339 new_pattern_def_seq (stmt_vinfo, pattern_stmt);
3340
3341 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3342 pattern_stmt = gimple_build_assign (lhs, CONVERT_EXPR, tmp);
3343 }
3344 }
3345
71c92d17
JJ
3346 *type_out = vectype;
3347 *type_in = vectype;
9771b263 3348 stmts->safe_push (last_stmt);
73fbfcad 3349 if (dump_enabled_p ())
ccb3ad87 3350 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3351 "vect_recog_bool_pattern: detected:\n");
f5709183 3352
52264dbf
RB
3353 return pattern_stmt;
3354 }
3355 else if (rhs_code == COND_EXPR
3356 && TREE_CODE (var) == SSA_NAME)
3357 {
3358 vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3359 if (vectype == NULL_TREE)
3360 return NULL;
3361
3362 /* Build a scalar type for the boolean result that when
3363 vectorized matches the vector type of the result in
3364 size and number of elements. */
3365 unsigned prec
3366 = wi::udiv_trunc (TYPE_SIZE (vectype),
3367 TYPE_VECTOR_SUBPARTS (vectype)).to_uhwi ();
3368 tree type
3369 = build_nonstandard_integer_type (prec,
3370 TYPE_UNSIGNED (TREE_TYPE (var)));
3371 if (get_vectype_for_scalar_type (type) == NULL_TREE)
3372 return NULL;
3373
a414c77f
IE
3374 if (!check_bool_pattern (var, vinfo))
3375 return NULL;
3376
3377 rhs = adjust_bool_pattern (var, type, NULL_TREE, stmts);
52264dbf 3378
52264dbf
RB
3379 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3380 pattern_stmt
a414c77f
IE
3381 = gimple_build_assign (lhs, COND_EXPR,
3382 build2 (NE_EXPR, boolean_type_node,
3383 rhs, build_int_cst (type, 0)),
0d0e4a03
JJ
3384 gimple_assign_rhs2 (last_stmt),
3385 gimple_assign_rhs3 (last_stmt));
52264dbf
RB
3386 *type_out = vectype;
3387 *type_in = vectype;
3388 stmts->safe_push (last_stmt);
3389 if (dump_enabled_p ())
3390 dump_printf_loc (MSG_NOTE, vect_location,
3391 "vect_recog_bool_pattern: detected:\n");
3392
71c92d17
JJ
3393 return pattern_stmt;
3394 }
ab0ef706
JJ
3395 else if (rhs_code == SSA_NAME
3396 && STMT_VINFO_DATA_REF (stmt_vinfo))
3397 {
3398 stmt_vec_info pattern_stmt_info;
3399 vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
3400 gcc_assert (vectype != NULL_TREE);
78336739
JJ
3401 if (!VECTOR_MODE_P (TYPE_MODE (vectype)))
3402 return NULL;
ab0ef706 3403
42fd8198
IE
3404 if (check_bool_pattern (var, vinfo))
3405 rhs = adjust_bool_pattern (var, TREE_TYPE (vectype),
3406 NULL_TREE, stmts);
3407 else
3408 {
3409 tree type = search_type_for_mask (var, vinfo);
a414c77f 3410 tree cst0, cst1, new_vectype;
42fd8198
IE
3411
3412 if (!type)
3413 return NULL;
3414
3415 if (TYPE_MODE (type) == TYPE_MODE (TREE_TYPE (vectype)))
3416 type = TREE_TYPE (vectype);
3417
3418 cst0 = build_int_cst (type, 0);
3419 cst1 = build_int_cst (type, 1);
3420 new_vectype = get_vectype_for_scalar_type (type);
3421
3422 rhs = vect_recog_temp_ssa_var (type, NULL);
a414c77f 3423 pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0);
42fd8198
IE
3424
3425 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3426 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3427 STMT_VINFO_VECTYPE (pattern_stmt_info) = new_vectype;
3428 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3429 }
3430
ab0ef706
JJ
3431 lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (vectype), lhs);
3432 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
3433 {
3434 tree rhs2 = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
355fe088 3435 gimple *cast_stmt = gimple_build_assign (rhs2, NOP_EXPR, rhs);
42fd8198 3436 append_pattern_def_seq (stmt_vinfo, cast_stmt);
ab0ef706
JJ
3437 rhs = rhs2;
3438 }
0d0e4a03 3439 pattern_stmt = gimple_build_assign (lhs, SSA_NAME, rhs);
310213d4 3440 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
ab0ef706
JJ
3441 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3442 STMT_VINFO_DATA_REF (pattern_stmt_info)
3443 = STMT_VINFO_DATA_REF (stmt_vinfo);
3444 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3445 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3446 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3447 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3448 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3449 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3450 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3451 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
78048b1c 3452 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
ab0ef706
JJ
3453 *type_out = vectype;
3454 *type_in = vectype;
9771b263 3455 stmts->safe_push (last_stmt);
73fbfcad 3456 if (dump_enabled_p ())
ccb3ad87 3457 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3458 "vect_recog_bool_pattern: detected:\n");
ab0ef706
JJ
3459 return pattern_stmt;
3460 }
71c92d17
JJ
3461 else
3462 return NULL;
3463}
3464
3465
e6f5c25d
IE
3466/* A helper for vect_recog_mask_conversion_pattern. Build
3467 conversion of MASK to a type suitable for masking VECTYPE.
3468 Built statement gets required vectype and is appended to
3469 a pattern sequence of STMT_VINFO.
3470
3471 Return converted mask. */
3472
3473static tree
3474build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo,
3475 vec_info *vinfo)
3476{
3477 gimple *stmt;
3478 tree masktype, tmp;
3479 stmt_vec_info new_stmt_info;
3480
3481 masktype = build_same_sized_truth_vector_type (vectype);
3482 tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
3483 stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
3484 new_stmt_info = new_stmt_vec_info (stmt, vinfo);
3485 set_vinfo_for_stmt (stmt, new_stmt_info);
3486 STMT_VINFO_VECTYPE (new_stmt_info) = masktype;
3487 append_pattern_def_seq (stmt_vinfo, stmt);
3488
3489 return tmp;
3490}
3491
3492
3493/* Function vect_recog_mask_conversion_pattern
3494
3495 Try to find statements which require boolean type
3496 converison. Additional conversion statements are
3497 added to handle such cases. For example:
3498
3499 bool m_1, m_2, m_3;
3500 int i_4, i_5;
3501 double d_6, d_7;
3502 char c_1, c_2, c_3;
3503
3504 S1 m_1 = i_4 > i_5;
3505 S2 m_2 = d_6 < d_7;
3506 S3 m_3 = m_1 & m_2;
3507 S4 c_1 = m_3 ? c_2 : c_3;
3508
3509 Will be transformed into:
3510
3511 S1 m_1 = i_4 > i_5;
3512 S2 m_2 = d_6 < d_7;
3513 S3'' m_2' = (_Bool[bitsize=32])m_2
3514 S3' m_3' = m_1 & m_2';
3515 S4'' m_3'' = (_Bool[bitsize=8])m_3'
3516 S4' c_1' = m_3'' ? c_2 : c_3; */
3517
3518static gimple *
3519vect_recog_mask_conversion_pattern (vec<gimple *> *stmts, tree *type_in,
3520 tree *type_out)
3521{
3522 gimple *last_stmt = stmts->pop ();
3523 enum tree_code rhs_code;
3524 tree lhs, rhs1, rhs2, tmp, rhs1_type, rhs2_type, vectype1, vectype2;
3525 stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
3526 stmt_vec_info pattern_stmt_info;
3527 vec_info *vinfo = stmt_vinfo->vinfo;
3528 gimple *pattern_stmt;
3529
3530 /* Check for MASK_LOAD ans MASK_STORE calls requiring mask conversion. */
3531 if (is_gimple_call (last_stmt)
3532 && gimple_call_internal_p (last_stmt)
3533 && (gimple_call_internal_fn (last_stmt) == IFN_MASK_STORE
3534 || gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD))
3535 {
3536 bool load = (gimple_call_internal_fn (last_stmt) == IFN_MASK_LOAD);
3537
3538 if (load)
3539 {
3540 lhs = gimple_call_lhs (last_stmt);
3541 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3542 }
3543 else
3544 {
3545 rhs2 = gimple_call_arg (last_stmt, 3);
3546 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs2));
3547 }
3548
3549 rhs1 = gimple_call_arg (last_stmt, 2);
3550 rhs1_type = search_type_for_mask (rhs1, vinfo);
3551 if (!rhs1_type)
3552 return NULL;
3553 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3554
3555 if (!vectype1 || !vectype2
3556 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3557 return NULL;
3558
3559 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3560
3561 if (load)
3562 {
3563 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3564 pattern_stmt
3565 = gimple_build_call_internal (IFN_MASK_LOAD, 3,
3566 gimple_call_arg (last_stmt, 0),
3567 gimple_call_arg (last_stmt, 1),
3568 tmp);
3569 gimple_call_set_lhs (pattern_stmt, lhs);
3570 }
3571 else
3572 pattern_stmt
3573 = gimple_build_call_internal (IFN_MASK_STORE, 4,
3574 gimple_call_arg (last_stmt, 0),
3575 gimple_call_arg (last_stmt, 1),
3576 tmp,
3577 gimple_call_arg (last_stmt, 3));
3578
3579
3580 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3581 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3582 STMT_VINFO_DATA_REF (pattern_stmt_info)
3583 = STMT_VINFO_DATA_REF (stmt_vinfo);
3584 STMT_VINFO_DR_BASE_ADDRESS (pattern_stmt_info)
3585 = STMT_VINFO_DR_BASE_ADDRESS (stmt_vinfo);
3586 STMT_VINFO_DR_INIT (pattern_stmt_info) = STMT_VINFO_DR_INIT (stmt_vinfo);
3587 STMT_VINFO_DR_OFFSET (pattern_stmt_info)
3588 = STMT_VINFO_DR_OFFSET (stmt_vinfo);
3589 STMT_VINFO_DR_STEP (pattern_stmt_info) = STMT_VINFO_DR_STEP (stmt_vinfo);
3590 STMT_VINFO_DR_ALIGNED_TO (pattern_stmt_info)
3591 = STMT_VINFO_DR_ALIGNED_TO (stmt_vinfo);
3592 DR_STMT (STMT_VINFO_DATA_REF (stmt_vinfo)) = pattern_stmt;
3593
3594 *type_out = vectype1;
3595 *type_in = vectype1;
3596 stmts->safe_push (last_stmt);
3597 if (dump_enabled_p ())
3598 dump_printf_loc (MSG_NOTE, vect_location,
3599 "vect_recog_mask_conversion_pattern: detected:\n");
3600
3601 return pattern_stmt;
3602 }
3603
3604 if (!is_gimple_assign (last_stmt))
3605 return NULL;
3606
3607 lhs = gimple_assign_lhs (last_stmt);
3608 rhs1 = gimple_assign_rhs1 (last_stmt);
3609 rhs_code = gimple_assign_rhs_code (last_stmt);
3610
3611 /* Check for cond expression requiring mask conversion. */
3612 if (rhs_code == COND_EXPR)
3613 {
3614 /* vect_recog_mixed_size_cond_pattern could apply.
3615 Do nothing then. */
3616 if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
3617 return NULL;
3618
3619 vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs));
3620
3621 if (TREE_CODE (rhs1) == SSA_NAME)
3622 {
3623 rhs1_type = search_type_for_mask (rhs1, vinfo);
3624 if (!rhs1_type)
3625 return NULL;
3626 }
3627 else
3628 rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0));
3629
3630 vectype2 = get_mask_type_for_scalar_type (rhs1_type);
3631
3632 if (!vectype1 || !vectype2
3633 || TYPE_VECTOR_SUBPARTS (vectype1) == TYPE_VECTOR_SUBPARTS (vectype2))
3634 return NULL;
3635
3636 /* If rhs1 is a comparison we need to move it into a
3637 separate statement. */
3638 if (TREE_CODE (rhs1) != SSA_NAME)
3639 {
3640 tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL);
3641 pattern_stmt = gimple_build_assign (tmp, rhs1);
3642 rhs1 = tmp;
3643
3644 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
3645 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3646 STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype2;
3647 append_pattern_def_seq (stmt_vinfo, pattern_stmt);
3648 }
3649
3650 tmp = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3651
3652 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3653 pattern_stmt = gimple_build_assign (lhs, COND_EXPR, tmp,
3654 gimple_assign_rhs2 (last_stmt),
3655 gimple_assign_rhs3 (last_stmt));
3656
3657 *type_out = vectype1;
3658 *type_in = vectype1;
3659 stmts->safe_push (last_stmt);
3660 if (dump_enabled_p ())
3661 dump_printf_loc (MSG_NOTE, vect_location,
3662 "vect_recog_mask_conversion_pattern: detected:\n");
3663
3664 return pattern_stmt;
3665 }
3666
3667 /* Now check for binary boolean operations requiring conversion for
3668 one of operands. */
3669 if (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
3670 return NULL;
3671
3672 if (rhs_code != BIT_IOR_EXPR
3673 && rhs_code != BIT_XOR_EXPR
3674 && rhs_code != BIT_AND_EXPR)
3675 return NULL;
3676
3677 rhs2 = gimple_assign_rhs2 (last_stmt);
3678
3679 rhs1_type = search_type_for_mask (rhs1, vinfo);
3680 rhs2_type = search_type_for_mask (rhs2, vinfo);
3681
3682 if (!rhs1_type || !rhs2_type
3683 || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
3684 return NULL;
3685
3686 if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type))
3687 {
3688 vectype1 = get_mask_type_for_scalar_type (rhs1_type);
3689 if (!vectype1)
3690 return NULL;
3691 rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo, vinfo);
3692 }
3693 else
3694 {
3695 vectype1 = get_mask_type_for_scalar_type (rhs2_type);
3696 if (!vectype1)
3697 return NULL;
3698 rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo, vinfo);
3699 }
3700
3701 lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
3702 pattern_stmt = gimple_build_assign (lhs, rhs_code, rhs1, rhs2);
3703
3704 *type_out = vectype1;
3705 *type_in = vectype1;
3706 stmts->safe_push (last_stmt);
3707 if (dump_enabled_p ())
3708 dump_printf_loc (MSG_NOTE, vect_location,
3709 "vect_recog_mask_conversion_pattern: detected:\n");
3710
3711 return pattern_stmt;
3712}
3713
3714
1107f3ae
IR
3715/* Mark statements that are involved in a pattern. */
3716
3717static inline void
355fe088 3718vect_mark_pattern_stmts (gimple *orig_stmt, gimple *pattern_stmt,
1107f3ae
IR
3719 tree pattern_vectype)
3720{
3721 stmt_vec_info pattern_stmt_info, def_stmt_info;
3722 stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
310213d4 3723 vec_info *vinfo = orig_stmt_info->vinfo;
355fe088 3724 gimple *def_stmt;
1107f3ae 3725
1107f3ae 3726 pattern_stmt_info = vinfo_for_stmt (pattern_stmt);
ab0ef706
JJ
3727 if (pattern_stmt_info == NULL)
3728 {
310213d4 3729 pattern_stmt_info = new_stmt_vec_info (pattern_stmt, vinfo);
ab0ef706
JJ
3730 set_vinfo_for_stmt (pattern_stmt, pattern_stmt_info);
3731 }
3732 gimple_set_bb (pattern_stmt, gimple_bb (orig_stmt));
1107f3ae
IR
3733
3734 STMT_VINFO_RELATED_STMT (pattern_stmt_info) = orig_stmt;
3735 STMT_VINFO_DEF_TYPE (pattern_stmt_info)
ab0ef706 3736 = STMT_VINFO_DEF_TYPE (orig_stmt_info);
1107f3ae
IR
3737 STMT_VINFO_VECTYPE (pattern_stmt_info) = pattern_vectype;
3738 STMT_VINFO_IN_PATTERN_P (orig_stmt_info) = true;
3739 STMT_VINFO_RELATED_STMT (orig_stmt_info) = pattern_stmt;
363477c0
JJ
3740 STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info)
3741 = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info);
3742 if (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info))
1107f3ae 3743 {
363477c0
JJ
3744 gimple_stmt_iterator si;
3745 for (si = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (pattern_stmt_info));
3746 !gsi_end_p (si); gsi_next (&si))
69d2aade 3747 {
363477c0
JJ
3748 def_stmt = gsi_stmt (si);
3749 def_stmt_info = vinfo_for_stmt (def_stmt);
3750 if (def_stmt_info == NULL)
3751 {
310213d4 3752 def_stmt_info = new_stmt_vec_info (def_stmt, vinfo);
363477c0
JJ
3753 set_vinfo_for_stmt (def_stmt, def_stmt_info);
3754 }
3755 gimple_set_bb (def_stmt, gimple_bb (orig_stmt));
3756 STMT_VINFO_RELATED_STMT (def_stmt_info) = orig_stmt;
56f8faae 3757 STMT_VINFO_DEF_TYPE (def_stmt_info) = vect_internal_def;
363477c0
JJ
3758 if (STMT_VINFO_VECTYPE (def_stmt_info) == NULL_TREE)
3759 STMT_VINFO_VECTYPE (def_stmt_info) = pattern_vectype;
69d2aade 3760 }
1107f3ae
IR
3761 }
3762}
3763
b8698a0f 3764/* Function vect_pattern_recog_1
20f06221
DN
3765
3766 Input:
3767 PATTERN_RECOG_FUNC: A pointer to a function that detects a certain
3768 computation pattern.
3769 STMT: A stmt from which the pattern search should start.
3770
3771 If PATTERN_RECOG_FUNC successfully detected the pattern, it creates an
b8698a0f
L
3772 expression that computes the same functionality and can be used to
3773 replace the sequence of stmts that are involved in the pattern.
20f06221
DN
3774
3775 Output:
b8698a0f
L
3776 This function checks if the expression returned by PATTERN_RECOG_FUNC is
3777 supported in vector form by the target. We use 'TYPE_IN' to obtain the
3778 relevant vector type. If 'TYPE_IN' is already a vector type, then this
20f06221
DN
3779 indicates that target support had already been checked by PATTERN_RECOG_FUNC.
3780 If 'TYPE_OUT' is also returned by PATTERN_RECOG_FUNC, we check that it fits
3781 to the available target pattern.
3782
b8698a0f 3783 This function also does some bookkeeping, as explained in the documentation
20f06221
DN
3784 for vect_recog_pattern. */
3785
3786static void
92aea285
JJ
3787vect_pattern_recog_1 (vect_recog_func_ptr vect_recog_func,
3788 gimple_stmt_iterator si,
355fe088 3789 vec<gimple *> *stmts_to_replace)
20f06221 3790{
355fe088 3791 gimple *stmt = gsi_stmt (si), *pattern_stmt;
383d9c83 3792 stmt_vec_info stmt_info;
383d9c83 3793 loop_vec_info loop_vinfo;
20f06221
DN
3794 tree pattern_vectype;
3795 tree type_in, type_out;
20f06221 3796 enum tree_code code;
b5aeb3bb 3797 int i;
355fe088 3798 gimple *next;
20f06221 3799
9771b263
DN
3800 stmts_to_replace->truncate (0);
3801 stmts_to_replace->quick_push (stmt);
d1fc143d 3802 pattern_stmt = (* vect_recog_func) (stmts_to_replace, &type_in, &type_out);
726a989a 3803 if (!pattern_stmt)
b8698a0f
L
3804 return;
3805
9771b263 3806 stmt = stmts_to_replace->last ();
383d9c83
IR
3807 stmt_info = vinfo_for_stmt (stmt);
3808 loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3809
e6f5c25d
IE
3810 if (VECTOR_BOOLEAN_TYPE_P (type_in)
3811 || VECTOR_MODE_P (TYPE_MODE (type_in)))
b8698a0f
L
3812 {
3813 /* No need to check target support (already checked by the pattern
3814 recognition function). */
b690cc0f 3815 pattern_vectype = type_out ? type_out : type_in;
20f06221
DN
3816 }
3817 else
3818 {
ef4bddc2 3819 machine_mode vec_mode;
20f06221
DN
3820 enum insn_code icode;
3821 optab optab;
3822
3823 /* Check target support */
b690cc0f
RG
3824 type_in = get_vectype_for_scalar_type (type_in);
3825 if (!type_in)
3826 return;
3827 if (type_out)
3828 type_out = get_vectype_for_scalar_type (type_out);
3829 else
3830 type_out = type_in;
15bbc165
AO
3831 if (!type_out)
3832 return;
b690cc0f 3833 pattern_vectype = type_out;
03d3e953 3834
726a989a
RB
3835 if (is_gimple_assign (pattern_stmt))
3836 code = gimple_assign_rhs_code (pattern_stmt);
3837 else
3838 {
3839 gcc_assert (is_gimple_call (pattern_stmt));
3840 code = CALL_EXPR;
3841 }
3842
b690cc0f
RG
3843 optab = optab_for_tree_code (code, type_in, optab_default);
3844 vec_mode = TYPE_MODE (type_in);
20f06221 3845 if (!optab
947131ba 3846 || (icode = optab_handler (optab, vec_mode)) == CODE_FOR_nothing
b690cc0f 3847 || (insn_data[icode].operand[0].mode != TYPE_MODE (type_out)))
20f06221
DN
3848 return;
3849 }
3850
3851 /* Found a vectorizable pattern. */
73fbfcad 3852 if (dump_enabled_p ())
20f06221 3853 {
ccb3ad87 3854 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3855 "pattern recognized: ");
ccb3ad87 3856 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
20f06221 3857 }
b8698a0f 3858
726a989a 3859 /* Mark the stmts that are involved in the pattern. */
1107f3ae 3860 vect_mark_pattern_stmts (stmt, pattern_stmt, pattern_vectype);
20f06221 3861
b5aeb3bb
IR
3862 /* Patterns cannot be vectorized using SLP, because they change the order of
3863 computation. */
f5709183 3864 if (loop_vinfo)
9771b263 3865 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTIONS (loop_vinfo), i, next)
f5709183 3866 if (next == stmt)
9771b263 3867 LOOP_VINFO_REDUCTIONS (loop_vinfo).ordered_remove (i);
51312233 3868
1107f3ae
IR
3869 /* It is possible that additional pattern stmts are created and inserted in
3870 STMTS_TO_REPLACE. We create a stmt_info for each of them, and mark the
3871 relevant statements. */
9771b263
DN
3872 for (i = 0; stmts_to_replace->iterate (i, &stmt)
3873 && (unsigned) i < (stmts_to_replace->length () - 1);
51312233
IR
3874 i++)
3875 {
3876 stmt_info = vinfo_for_stmt (stmt);
3877 pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
73fbfcad 3878 if (dump_enabled_p ())
51312233 3879 {
ccb3ad87 3880 dump_printf_loc (MSG_NOTE, vect_location,
78c60e3d 3881 "additional pattern stmt: ");
ccb3ad87 3882 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, pattern_stmt, 0);
51312233
IR
3883 }
3884
1107f3ae 3885 vect_mark_pattern_stmts (stmt, pattern_stmt, NULL_TREE);
51312233 3886 }
20f06221
DN
3887}
3888
3889
3890/* Function vect_pattern_recog
3891
3892 Input:
3893 LOOP_VINFO - a struct_loop_info of a loop in which we want to look for
3894 computation idioms.
3895
9d5e7640
IR
3896 Output - for each computation idiom that is detected we create a new stmt
3897 that provides the same functionality and that can be vectorized. We
20f06221
DN
3898 also record some information in the struct_stmt_info of the relevant
3899 stmts, as explained below:
3900
3901 At the entry to this function we have the following stmts, with the
3902 following initial value in the STMT_VINFO fields:
3903
3904 stmt in_pattern_p related_stmt vec_stmt
3905 S1: a_i = .... - - -
3906 S2: a_2 = ..use(a_i).. - - -
3907 S3: a_1 = ..use(a_2).. - - -
3908 S4: a_0 = ..use(a_1).. - - -
3909 S5: ... = ..use(a_0).. - - -
3910
3911 Say the sequence {S1,S2,S3,S4} was detected as a pattern that can be
9d5e7640
IR
3912 represented by a single stmt. We then:
3913 - create a new stmt S6 equivalent to the pattern (the stmt is not
3914 inserted into the code)
20f06221
DN
3915 - fill in the STMT_VINFO fields as follows:
3916
3917 in_pattern_p related_stmt vec_stmt
b8698a0f 3918 S1: a_i = .... - - -
20f06221
DN
3919 S2: a_2 = ..use(a_i).. - - -
3920 S3: a_1 = ..use(a_2).. - - -
20f06221 3921 S4: a_0 = ..use(a_1).. true S6 -
9d5e7640 3922 '---> S6: a_new = .... - S4 -
20f06221
DN
3923 S5: ... = ..use(a_0).. - - -
3924
3925 (the last stmt in the pattern (S4) and the new pattern stmt (S6) point
9d5e7640 3926 to each other through the RELATED_STMT field).
20f06221
DN
3927
3928 S6 will be marked as relevant in vect_mark_stmts_to_be_vectorized instead
3929 of S4 because it will replace all its uses. Stmts {S1,S2,S3} will
3930 remain irrelevant unless used by stmts other than S4.
3931
3932 If vectorization succeeds, vect_transform_stmt will skip over {S1,S2,S3}
9d5e7640 3933 (because they are marked as irrelevant). It will vectorize S6, and record
83197f37
IR
3934 a pointer to the new vector stmt VS6 from S6 (as usual).
3935 S4 will be skipped, and S5 will be vectorized as usual:
20f06221
DN
3936
3937 in_pattern_p related_stmt vec_stmt
3938 S1: a_i = .... - - -
3939 S2: a_2 = ..use(a_i).. - - -
3940 S3: a_1 = ..use(a_2).. - - -
3941 > VS6: va_new = .... - - -
20f06221 3942 S4: a_0 = ..use(a_1).. true S6 VS6
9d5e7640 3943 '---> S6: a_new = .... - S4 VS6
20f06221
DN
3944 > VS5: ... = ..vuse(va_new).. - - -
3945 S5: ... = ..use(a_0).. - - -
3946
9d5e7640 3947 DCE could then get rid of {S1,S2,S3,S4,S5} (if their defs are not used
20f06221
DN
3948 elsewhere), and we'll end up with:
3949
b8698a0f 3950 VS6: va_new = ....
83197f37
IR
3951 VS5: ... = ..vuse(va_new)..
3952
3953 In case of more than one pattern statements, e.g., widen-mult with
3954 intermediate type:
3955
3956 S1 a_t = ;
3957 S2 a_T = (TYPE) a_t;
3958 '--> S3: a_it = (interm_type) a_t;
3959 S4 prod_T = a_T * CONST;
3960 '--> S5: prod_T' = a_it w* CONST;
3961
3962 there may be other users of a_T outside the pattern. In that case S2 will
3963 be marked as relevant (as well as S3), and both S2 and S3 will be analyzed
3964 and vectorized. The vector stmt VS2 will be recorded in S2, and VS3 will
3965 be recorded in S3. */
20f06221
DN
3966
3967void
310213d4 3968vect_pattern_recog (vec_info *vinfo)
20f06221 3969{
f5709183 3970 struct loop *loop;
772e61e1 3971 basic_block *bbs;
f5709183 3972 unsigned int nbbs;
726a989a 3973 gimple_stmt_iterator si;
20f06221 3974 unsigned int i, j;
92aea285 3975 vect_recog_func_ptr vect_recog_func;
355fe088
TS
3976 auto_vec<gimple *, 1> stmts_to_replace;
3977 gimple *stmt;
20f06221 3978
73fbfcad 3979 if (dump_enabled_p ())
78c60e3d 3980 dump_printf_loc (MSG_NOTE, vect_location,
e645e942 3981 "=== vect_pattern_recog ===\n");
20f06221 3982
310213d4 3983 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
f5709183
IR
3984 {
3985 loop = LOOP_VINFO_LOOP (loop_vinfo);
3986 bbs = LOOP_VINFO_BBS (loop_vinfo);
3987 nbbs = loop->num_nodes;
61d371eb
RB
3988
3989 /* Scan through the loop stmts, applying the pattern recognition
3990 functions starting at each stmt visited: */
3991 for (i = 0; i < nbbs; i++)
3992 {
3993 basic_block bb = bbs[i];
3994 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
3995 {
3996 /* Scan over all generic vect_recog_xxx_pattern functions. */
3997 for (j = 0; j < NUM_PATTERNS; j++)
3998 {
3999 vect_recog_func = vect_vect_recog_func_ptrs[j];
4000 vect_pattern_recog_1 (vect_recog_func, si,
4001 &stmts_to_replace);
4002 }
4003 }
4004 }
f5709183
IR
4005 }
4006 else
4007 {
61d371eb
RB
4008 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo);
4009 for (si = bb_vinfo->region_begin;
4010 gsi_stmt (si) != gsi_stmt (bb_vinfo->region_end); gsi_next (&si))
4011 {
4012 if ((stmt = gsi_stmt (si))
f5709183
IR
4013 && vinfo_for_stmt (stmt)
4014 && !STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt)))
61d371eb 4015 continue;
f5709183 4016
61d371eb
RB
4017 /* Scan over all generic vect_recog_xxx_pattern functions. */
4018 for (j = 0; j < NUM_PATTERNS; j++)
4019 {
92aea285
JJ
4020 vect_recog_func = vect_vect_recog_func_ptrs[j];
4021 vect_pattern_recog_1 (vect_recog_func, si,
d1fc143d 4022 &stmts_to_replace);
61d371eb
RB
4023 }
4024 }
20f06221
DN
4025 }
4026}